### Numpy array vs Python lists

In [45]:
# speed
# list
import time
a  = [i for i in range(10000000)]
b  = [i for i in range(10000000,20000000)]
c = []

start = time.time()

for i in range(len(a)):
    c.append(a[i] +b[i])
print(time.time()-start)





0.6953721046447754


In [50]:
# numpy
import numpy as np
a = np.arange(10000000)
b = np.arange(10000000,20000000)
start = time.time()
c = a+b
time.time() - start

0.013104915618896484

In [51]:
0.6953721046447754/0.010962963104248047

63.42921143055978

In [56]:
# memory
#list
import time
import sys
a  = [i for i in range(10000000)]
b  = [i for i in range(10000000,20000000)]
sys.getsizeof(a)



89095160

In [60]:
# numpy
import numpy as np
a = np.arange(10000000,dtype=np.int8)
b = np.arange(10000000,20000000)
sys.getsizeof(a)

10000112

In [62]:

89095160/10000112

# convenience

8.909416214538398

### Advanced Indexing

In [73]:
# Normal Indexing and slicing

ohlc1 = np.array([
    [100.1, 101.5, 99.5, 101.0],
    [101.3, 102.9, 100.5, 101.8],
    [101.8, 103.0, 101.0, 102.5]
])

ohlc2 = np.array([
    [200.0, 201.5, 299.5, 201.0],
    [201.0, 202.0, 200.5, 201.8],
    [201.8, 203.0, 201.0, 202.5],
    [201.8, 203.0, 201.0, 202.5]
])

In [75]:
ohlc1[1:,2:]

array([[100.5, 101.8],
       [101. , 102.5]])

In [76]:
# Fancy Indexing

ohlc1[[1],[2]]

array([100.5])

In [81]:
ohlc1[1:,[2,3]]

array([[100.5, 101.8],
       [101. , 102.5]])

In [83]:
ohlc2[[0,3],1:3]

array([[201.5, 299.5],
       [203. , 201. ]])

In [95]:
# Boolean Indexing
ohlc1 = np.array([
    [100.1, 101.5, 99.5, 101.0],
    [101.3, 102.9, 100.5, 101.8],
    [101.8, 103.0, 101.0, 102.5]
])

ohlc2 = np.array([
    [200.0, 201.5, 299.5, 201.0],
    [201.0, 202.0, 200.5, 201.8],
    [201.8, 204.0, 201.0, 202.5],
    [201.8, 203.0, 206.0, 202.5]
])


In [85]:
ohlc1[ohlc1 > 102]

array([102.9, 103. , 102.5])

In [87]:
# find all numbers greater than 202 from ohlc2

ohlc2[ohlc2 > 202]


array([299.5, 203. , 202.5, 203. , 202.5])

In [96]:
# find out even numbers

ohlc2[(ohlc2 % 2 == 0.0) & (ohlc2 > 202)]


array([204., 206.])

In [None]:
# find all numbers greater than 50 and are even



In [None]:
# find all numbers not divisible by 7


### Broadcasting

The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations.

The smaller array is “broadcast” across the larger array so that they have compatible shapes.

In [98]:
# same shape


ohlc1 = np.array([
    [100.1, 101.5, 99.5, 101.0],
    [101.3, 102.9, 100.5, 101.8],
    [101.8, 103.0, 101.0, 102.5]
])

ohlc2 = np.array([
    [200.0, 201.5, 299.5, 201.0],
    [201.0, 202.0, 200.5, 201.8],
    [201.8, 204.0, 201.0, 202.5]
  
])

ohlc1 +ohlc2

array([[300.1, 303. , 399. , 302. ],
       [302.3, 304.9, 301. , 303.6],
       [303.6, 307. , 302. , 305. ]])

In [99]:
# diff shape

ohlc1 = np.array([
    [100.1, 101.5, 99.5, 101.0],
    [101.3, 102.9, 100.5, 101.8],
    [101.8, 103.0, 101.0, 102.5]
])

ohlc2 = np.array([
    [200.0, 201.5, 299.5, 201.0],
    [201.0, 202.0, 200.5, 201.8],
    [201.8, 204.0, 201.0, 202.5],
    [201.8, 204.0, 201.0, 202.5]
  
])

ohlc1 +ohlc2

ValueError: operands could not be broadcast together with shapes (3,4) (4,4) 

#### Broadcasting Rules

**1. Make the two arrays have the same number of dimensions.**<br>
- If the numbers of dimensions of the two arrays are different, add new dimensions with size 1 to the head of the array with the smaller dimension.<br>

**2. Make each dimension of the two arrays the same size.**<br>
- If the sizes of each dimension of the two arrays do not match, dimensions with size 1 are stretched to the size of the other array.
- If there is a dimension whose size is not 1 in either of the two arrays, it cannot be broadcasted, and an error is raised.

<img src = "https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png">

In [102]:
ohlc1 = np.array([
    [100.1, 101.5, 99.5, 101.0],
    [101.3, 102.9, 100.5, 101.8],
    [101.8, 103.0, 101.0, 102.5]
])

ohlc2 = np.array([
    [200.0, 201.5, 299.5, 201.0]
  
   
 
  
])

ohlc1 +ohlc2

array([[300.1, 303. , 399. , 302. ],
       [301.3, 304.4, 400. , 302.8],
       [301.8, 304.5, 400.5, 303.5]])

In [107]:
# More examples

a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [106]:
b = np.arange(4).reshape(1,4)
b

array([[0, 1, 2, 3]])

In [108]:
a+b

array([[ 0,  2,  4,  6],
       [ 4,  6,  8, 10],
       [ 8, 10, 12, 14]])

In [None]:

# shape -> (1,1)

# shape -> (2,2)



### Working with mathematical formulas

In [121]:
ohlc1 = np.array([
    [100.1, 101.5, 99.5, 101.0],
    [101.3, 102.9, 100.5, 101.8],
    [101.8, 103.0, 101.0, 102.5]
])

ohlc2 = np.array([
    [200.0, 201.5, 299.5, 201.0],
    [201.0, 202.0, 200.5, 201.8],
    [201.8, 204.0, 201.0, 202.5]
   
  
])

np.sin(ohlc1)

array([[-0.41774768,  0.82433986, -0.85779535,  0.45202579],
       [ 0.69544219,  0.69796907, -0.03095997,  0.95481453],
       [ 0.95481453,  0.62298863,  0.45202579,  0.92174542]])

In [119]:
# sigmoid
1/(1+ np.exp(-(ohlc1)))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [122]:
# mean squared error
def mse(ohlc1,ohlc2):
    return np.mean((ohlc1-ohlc2)**2)

mse(ohlc1,ohlc2)


12495.15916666667

### Working with missing values

In [124]:
# Working with missing values -> np.nan

ohlc1 = np.array([
    [100.1, 101.5, np.nan, 101.0],
    [101.3, 102.9, np.nan, 101.8],
    [np.nan, 103.0, 101.0, 102.5]
])
ohlc1

array([[100.1, 101.5,   nan, 101. ],
       [101.3, 102.9,   nan, 101.8],
       [  nan, 103. , 101. , 102.5]])

In [128]:
ohlc1[~np.isnan(ohlc1)]

array([100.1, 101.5, 101. , 101.3, 102.9, 101.8, 103. , 101. , 102.5])