In [1]:
import numpy as np
import sys

### NumPy arrays vs Python lists

- Speed difference

In [2]:
# Two plain Python lists of 100 million integers each: 0..99,999,999 and
# 100,000,000..199,999,999.
a = [i for i in range(100000000)]
b = [i for i in range(100000000,200000000)]

# Result list, grown one element at a time by the loop below.
c = []
import time 

# Time an element-wise addition done with a pure-Python index loop.
# NOTE(review): time.time() is the wall clock; time.perf_counter() is the
# standard-library clock intended for measuring short durations.
start = time.time()
for i in range(len(a)):
    c.append(a[i] + b[i])
finish = time.time()
# Elapsed seconds; a later cell divides this by the NumPy timing.
list_total_time = finish - start
print(f"Total Time took to Loop through list: {list_total_time}")


Total Time took to Loop through list: 8.29275894165039


In [3]:
# The same two integer ranges as in the list cell, now as NumPy arrays.
a = np.arange(100000000)
b = np.arange(100000000,200000000)

import time 

# Time the element-wise addition written as a single array expression.
start = time.time()

# No Python-level loop here (despite the wording of the printed label):
# the addition is one vectorized NumPy operation.
c = a + b

finish = time.time()
# Elapsed seconds; a later cell uses this as the divisor of the speed ratio.
np_total_time = finish - start
print(f"Total Time took to Loop through numpy array: {np_total_time}")


Total Time took to Loop through numpy array: 0.45028042793273926


In [4]:
list_total_time/np_total_time

18.416876300226676

In [5]:
# memory: size of a Python list holding 10 million integers
a = [i for i in range(10000000)]
import sys

# sys.getsizeof counts only the memory directly attributed to the list object
# itself; the int objects the list refers to are NOT included, so this number
# understates the total memory used by the list and its elements.
list_memory = sys.getsizeof(a)
list_memory

89095160

In [6]:
# Same element count as the list in the previous cell, stored as 8-byte integers.
b = np.arange(10000000, dtype=np.int64)
print(b.dtype)
# The recorded result (80000112) is the data itself, 10,000,000 * 8 bytes,
# plus 112 bytes for the array object.
np_memory = sys.getsizeof(b)
np_memory

int64


80000112

In [7]:
list_memory - np_memory

9095048

### Advanced Indexing
*****************


In [8]:
# Normal indexing and slicing: work on a 6x4 grid holding the integers 0..23
a = np.reshape(np.arange(24), (6, 4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [9]:
a[1:2,:]

array([[4, 5, 6, 7]])

In [10]:
## For fetching 4 (row 1, column 0)
## Both indices are slices, so the result stays 2-D: [[4]], not a bare 4
a[1:2,0:1]

array([[4]])

- Fancy Indexing


In [11]:
# Fancy Indexing
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [12]:
## Fetching specific rows
a[[0,2,3,5]]

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [20, 21, 22, 23]])

In [13]:
## Fetching specific columns (0, 1 and 3) from a slice of rows (2 and 3)

a[2:4,[0,1,3]]

array([[ 8,  9, 11],
       [12, 13, 15]])

- Boolean Indexing


In [14]:
# Boolean Indexing
# 24 random integers from 1 to 99, arranged as 6 rows x 4 columns.
# NOTE(review): no random seed is set in the visible cells, so these values
# (and every output below that depends on them) change from run to run.
a = np.random.randint(1,100,24).reshape(6,4)
a

array([[90, 41, 19, 61],
       [61,  7, 56, 57],
       [67, 67, 78, 60],
       [98, 70, 82, 79],
       [39, 98, 11, 93],
       [33, 92, 55, 42]], dtype=int32)

In [15]:
# find all numbers greater than 50
a[a > 50]

array([90, 61, 61, 56, 57, 67, 67, 78, 60, 98, 70, 82, 79, 98, 93, 92, 55],
      dtype=int32)

In [16]:
# find out even numbers
a[a % 2 == 0]

array([90, 56, 78, 60, 98, 70, 82, 98, 92, 42], dtype=int32)

In [17]:
# find all numbers greater than 50 and are even

a[(a > 50) & (a % 2 == 0)]

array([90, 56, 78, 60, 98, 70, 82, 98, 92], dtype=int32)

In [18]:
# find all numbers not divisible by 7
# (a % 7 == 0) marks the multiples of 7; ~ inverts that boolean mask
a[~(a % 7 == 0)]

array([90, 41, 19, 61, 61, 57, 67, 67, 78, 60, 82, 79, 39, 11, 93, 33, 92,
       55], dtype=int32)

### Broadcasting

The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations.

The smaller array is “broadcast” across the larger array so that they have compatible shapes.

In [21]:
# Two arrays of identical shape (2, 3): no broadcasting is needed to add them.
a = np.reshape(np.arange(6), (2, 3))
b = np.reshape(np.arange(6, 12), (2, 3))

In [29]:
print(a)
print(b)

[[0 1 2]
 [3 4 5]]
[[ 6  7  8]
 [ 9 10 11]]


In [30]:
a+b

array([[ 6,  8, 10],
       [12, 14, 16]])

In [31]:
# A (2, 3) matrix and a (1, 3) single-row matrix: shapes differ in the first axis only.
aa = np.reshape(np.arange(6), (2, 3))
bb = np.reshape(np.arange(3), (1, 3))

In [32]:
print(aa)
print(bb)

[[0 1 2]
 [3 4 5]]
[[0 1 2]]


In [33]:
aa+bb

array([[0, 2, 4],
       [3, 5, 7]])

#### Broadcasting Rules

**1. Make the two arrays have the same number of dimensions.**<br>
- If the two arrays have different numbers of dimensions, prepend dimensions of size 1 to the shape of the array with fewer dimensions until both have the same number (e.g. shape `(3,)` is treated as `(1, 3)` when combined with a `(4, 3)` array).<br>

**2. Make each dimension of the two arrays the same size.**<br>
- If the sizes of each dimension of the two arrays do not match, dimensions with size 1 are stretched to the size of the other array.
- If the sizes in some dimension differ and neither of them is 1, the arrays cannot be broadcast, and an error (`ValueError`) is raised.

In [34]:
# More examples: a (4, 3) matrix plus a length-3 vector

b = np.arange(3)
a = np.reshape(np.arange(12), (4, 3))

# The vector is treated as a single row and added to every row of the matrix.
print(a, b, a + b, sep="\n")

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[0 1 2]
[[ 0  2  4]
 [ 3  5  7]
 [ 6  8 10]
 [ 9 11 13]]


In [35]:
# A (1, 3) row and a (3, 1) column: each is stretched along its size-1 axis,
# so the sum has shape (3, 3).
a = np.arange(3)[np.newaxis, :]
b = np.arange(3)[:, np.newaxis]

print(a, b, a + b, sep="\n")

[[0 1 2]]
[[0]
 [1]
 [2]]
[[0 1 2]
 [1 2 3]
 [2 3 4]]


In [36]:
# A (1, 3) row and a (4, 1) column: the size-1 axes are stretched, giving a (4, 3) sum.
a = np.reshape(np.arange(3), (1, -1))
b = np.reshape(np.arange(4), (-1, 1))

print(a, b, a + b, sep="\n")

[[0 1 2]]
[[0]
 [1]
 [2]
 [3]]
[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]]


### Working with mathematical formulas

In [37]:
# np.sin is applied to every element of the array at once (inputs are in radians).
a = np.arange(10)
np.sin(a)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])

In [38]:
# sigmoid
def sigmoid(array):
  """Element-wise logistic sigmoid, 1 / (1 + exp(-x)), computed without overflow.

  Parameters
  ----------
  array : scalar or array-like of real numbers.

  Returns
  -------
  A NumPy float scalar for scalar input, otherwise an ndarray with the same
  shape as the input. Every value lies in [0, 1].
  """
  x = np.asarray(array)
  if not np.issubdtype(x.dtype, np.floating):
    # Integer/bool input: switch to float64 first so that negating the values
    # cannot wrap around (unsigned integers) before the exponential is taken.
    x = x.astype(np.float64)
  # exp(-|x|) always lies in [0, 1], so it cannot overflow, whereas exp(-x)
  # overflows for large negative x.
  decay = np.exp(-np.abs(x))
  # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same function.
  result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
  # Indexing with () turns a 0-d result back into a scalar and leaves arrays unchanged.
  return result[()]


a = np.arange(100)

sigmoid(a)

array([0.5       , 0.73105858, 0.88079708, 0.95257413, 0.98201379,
       0.99330715, 0.99752738, 0.99908895, 0.99966465, 0.99987661,
       0.9999546 , 0.9999833 , 0.99999386, 0.99999774, 0.99999917,
       0.99999969, 0.99999989, 0.99999996, 0.99999998, 0.99999999,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.     

In [39]:
# mean squared error
# Two independent sets of 25 random integers from 1 to 49 stand in for the
# true and the predicted values.
# NOTE(review): no random seed is set in the visible cells, so the values and
# the resulting error differ on every run.

actual = np.random.randint(1,50,25)
predicted = np.random.randint(1,50,25)

In [40]:
def mse(actual, predicted):
  """Mean squared error between two equally shaped (or broadcastable) inputs.

  Parameters
  ----------
  actual : array-like of real numbers, the reference values.
  predicted : array-like of real numbers, the values being evaluated.

  Returns
  -------
  np.float64 : the mean of (actual - predicted) ** 2 over all elements.
  """
  # Work in float64: with unsigned or narrow integer dtypes the subtraction
  # and the square can silently wrap around, and plain Python lists do not
  # support element-wise subtraction at all.
  actual = np.asarray(actual, dtype=np.float64)
  predicted = np.asarray(predicted, dtype=np.float64)
  return np.mean((actual - predicted) ** 2)

mse(actual,predicted)

np.float64(424.04)

In [41]:
# mean squared error written inline (NOT binary cross entropy): this is the
# same expression as the body of mse(), so it yields the same value
np.mean((actual - predicted)**2)

np.float64(424.04)

In [42]:
actual

array([19, 10, 36, 28, 49, 33, 25, 14,  9, 37,  4, 42, 26, 46, 26, 19, 21,
       18, 37, 12, 35, 21, 42,  4, 23], dtype=int32)

### Working with missing values

In [43]:
# Working with missing values -> np.nan
# np.nan is a float, so the whole array is stored as floats (1., 2., ... in the output)
a = np.array([1,2,3,4,np.nan,6])
a

array([ 1.,  2.,  3.,  4., nan,  6.])

In [44]:
# np.isnan marks the missing entries; ~ inverts the mask so only the real values are kept
a[~np.isnan(a)]

array([1., 2., 3., 4., 6.])