<a href="https://colab.research.google.com/github/sajan-sarker/machine-learning/blob/main/numpy_vectorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

*Python, NumPy and Vectorization*

In [1]:
import numpy as np
import time

Vector Creation:

In [7]:
# Three ways to allocate a 1-D vector of four float64 values: zeros from an
# int size, zeros from a shape tuple, and uniform random samples in [0, 1).
# `a` ends up bound to the last (random) vector.
for a in (np.zeros(4), np.zeros((4,)), np.random.random_sample(4)):
  print(f'a = {a}, shape: {a.shape}, data type: {a.dtype}')

a = [0. 0. 0. 0.], shape: (4,), data type: float64
a = [0. 0. 0. 0.], shape: (4,), data type: float64
a = [0.32396797 0.16137418 0.5354722  0.5177139 ], shape: (4,), data type: float64


In [9]:
# arange(4) yields the integers 0..3; rand(4) yields four uniform samples
# in [0, 1). `a` ends up bound to the random vector.
for a in (np.arange(4), np.random.rand(4)):
  print(f'a = {a}, shape: {a.shape}, data type: {a.dtype}')

a = [0 1 2 3], shape: (4,), data type: int64
a = [0.31076711 0.55426719 0.99613908 0.48612948], shape: (4,), data type: float64


In [11]:
# Building vectors from Python lists: an all-int list gives an integer array,
# while a single float in the list promotes the whole array to float64.
for a in (np.array([1, 2, 3, 4]), np.array([1.2, 2.5, 3, 4])):
  print(f'a = {a}, shape: {a.shape}, data type: {a.dtype}')

a = [1 2 3 4], shape: (4,), data type: int64
a = [1.2 2.5 3.  4. ], shape: (4,), data type: float64


Operations on vectors
****

Indexing

In [17]:
# Vector holding the integers 1..10.
a = np.arange(10)+1
print(a)
print(f'a = {a}, shape: {a.shape}, data type: {a.dtype}')
# Indexing a single element returns a scalar, whose shape is ().
print(f'shape: {a[2].shape}, a[2]= {a[2]}')

# Valid indices are 0..9, so a[10] is out of bounds. Catch only IndexError
# (the error being demonstrated) so any unrelated failure still surfaces.
try:
  c = a[10]
except IndexError as e:
  print(e)

[ 1  2  3  4  5  6  7  8  9 10]
a = [ 1  2  3  4  5  6  7  8  9 10], shape: (10,), data type: int64
shape: (), a[2]= 3
index 10 is out of bounds for axis 0 with size 10


Slicing

In [23]:
b = [1,2,3,4,5,6]   # plain Python list, for comparison with the ndarray
print(a)
print(a[2:7])   # array slicing
print(b[2:7])   # list slicing
# start:stop:step variants, then open-ended slices and a full slice.
for sliced in (a[2:7:1], a[2:7:2], a[:3], a[3:], a[:]):
  print(sliced)

[ 1  2  3  4  5  6  7  8  9 10]
[3 4 5 6 7]
[3, 4, 5, 6]
[3 4 5 6 7]
[3 5 7]
[1 2 3]
[ 4  5  6  7  8  9 10]
[ 1  2  3  4  5  6  7  8  9 10]


Single vector Operation

In [26]:
# The vector itself, its element-wise negation, its sum, its mean and its
# element-wise square, printed one per line.
for result in (a, -a, a.sum(), a.mean(), a**2):
  print(result)

[ 1  2  3  4  5  6  7  8  9 10]
[ -1  -2  -3  -4  -5  -6  -7  -8  -9 -10]
55
5.5
[  1   4   9  16  25  36  49  64  81 100]


Vector addition, subtraction

In [28]:
# Two equal-length vectors (1..10 and 11..20); `+` adds them element-wise.
a, b = np.arange(1, 11), np.arange(11, 21)
print(a, b, a + b, sep='\n')

[ 1  2  3  4  5  6  7  8  9 10]
[11 12 13 14 15 16 17 18 19 20]
[12 14 16 18 20 22 24 26 28 30]


In [29]:
# A five-element vector holding 21..25.
c = np.arange(start=21, stop=26)
print(c)

[21 22 23 24 25]


In [30]:
# Element-wise addition needs compatible shapes; when the operands cannot be
# broadcast together NumPy raises ValueError. Catch only that error so an
# unrelated failure (e.g. `a` or `c` not defined on a fresh kernel) is not
# silently printed as if it were the demonstration.
try:
  d  = a + c
except ValueError as e:
  print(e)

operands could not be broadcast together with shapes (10,) (5,) 


Scalar vector operations

In [31]:
# Multiplying by a scalar scales every element of the vector.
print(a, a * 5, sep='\n')

[ 1  2  3  4  5  6  7  8  9 10]
[ 5 10 15 20 25 30 35 40 45 50]


Dot product

In [37]:
a = np.array([1, 2, 3])
b = np.array([1, 2, 3])

# Library dot product: once as an array method, once as a module function.
print('a dot b using dot function:',a.dot(b))
print('a dot b using dot function:',np.dot(a,b))

# The same quantity by hand: accumulate the element-wise products.
x = 0
for i in range(len(a)):
  x += a[i] * b[i]
print ('a dot b using for loop:',x)

a dot b using dot function: 14
a dot b using dot function: 14
a dot b using for loop: 14


Vectorized vs. for-loop calculation time difference

In [43]:
# Fixed seed so the two random vectors are the same on every run.
np.random.seed(1)
a = np.random.rand(10000000)
b = np.random.rand(10000000)

# dot product version
# perf_counter() is a high-resolution clock intended for measuring short
# intervals; time.time() is wall-clock time and can be too coarse for a
# call that only takes a few milliseconds.
tic = time.perf_counter()   # start time
c = np.dot(a, b)
toc = time.perf_counter()   # end time
print(f"a dot b: {c:.4f}")
print(f"Vectorized version duration: {1000*(toc-tic):.4f} ms")

# loop version
x = 0
tic = time.perf_counter()
for i in range(a.shape[0]):
  x = x + (a[i] * b[i])
toc = time.perf_counter()
# Report the loop's own accumulator `x` (not the vectorized result `c`) so the
# output actually verifies that both approaches agree.
print(f"a dot b loop: {x:.4f}")
print(f"Loop version duration: {1000*(toc-tic):.4f} ms")

# Release the two large arrays; kernel memory persists across cells.
del a, b

a dot b: 2501072.5817
Vectorized version duration: 12.3882 ms
a dot b loop: 2501072.5817
Loop version duration: 5422.0617 ms


In [44]:
# x is a 4x1 column; x[1] picks its second row, a 1-element vector.
x = np.array([[1], [2], [3], [4]])
y = np.array([2])
# Dot product of two 1-element vectors collapses to a scalar (shape ()).
z = x[1].dot(y)

print(x)
print('x:', x.shape)

print(x[1])
print('x[1]:', x[1].shape)

print(y)
print('y:',y.shape)

print(z)
print('z',z.shape)

[[1]
 [2]
 [3]
 [4]]
x: (4, 1)
[2]
x[1]: (1,)
[2]
y: (1,)
4
z ()


Matrices
****

Matrix Creation

In [48]:
# 1-D vector of five zeros.
a = np.zeros((5,))
print(f'a: {a}, Shape: {a.shape}')

# 2-D row matrix: one row, five columns.
b = np.zeros(shape=(1, 5))
print(f"b: {b}, shape: {b.shape}")

# 2-D column matrix built from nested lists: five rows, one column.
f = np.array([[value] for value in range(1, 6)])
print(f"f: {f}, shape: {f.shape}")

# 2-D column of zeros: two rows, one column.
e = np.zeros(shape=(2, 1))
print(f"e: {e}, shape: {e.shape}")

# One random sample as a 1-D array ...
c = np.random.random_sample((1,))
print(f"c: {c}, shape: {c.shape}")

# ... versus one random sample as a 1x1 matrix.
d = np.random.random_sample(size=(1, 1))
print(f"d: {d}, shape: {d.shape}")

a: [0. 0. 0. 0. 0.], Shape: (5,)
b: [[0. 0. 0. 0. 0.]], shape: (1, 5)
f: [[1]
 [2]
 [3]
 [4]
 [5]], shape: (5, 1)
e: [[0.]
 [0.]], shape: (2, 1)
c: [0.80204309], shape: (1,)
d: [[0.24814448]], shape: (1, 1)


Matrix Indexing

In [52]:
# The integers 0..5 laid out as a matrix with three rows and two columns.
a = np.arange(6).reshape((3, 2))
print(a)
# Two indices select a single element, which is a NumPy scalar with shape ().
print(f"a[2,0]: {a[2,0]}, shape: {a[2,0].shape}, type: {type(a[2,0])}")
# One index selects a whole row, which is a 1-D ndarray.
print(f"a[2]: {a[2]}, shape: {a[2].shape}, type: {type(a[2])}")

[[0 1]
 [2 3]
 [4 5]]
a[2,0]: 4, shape: (), type: <class 'numpy.int64'>
a[2]: [4 5], shape: (2,), type: <class 'numpy.ndarray'>


Matrix Slicing

In [60]:
# The integers 0..19 laid out as two rows of ten columns.
a = np.arange(20).reshape(2, 10)
print(a)
print()
# Columns 2..6 of row 0 only: a 1-D result.
print(f"a[0, 2:7:1]: {a[0, 2:7:1]}, shape: {a[0, 2:7:1].shape}, type: {type(a[0, 2:7:1])}")

# Columns 2..6 of every row: a 2-D result. The reported type is taken from
# the same slice that is being printed.
print(f"a[:, 2:7:1]: \n {a[:, 2:7:1]}, shape: {a[:, 2:7:1].shape}, type: {type(a[:, 2:7:1])}")

# Every column of row 1.
print(f"a[1, :]: \n {a[1, :]}, shape: {a[1, :].shape}, type: {type(a[1,:])}")

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]

a[0, 2:7:1]: [2 3 4 5 6], shape: (5,), type: <class 'numpy.ndarray'>
a[:, 2:7:1]: 
 [[ 2  3  4  5  6]
 [12 13 14 15 16]], shape: (2, 5), type: <class 'numpy.ndarray'>
a[1, :]: 
 [10 11 12 13 14 15 16 17 18 19], shape: (10,), type: <class 'numpy.ndarray'>
