# Vectorization

Vectorization in NumPy refers to performing operations on entire arrays (all elements at once) instead of looping over the elements one by one in Python, which is significantly faster and
more efficient than using explicit loops.

In [2]:
import numpy as np

In [None]:
a = np.arange(10,20)
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [None]:
#square of each element  #element-wise operation
a**2

array([100, 121, 144, 169, 196, 225, 256, 289, 324, 361])

In [None]:
np.sin?

In [None]:
np.log?

In [None]:
np.sin(a)

array([-0.54402111, -0.99999021, -0.53657292,  0.42016704,  0.99060736,
        0.65028784, -0.28790332, -0.96139749, -0.75098725,  0.14987721])

In [None]:
np.log(a)

array([2.30258509, 2.39789527, 2.48490665, 2.56494936, 2.63905733,
       2.7080502 , 2.77258872, 2.83321334, 2.89037176, 2.94443898])

In [None]:
#NumPy's built-in math functions (np.sin, np.log, ...) are vectorized by nature: they loop over the array in compiled code => no Python for loops needed => faster

In [None]:
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [None]:
#factorial

In [None]:
import math

In [None]:
math.factorial(3)

6

In [None]:
math.factorial?

In [None]:
#fact of all the elements of my array
math.factorial(a)

TypeError: only integer scalar arrays can be converted to a scalar index

In [None]:
def new_ops(x):
    """Return ``x`` squared when ``x`` is even, otherwise ``x`` cubed.

    The parity check is a plain ``if``-style truth test, so this function is
    meant for a single scalar value at a time.
    """
    # Pick the power first, then apply it once.
    exponent = 2 if x % 2 == 0 else 3
    return x ** exponent

In [None]:
new_ops(2)

4

In [None]:
new_ops(a)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
for i in a:
  print(math.factorial(i))

3628800
39916800
479001600
6227020800
87178291200
1307674368000
20922789888000
355687428096000
6402373705728000
121645100408832000


In [None]:
for i in a:
  print(new_ops(i))

100
1331
144
2197
196
3375
256
4913
324
6859


In [None]:
#vectorize functions that are not built into NumPy
vec_factorial = np.vectorize(math.factorial)

In [None]:
vec_factorial(a)

array([           3628800,           39916800,          479001600,
               6227020800,        87178291200,      1307674368000,
           20922789888000,    355687428096000,   6402373705728000,
       121645100408832000])

In [None]:
# new_ops: wrap the scalar-only function so it can be called on a whole array
vect_ops = np.vectorize(new_ops) # convenience wrapper: applies a regular Python function to each element of an array (still a Python-level loop under the hood, so it is about convenience, not speed)
vect_ops(a)

array([ 100, 1331,  144, 2197,  196, 3375,  256, 4913,  324, 6859])

In [None]:
np.vectorize(new_ops)

<numpy.vectorize at 0x...>

In [None]:
np.vectorize(new_ops)(a)

array([ 100, 1331,  144, 2197,  196, 3375,  256, 4913,  324, 6859])

In [None]:
#is it really faster ???

In [None]:
#homework
# Benchmark: double 10 million integers three ways and print how long each
# approach takes:
#   1. a Python-level loop (list comprehension),
#   2. np.vectorize, which wraps a Python function so it accepts arrays but
#      still invokes that function element by element,
#   3. a true NumPy vectorized expression evaluated in compiled code.
import numpy as np
import time

size = 10_000_000
a = np.arange(size)

# Python loop
# time.perf_counter() is the clock intended for measuring elapsed time: it is
# monotonic and uses the highest-resolution timer available, whereas
# time.time() follows the wall clock and can jump if the system clock is
# adjusted. Only the differences between readings are meaningful.
start1 = time.perf_counter()
out1 = [x * 2 for x in a]
end1 = time.perf_counter()

# np.vectorize
def double(x):
    """Return twice ``x``."""
    return x * 2


vec_func = np.vectorize(double)

start2 = time.perf_counter()
out2 = vec_func(a)
end2 = time.perf_counter()

# True NumPy vectorized
start3 = time.perf_counter()
out3 = a * 2
end3 = time.perf_counter()

print(f"Python loop:        {end1 - start1} seconds")
print(f"np.vectorize:       {end2 - start2} seconds")
print(f"True vectorized:    {end3 - start3} seconds")

In [None]:
np.vectorize?

In [None]:
#element wise operations happens in numpy array

In [None]:
#1d arrays
a = np.arange(1,6)
a

array([1, 2, 3, 4, 5])

In [None]:
a * 5 # 5 constant , scaler value

array([ 5, 10, 15, 20, 25])

In [None]:
b = np.arange(6,11)
b

array([ 6,  7,  8,  9, 10])

In [None]:
a * b #1d * 1d they should have same shape

array([ 6, 14, 24, 36, 50])

In [None]:
c = np.array([1,2,3])
a*c

ValueError: operands could not be broadcast together with shapes (5,) (3,) 

In [None]:
d = np.arange(12).reshape(3,4)
e = np.arange(13,25).reshape(3,4)
print(d) #2d
print(e) #2d

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[13 14 15 16]
 [17 18 19 20]
 [21 22 23 24]]


In [None]:
d * e

array([[  0,  14,  30,  48],
       [ 68,  90, 114, 140],
       [168, 198, 230, 264]])

In [None]:
f = np.arange(13,25).reshape(6,2)
f

array([[13, 14],
       [15, 16],
       [17, 18],
       [19, 20],
       [21, 22],
       [23, 24]])

In [None]:
d * f

ValueError: operands could not be broadcast together with shapes (3,4) (6,2) 

In [None]:
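#array * number => works (the scalar is applied to every element)
#array * array => same shape => works (element-wise)
#array * array => different shapes => works only if the shapes are broadcast-compatible (e.g. (3,4) * (4,)); otherwise ValueError, as above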

In [None]:
#Matrix Multiplication

'''
np.matmul(a,b)
np.dot(a,b)
a @ b # python 3.5 version onwards
'''

In [None]:
a = np.arange(1,5).reshape(2,2)
a

array([[1, 2],
       [3, 4]])

In [None]:
b = np.arange(2,8).reshape(2,3)
b

array([[2, 3, 4],
       [5, 6, 7]])

In [None]:
a*b

ValueError: operands could not be broadcast together with shapes (2,2) (2,3) 

In [None]:
np.matmul(a,b) # matrix product; for inputs with more than 2 dimensions (3d and up) it multiplies stacks of matrices, broadcasting over the leading dimensions

array([[12, 15, 18],
       [26, 33, 40]])

In [None]:
np.dot(a,b) #common way

array([[12, 15, 18],
       [26, 33, 40]])

In [None]:
a @ b

array([[12, 15, 18],
       [26, 33, 40]])

In [None]:
a

array([[1, 2],
       [3, 4]])

In [None]:
np.matmul(a,5)

ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

In [None]:
a @ 5

ValueError: matmul: Input operand 1 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

In [None]:
a

array([[1, 2],
       [3, 4]])

In [None]:
np.dot(a,5) # more permissive than matmul: besides 1d . 1d and array . array products it also accepts a scalar operand (same result as a * 5)

array([[ 5, 10],
       [15, 20]])

In [None]:
#shallow vs deep copy

In [None]:
l1 = [1,2,3,4]
l2 = l1

In [None]:
l1[0]=20
l1

[20, 2, 3, 4]

In [None]:
l2

[20, 2, 3, 4]

In [None]:
print(id(l1),id(l2)) # identical ids: l2 = l1 made no copy at all -- both names refer to the same list object (aliasing)

135068919704832 135068919704832


In [None]:
#arrays
a = np.array([1,2,3,4])
a

array([1, 2, 3, 4])

In [None]:
b = a+1 # element wise operation
b

array([2, 3, 4, 5])

In [None]:
a

array([1, 2, 3, 4])

In [None]:
c = a # plain assignment: c is just another name for the same array object (no copy and no new view is created)
c[0] = 100 # writing through c therefore changes the one array that a also refers to
c

array([100,   2,   3,   4])

In [None]:
a

array([100,   2,   3,   4])

In [None]:
print(id(a),id(b),id(c))

135068919995760 135068919998736 135068919995760


In [None]:
d = a+0
print(id(d))

135068918602448


In [None]:
a

array([100,   2,   3,   4])

In [None]:
a ** 2 #element wise operation => new copy => deep copy => new memory


array([10000,     4,     9,    16])

In [None]:
a

array([100,   2,   3,   4])

In [None]:
#reshape ??
a = np.array([1,2,3,4])
a

array([1, 2, 3, 4])

In [None]:
b = a.reshape(2,2) # view: b is a new array object with its own shape, but it shares a's underlying data (no data is copied here)
b

array([[1, 2],
       [3, 4]])

In [None]:
a[0] =  1000
a

array([1000,    2,    3,    4])

In [None]:
b

array([[1000,    2],
       [   3,    4]])

In [None]:
np.shares_memory(a,b)

True

In [None]:
print(id(a),id(b))

135068919995376 135068919988944


In [None]:
# NOTE(review): a[0] and b[0][0] each build a temporary NumPy scalar object; the matching ids are most likely
# the interpreter reusing the address of the first temporary after it was freed, so this is not evidence of
# shared memory -- rely on np.shares_memory(a, b) for that.
print(id(a[0]),id(b[0][0]))

135068917040176 135068917040176


In [None]:
#inbuild functions of numpy
a = np.arange(9)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
b = a.copy()

In [None]:
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
np.shares_memory(a,b)

False

In [None]:
c = a.view()
c

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
np.shares_memory(a,c)

True

In [4]:
#homework
a = np.array([1,2,3,4,5,6])
b = a[a%2==0] #masking
np.shares_memory(a,b)
#deep copy / shallow copy


False

In [9]:
#slicing
a = np.array([1,2,3,4,5,6])
c = a[:2] # shallow copy
#np.shares_memory(a,c) output: True
c[0]=100
a,c

(array([100,   2,   3,   4,   5,   6]), array([100,   2]))

In [None]:
#array splitting

In [None]:
a = np.arange(10,19)
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18])

In [None]:
np.split(a,3)

[array([10, 11, 12]), array([13, 14, 15]), array([16, 17, 18])]

In [None]:
np.split(a,4)

ValueError: array split does not result in an equal division

In [None]:
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18])

In [None]:
#unequal size arrays
np.split(a,(3,5,6))

[array([10, 11, 12]), array([13, 14]), array([15]), array([16, 17, 18])]

In [None]:
#2d arrays
a = np.arange(16).reshape(4,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [None]:
np.split(a,2,axis = 0)

[array([[0, 1, 2, 3],
        [4, 5, 6, 7]]),
 array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]])]

In [None]:
np.split(a,2,axis = 1) #horizontal

[array([[ 0,  1],
        [ 4,  5],
        [ 8,  9],
        [12, 13]]),
 array([[ 2,  3],
        [ 6,  7],
        [10, 11],
        [14, 15]])]

In [None]:
np.split(a,2) #default axis = 0

[array([[0, 1, 2, 3],
        [4, 5, 6, 7]]),
 array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]])]

In [None]:
np.hsplit(a,2)

[array([[ 0,  1],
        [ 4,  5],
        [ 8,  9],
        [12, 13]]),
 array([[ 2,  3],
        [ 6,  7],
        [10, 11],
        [14, 15]])]

In [None]:
np.vsplit(a,2)

[array([[0, 1, 2, 3],
        [4, 5, 6, 7]]),
 array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]])]

In [None]:
#slicing => gives you one sub-part of the array
#splitting => gives you multiple sub-arrays from the same array

In [None]:
#stacking
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [None]:
b = np.arange(5,10)
b

array([5, 6, 7, 8, 9])

In [None]:
np.vstack((a,b))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [None]:
np.hstack((a,b))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
a

array([0, 1, 2, 3, 4])

In [None]:
np.vstack((a,a,a))

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [None]:
np.hstack((a,a,a))

array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4])

In [None]:
a = np.array([[1], [2], [3]])
b = np.array([[4], [5], [6]])
np.vstack((a, b))

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

In [None]:
a

array([[1],
       [2],
       [3]])

In [None]:
b

array([[4],
       [5],
       [6]])

In [None]:
#quiz
# What will be the output of following code?
a = np.array([[1], [2], [3]])
b = np.array([[4], [5], [6]])
print(a.shape,b.shape)
np.hstack((a, b))

(3, 1) (3, 1)


array([[1, 4],
       [2, 5],
       [3, 6]])

#doubts

In [None]:
def modify_list(lst):
    """Overwrite the first element of ``lst`` with 100, in place.

    The function receives a reference to the caller's list object, not a
    copy, so the item assignment is visible to the caller afterwards.
    Nothing is returned.
    """
    lst[0] = 100


a = [1, 2, 3]
modify_list(a)  # mutates the very list that `a` refers to
print(a)

[100, 2, 3]


In [None]:
def modify_list(a):
    """Rebind the local name ``a`` to 100 and return it.

    Assigning to the parameter only changes what the local name points at;
    the object the caller passed in is never touched, and nothing is copied.
    """
    a = 100  # rebinding, not mutation: the caller's variable is unaffected
    return a


a = 5
b = modify_list(a)
print(a, b)

5 100


In [None]:
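np.sum(a) => functional style (a can be a NumPy array, a plain list, a pandas Series, ...)
a.sum() => method call (a must be an object that has a .sum() method, e.g. a NumPy array or a pandas Series; a plain list does not)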