In [1]:
import numpy as np

# Build a one-dimensional array from a Python list, then show its contents.
a = np.array([1, 2, 3, 4])
print(a)

[1 2 3 4]


In [16]:
# import time library to find out how long the operations take
import time

# Two random vectors with one million entries each.
a = np.random.rand(1000000)
b = np.random.rand(1000000)

# --- Vectorized dot product ---
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
tic = time.perf_counter()
c = np.dot(a,b)
toc = time.perf_counter()

print(c)
print("Vectorized Version : " + str(1000*(toc - tic)) + "ms")

# --- Explicit Python loop ---
# Start a fresh timer right before the loop. Reusing `tic` here would make the
# reported loop time also include the vectorized computation and the two
# print calls above.
c = 0
tica = time.perf_counter()
for i in range(1000000):
    c+=a[i]*b[i]
toca = time.perf_counter()

print(c)
print("For Loop :" + str(1000*(toca - tica)) + "ms")

250365.57298343006
Vectorized Version : 4.002094268798828ms
250365.57298342072
For Loop :611.0646724700928ms


In [17]:
# Both the vectorized and the for-loop versions compute the same value
# (up to floating-point rounding), but the vectorized version runs far
# faster, so prefer vectorized operations over explicit Python loops.

**Broadcasting in python**

In [18]:
import numpy as np

# A 3x4 matrix of floats: three rows, four columns.
A = np.array([
    [56.0,   0.0,  4.4, 68.0],
    [ 1.2, 104.0, 52.0,  8.0],
    [ 1.8,  13.5, 99.0,  0.9],
])
print(A)

[[ 56.    0.    4.4  68. ]
 [  1.2 104.   52.    8. ]
 [  1.8  13.5  99.    0.9]]


In [23]:
# axis=0 collapses the rows, giving one total per column of A;
# axis=1 would instead collapse the columns, giving one total per row.
cal = A.sum(axis=0)
cal

array([ 59. , 117.5, 155.4,  76.9])

In [27]:
# A has shape (3, 4) and cal holds the four column totals as a 1-D array.
# Reshaping cal into a single row makes the broadcast explicit: that row is
# stretched across every row of A, so each entry of A becomes a percentage of
# its own column total.
column_totals = cal.reshape(1, -1)
percentage = (100 * A) / column_totals
percentage

array([[94.91525424,  0.        ,  2.83140283, 88.42652796],
       [ 2.03389831, 88.5106383 , 33.46203346, 10.40312094],
       [ 3.05084746, 11.4893617 , 63.70656371,  1.17035111]])

**Tips and tricks useful to eliminate Bugs**

In [28]:
# Create a vector of five random samples by passing a single integer to
# randn, i.e. without specifying a second dimension.
import numpy as np
a = np.random.randn(5)

In [29]:
# Display the values stored in a.
print(a)

[-0.5149842   0.69388674 -0.0450531  -0.85079102 -1.27163676]


In [30]:
# Inspect the shape: it has a single entry and no second dimension.
print(a.shape)

(5,)


In [31]:
# The shape (5,) is neither a row vector nor a column vector;
# it is what is called a rank 1 array.
# This leads to some confusing behavior:
# printing the transpose of a looks exactly the same as printing a.

print(a.T)

[-0.5149842   0.69388674 -0.0450531  -0.85079102 -1.27163676]


In [32]:
# With a rank 1 array, the dot product of a and a.T is a single number
# (the inner product), not a matrix.
print(np.dot(a,a.T))

3.0896227207388414


In [33]:
# So avoid data structures whose shape has the form (5,),
# that is, do not use rank 1 arrays.
# Always give the array an explicit 2-D shape, such as (5, 1) or (1, 5),
# even when the data is one-dimensional.

In [37]:
# Passing both dimensions gives a 5x1 column vector instead of a rank 1 array
# (note the two levels of brackets in the printed output).
a = np.random.randn(5,1)
print(a)

[[ 0.49525802]
 [-1.14410362]
 [ 0.11587003]
 [ 0.26800083]
 [-0.67840117]]


In [38]:
# The transpose of the (5, 1) column vector is a (1, 5) row vector.
print(a.T)

[[ 0.49525802 -1.14410362  0.11587003  0.26800083 -0.67840117]]


In [39]:
# Column vector times row vector: the result is the 5x5 outer product.
print(np.dot(a,a.T))

[[ 0.24528051 -0.5666265   0.05738556  0.13272956 -0.33598362]
 [-0.5666265   1.30897309 -0.13256733 -0.30662072  0.77616123]
 [ 0.05738556 -0.13256733  0.01342586  0.03105327 -0.07860637]
 [ 0.13272956 -0.30662072  0.03105327  0.07182445 -0.18181208]
 [-0.33598362  0.77616123 -0.07860637 -0.18181208  0.46022815]]


In [40]:
# Element-wise product of a (3, 3) matrix with a (3, 1) column vector.
# b is broadcast across the three columns of a, so c has shape (3, 3).
a = np.random.randn(3, 3)
b = np.random.randn(3, 1)
c = np.multiply(a, b)

In [41]:
# Display the result of the element-wise product a*b.
c

array([[ 1.88879249, -1.39906118,  0.04796448],
       [-0.95753697, -0.19120929, -1.01169064],
       [-0.62926602, -0.43548537,  0.78360387]])

In [44]:
# a is (2, 3) and b is (2, 1): b is broadcast across the three columns of a,
# so the element-wise sum keeps the shape of a.
a = np.random.randn(2, 3)
b = np.random.randn(2, 1)
c = np.add(a, b)
c.shape

(2, 3)