In [1]:
import numpy as np

In [2]:
# Create a one-dimensional NumPy array from a plain Python list.
a = np.array([1, 2, 3])
print(a)

[1 2 3]


In [6]:
# Inspect the array's basic attributes.
print(a.shape) # length of each dimension, as a tuple
print(a.dtype) # data type of the elements
print(a.ndim) # number of dimensions
print(a.size) # total number of elements
print(a.itemsize) # bytes used by one element

(3,)
int64
1
3
8


In [7]:
# Indexing is zero-based: a[0] is the first element.
print(a[0])

1


In [8]:
# Arrays are mutable: assigning to an index overwrites that element in place.
a[0] = 10
print(a)

[10  2  3]


In [9]:
# Elementwise arithmetic: `*` multiplies entries at matching positions
# (it is not a dot product).

b = a*np.array([2,0,2])
print(b)

[20  0  6]


### List vs Array(np)

In [13]:
# `+` means different things for lists and arrays.

# List: `+` concatenates, so the list grows by one element.
l = [1, 2, 3]
l = l + [4]
print(l)

# Array: `+` adds elementwise; the one-element array is broadcast
# across every entry of `a`.
a = np.array([1, 2, 3])
a = a + np.array([4])
print(a)

[1, 2, 3, 4]
[5 6 7]


In [16]:
# `*` also differs between lists and arrays.

# List: repetition, so the contents appear twice.
l = [1, 2, 3]
l = l * 2
print(l)

# Array: every element is multiplied by the scalar.
a = np.array([1, 2, 3])
a = a * 2
print(a)

[1, 2, 3, 1, 2, 3]
[2 4 6]


In [20]:
# NumPy math functions work elementwise and can be nested:
# log(sqrt(x)) is evaluated for every x in the array.
a = np.log(np.sqrt(np.array([1, 2, 3])))
print(a)

[0.         0.34657359 0.54930614]


In [27]:
# Dot product of two vectors: the sum of the products of corresponding entries.
l1 = [1, 2, 3]
l2 = [4, 5, 6]
a1 = np.array(l1)
a2 = np.array(l2)

# Pure-Python version: walk both lists in step and accumulate the products.
dot = 0
for x, y in zip(l1, l2):
    dot += x * y
print(dot)

# NumPy version: a single vectorised call (faster than the loop).
dot = np.dot(a1, a2)
print(dot)

32
32


In [29]:
# The dot product built by hand from two NumPy steps.
sum1 = a1 * a2 # elementwise products
dot = np.sum(sum1) # add the products together
print(dot)

32


In [31]:
# The `@` operator (matrix multiplication) returns the dot product
# when both operands are 1-D arrays.
dot = a1@a2
print(dot)

32


### Compare list and numpy arrays

In [36]:
from timeit import default_timer as timer

# Benchmark: dot product of two 1000-element vectors, computed with a
# pure-Python loop over lists versus NumPy's vectorised np.dot.
a = np.random.randn(1000)
b = np.random.randn(1000)

# The same numbers as plain Python lists
A = list(a)
B = list(b)

T = 1000  # how many times each implementation is called


def dot1():
    """Return the dot product of the lists A and B using an explicit loop."""
    dot = 0
    for i in range(len(A)):
        dot += A[i] * B[i]
    return dot


def dot2():
    """Return the dot product of the arrays a and b using np.dot."""
    return np.dot(a, b)


def _time_calls(func, repeats):
    """Return the wall-clock seconds needed to call func() `repeats` times."""
    start = timer()
    for _ in range(repeats):
        func()
    return timer() - start


t1 = _time_calls(dot1, T)
t2 = _time_calls(dot2, T)

# Guard the division in case the clock reports no elapsed time for np.dot.
ratio = t1 / t2 if t2 > 0 else float('inf')

print('list calculation ', t1)
print('np.dot ',t2)
print('ratio ',ratio)


list calculation  0.21574653900006524
np.dot  0.0020971689998532383
ratio  102.87513262648999


## Multidimensional

In [41]:
# A 2-D array is built from a list of rows: here 2 rows and 3 columns.
a = np.array([
    [1, 2, 5],
    [3, 4, 6],
])
print(a)
a.shape  # (rows, columns); shown as the cell's output

[[1 2 5]
 [3 4 6]]


(2, 3)

In [44]:
print(a[0]) # a single index on a 2-D array selects a whole row
print(a[0][0]) # index twice: first the row, then the element inside it

# Same element with one [row, column] index
print(a[0,0])

[1 2 5]
1
1


In [47]:
# Slicing: row 0, every column (`:` selects the whole axis).
print(a[0,:])

[1 2 5]


In [48]:
# Transpose: `.T` swaps rows and columns, so the (2, 3) array prints as (3, 2).
print(a.T)

[[1 3]
 [2 4]
 [5 6]]


In [50]:
# Matrix inverse of a 2x2 matrix.
b = np.array([
    [1, 2],
    [3, 4],
])
print(np.linalg.inv(b))

[[-2.   1. ]
 [ 1.5 -0.5]]


In [53]:
# Determinant of the 2x2 matrix `b`. It is computed in floating point, so
# the printed value can differ from the exact answer by rounding error.
print(np.linalg.det(b))

-2.0000000000000004


In [56]:
# Diagonal of a matrix: np.diag on a 2-D array returns its main
# diagonal as a 1-D array.
print(b)
print()
print(np.diag(b))

[[1 2]
 [3 4]]

[1 4]


In [58]:
# np.diag on a 1-D array does the reverse: it builds a square matrix with
# those values on the main diagonal and zeros everywhere else.
c = np.diag(b)
print(np.diag(c))

[[1 0]
 [0 4]]


In [60]:
# Single-element indexing: row 0, column 1.
# Note that this rebinds `b`, which held a matrix in the cells above.
b = a[0,1]
print(b)

2


In [68]:
a = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])
print(a, end="\n\n")

# Row 0, columns 1 and 2 (the stop index 3 is excluded).
b = a[0, 1:3]
print(b, end="\n\n")

# Negative indices count from the end: last row, second-to-last column.
c = a[-1, -2]
print(c)

[[1 2 3 4]
 [5 6 7 8]]

[2 3]

7


In [71]:
# A 3x2 array used to demonstrate boolean indexing.
a = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
print(a)

[[1 2]
 [3 4]
 [5 6]]


In [72]:
# A comparison is applied elementwise and yields a boolean array with the
# same shape as `a`.
bool_idx = a > 2
print(bool_idx)

[[False False]
 [ True  True]
 [ True  True]]


In [73]:
# Boolean masking: keeps only the elements where the mask is True and
# returns them as a flat 1-D array.
print(a[bool_idx])

[3 4 5 6]


In [75]:
# np.where(condition, x, y) picks from x where the condition is True and
# from y elsewhere; unlike masking, the result keeps the shape of `a`.

b = np.where(a>2,a,-1)
print(b)

[[-1 -1]
 [ 3  4]
 [ 5  6]]


In [78]:
# Indexing with a list of positions ("fancy indexing") picks those
# elements and returns them as a new array.
a = np.array([10, 19, 30, 41, 50, 61])
print(a)
b = [1, 3, 5]
print(a[b])

[10 19 30 41 50 61]
[19 41 61]


In [80]:
# np.argwhere returns the indices of the True entries, one index per row
# (a column of shape (N, 1) for a 1-D input); flatten() turns that into a
# 1-D index array that can be used to index `a`.
even = np.argwhere(a%2==0).flatten()
print(a[even])

[10 30 50]


## Reshaping

In [81]:
# np.arange(start, stop) excludes the stop value, giving 1..6.
a = np.arange(1, 7)
print(a)
print(a.shape)

[1 2 3 4 5 6]
(6,)


In [84]:
# The new shape must hold exactly a.size elements (3 * 2 == 6).
b = a.reshape((3,2)) # rows are filled in order from the 1-D data
print(b)
print(b.shape)

[[1 2]
 [3 4]
 [5 6]]
(3, 2)


In [86]:
# Add a dimension: np.newaxis in the first position inserts a new leading
# axis of length 1, giving a single row of shape (1, 6).
b = a[np.newaxis,:]
print(b)
print(b.shape)

[[1 2 3 4 5 6]]
(1, 6)


In [88]:
# np.newaxis in the second position inserts a trailing axis instead,
# giving a single column of shape (6, 1).
b = a[:,np.newaxis]
print(b)
print(b.shape)

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
(6, 1)


In [94]:
# Concatenation.
# axis=None flattens both inputs before joining them, so the result is 1-D.
# (With the default axis=0, `b` would be appended to `a` as a third row.)
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
c = np.concatenate((a, b), axis=None)
print(c)

[1 2 3 4 5 6]


In [96]:
# hstack joins 1-D arrays end to end; vstack stacks them as rows.
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])

c = np.hstack((a, b))  # shape (8,)
print(c)

d = np.vstack((a, b))  # shape (2, 4)
print(d)

[1 2 3 4 5 6 7 8]
[[1 2 3 4]
 [5 6 7 8]]


## Broadcasting

In [99]:
# `a` has one entry per column of `x`; broadcasting adds it to every row,
# so `a` never has to be repeated by hand to match the shape of `x`.
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [1, 2, 3],
    [4, 5, 6],
])
a = np.array([1, 0, 1])
y = x + a
print(y)

[[2 2 4]
 [5 5 7]
 [2 2 4]
 [5 5 7]]


## Data Science functions

In [100]:
# A 2x7 array used for the aggregation examples.
a = np.array([
    [7, 8, 9, 10, 11, 12, 13],
    [17, 18, 19, 20, 21, 22, 23],
])
print(a)

[[ 7  8  9 10 11 12 13]
 [17 18 19 20 21 22 23]]


In [103]:
# `axis` chooses the dimension that is summed away.
print(a.sum(axis=0)) # sum down the rows: one total per column
print(a.sum(axis=1)) # sum across the columns: one total per row
print(a.sum(axis=None)) # sum of every element: a single number

[24 26 28 30 32 34 36]
[ 70 140]
210


In [104]:
# mean() takes the same `axis` argument; axis=0 gives one mean per column.
print(a.mean(axis=0))

[12. 13. 14. 15. 16. 17. 18.]


## Data types

In [106]:
# The dtype can be chosen explicitly; the integer inputs are stored as float32.
x = np.array([1, 2], dtype=np.float32)
print(x)
print(x.dtype)

[1. 2.]
float32


## copying 

In [109]:
a = np.array([1,2,3])
# Plain assignment (`b = a`) would only bind a second name to the same
# array, so a write through `b` would also show up in `a`.
c = a.copy()  # independent copy with its own data
c[0] = 42
# Print the copy that was modified. The previous print(b) referred to a
# name this cell never assigns, so it showed whatever `b` an earlier cell
# had left behind.
print(c)
print(a)  # the original is unchanged

[42  2  3]
[1 2 3]


## Generating arrays

In [119]:
# Common array constructors; each result is printed followed by a blank line.
a = np.zeros((2, 3))           # 2x3 array of 0.0
print(a, end="\n\n")
b = np.ones((2, 3))            # 2x3 array of 1.0
print(b, end="\n\n")
c = np.full((2, 3), 5.0)       # 2x3 array filled with 5.0
print(c, end="\n\n")
d = np.eye(3)                  # 3x3 identity matrix
print(d, end="\n\n")
e = np.arange(20)              # integers 0..19
print(e, end="\n\n")
f = np.linspace(0, 10, 5)      # 5 evenly spaced values from 0 to 10 inclusive
print(f)

[[0. 0. 0.]
 [0. 0. 0.]]

[[1. 1. 1.]
 [1. 1. 1.]]

[[5. 5. 5.]
 [5. 5. 5.]]

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]

[ 0.   2.5  5.   7.5 10. ]


## Random Numbers

In [121]:
# Uniform random floats in the half-open interval [0, 1), as a 3x2 array.
# No seed is set, so the values differ on every run.
a = np.random.random((3, 2))
print(a)

[[0.23824483 0.82304422]
 [0.14803363 0.27460814]
 [0.76944598 0.15532815]]


In [125]:
# Samples from the standard normal (Gaussian) distribution: mean 0,
# standard deviation 1. The sample mean and variance printed below are
# therefore close to 0 and 1 but not exact.
a = np.random.randn(1000)
print(a.mean(),a.var())

-0.015880943275370308 1.010943629038107


In [127]:
# Random integers from 3 up to (but not including) 10, as a 3x3 array.
a = np.random.randint(3, 10, size=(3, 3))
print(a)

[[3 3 5]
 [6 5 6]
 [6 7 4]]


In [128]:
a = np.random.choice(5,size=10)
print(a)

[3 0 3 4 3 2 0 4 0 4]


In [130]:
a = np.random.choice([-8,-7,-6],size=10)
print(a)

[-8 -6 -6 -6 -6 -8 -6 -6 -6 -7]


## LinAlg module

- Useful for PCA algorithm

In [131]:
# np.linalg.eig returns the eigenvalues and a matrix whose columns are the
# matching eigenvectors.
a = np.array([
    [1, 2],
    [3, 4],
])
eigenvalues, eigenvectors = np.linalg.eig(a)

print(eigenvalues)
print(eigenvectors)

[-0.37228132  5.37228132]
[[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]


In [132]:
# Check the defining identity for the first eigenpair: A v = lambda v.
# Column i of `eigenvectors` pairs with eigenvalues[i].
b = eigenvectors[:,0] * eigenvalues[0]  # right-hand side: lambda * v
print(b)
# Left-hand side: A @ v must give the same vector, up to floating-point
# rounding. `a` is the matrix that was passed to np.linalg.eig.
assert np.allclose(a @ eigenvectors[:,0], b)

[ 0.30697009 -0.21062466]
