#                           NUMPY
# Outline

1. numpy Introduction                          
2. Array basics                                
3. Array vs List                               
4. Dot Products                               
5. Speed Test array vs list                    
6. Multidimensional array(nd-array)            
7. Array indexing/slicing/boolean indexing     
8. Reshape                                         
9. Concatenation                                   
10. Broadcasting 
11. (Data Science) Function and Axis
12. Datatypes
13. Coping
14. Generating Array
15. Random Numbers
16. Linear Algebra(np.linalg)
 - Eigenvalues
 - Solving Linear System
17. Loading data from csv

# Numpy

- Core LIBRARY for scientific computing in python 
    - Data science, Machine Learning, Deep Learning 
    - scikit-learn, matplotlib, pandas.....
- High performance multidimensional array-> FAST!
- Mathematical operations with arrays
- A lot of code written in C 

# NumPy Usecase

- Array/Matrix operations - Linear algebra
- Dot product
- Matrix Multiplications
- Linear systems 
- Inverse, determinant
- Eigenvectors
- Random numbers
- Working with images represented as array

In [2]:
import numpy as np

In [5]:
np.__version__

'1.19.5'

In [10]:
a = np.array([1,2,3,4,5])
print(a)
print(a.shape)
print(a.dtype)
print(a.ndim)
print(a.size)
print(a.itemsize)
a.itemsize

[1 2 3 4 5]
(5,)
int32
1
5
4


In [13]:
a[0]
a[0] = 11
print(a)

[11  2  3  4  5]


In [17]:
b= a* np.array([2,0,2,3,4])
# c= a* np.array([2,0,2]) # both array should be same
print(b)

[22  0  6 12 20]


In [32]:
l=[1,2,3,4,5]
a= np.array([1,2,3,4,5])

In [33]:
l.append(4)
#a.append(4) #'numpy.ndarray' object has no attribute 'append'
print(l)
#print(a)

[1, 2, 3, 4, 5, 4]


In [35]:
# 
m = l + [7] # concatenate
print(m)
n= l * 2
print(n)
o= a + np.array([7]) # addition
print(o)
p = a + 2
print(p)

[1, 2, 3, 4, 5, 4, 7]
[1, 2, 3, 4, 5, 4, 1, 2, 3, 4, 5, 4]
[ 8  9 10 11 12]
[3 4 5 6 7]


In [43]:
np.sort(a)
sq=np.square(a)
print(sq)
print(np.sqrt(sq))
np.sqrt(a)

[ 1  4  9 16 25]
[1. 2. 3. 4. 5.]


array([1.        , 1.41421356, 1.73205081, 2.        , 2.23606798])

# dot product

Dot product is sum of the product to the crrospanding entries

In [45]:
l1 = [1,2,3]
l2 = [4,5,6]
a1 = np.array(l1)
a2 = np.array(l2)

dot=0
for i in range(len(l1)):
    dot+= l1[i]*l2[i]
print(dot)


32


In [51]:
dot=np.dot(a1,a2)
print(dot)

sum1= a1* a2
sum1
dot= np.sum(sum1)
print(dot)

dot= (a1*a2).sum()
print(dot)

dot=a1@a2
print(dot)

32
32
32
32


# speed test of numpy array and list

In [52]:
from timeit import default_timer as timer
a = np.random.randn(1000)
b = np.random.randn(1000)

A=list(a)
B=list(b)

T=1000

def dot1():
    dot= 0
    for i in range(len(A)):
        dot+=A[i]*B[i]
    return dot
def dot2():
    return np.dot(a,b)

start=timer()
for t in range(T):
    dot1()
end=timer()
t1= end-start

start= timer()
for t in range(T):
    dot2()
end=timer()
t2= end-start

print('list calculation',t1)
print('np.dot',t2)
print('ratio',t1/t2)

list calculation 0.4016129000001456
np.dot 0.013237400000434718
ratio 30.339258463667832


# multidimensional array

In [4]:
a= np.array([[1,2],[3,4]])
print(a)
print('shape:',a.shape)

b= np.array([[1,2,3],[3,4,5]])
print(b)
print('shape:',b.shape)

print(a[0][1],b[1][1])
print(a[0,1],b[1,1])

[[1 2]
 [3 4]]
shape: (2, 2)
[[1 2 3]
 [3 4 5]]
shape: (2, 3)
2 4
2 4


slicing

In [9]:
print(b[:,0])
print(b[0,:])

array([1, 2, 3])

In [23]:
print(b)
print("Transpose:\n",b.T)
print("inverse",)
print(np.linalg.inv(a))
print("Determinant:",np.linalg.det(a))
print("Diagonal Matrix:",np.diagonal(a))
print("Diagonal Matrix:",np.diag(a))
d=np.diag(a)
print("Diagonal Matrix:\n",np.diag(d))

[[1 2 3]
 [3 4 5]]
Transpose:
 [[1 3]
 [2 4]
 [3 5]]
inverse
[[-2.   1. ]
 [ 1.5 -0.5]]
Determinant: -2.0000000000000004
Diagonal Matrix: [1 4]
Diagonal Matrix: [1 4]
Diagonal Matrix:
 [[1 0]
 [0 4]]


In [29]:
c= np.array([[1,2,3,4],[5,6,7,8],[9,6,3,8],[7,5,3,8]])
print(c)
b=c[0,:]
print(c[-1,-3])

[[1 2 3 4]
 [5 6 7 8]
 [9 6 3 8]
 [7 5 3 8]]
5


In [35]:
bool_ind=c>2
print(bool_ind)
print(c[bool_ind])
print(c[c>5])
e=np.where(c>3,c,-1)
print(e)

[[False False  True  True]
 [ True  True  True  True]
 [ True  True  True  True]
 [ True  True  True  True]]
[3 4 5 6 7 8 9 6 3 8 7 5 3 8]
[6 7 8 9 6 8 7 8]
[[-1 -1 -1  4]
 [ 5  6  7  8]
 [ 9  6 -1  8]
 [ 7  5 -1  8]]


In [37]:
a=np.array([42,25,78,15,13,98,45,25])
print(a)
b=[1,3,5]
print(a[b])

[42 25 78 15 13 98 45 25]
[25 15 98]


In [43]:
#find even number
even=np.argwhere(a%2==0)
print(a[even])

even=np.argwhere(a%2==0).flatten()
print(a[even])

[[42]
 [78]
 [98]]
[42 78 98]



# Reshape

In [45]:
a= np.arange(1,10, dtype=int)
print(a)
print(a.shape)

[1 2 3 4 5 6 7 8 9]
(9,)


In [46]:
b=a.reshape(3,3)
b

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [50]:
b=a[:,np.newaxis]
print(b)
print(b.shape)
b=a[np.newaxis,:]
print(b)
print(b.shape)

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]
(9, 1)
[[1 2 3 4 5 6 7 8 9]]
(1, 9)


In [67]:
a= np.array([[1,2],[3,4]])
b= np.array([[5,6]])
c=np.concatenate((a,b), axis=None)
print(c)
c=np.concatenate((a,b), axis=0)
print(c)
c=np.concatenate((a,b.T), axis=1)
print(c)

[1 2 3 4 5 6]
[[1 2]
 [3 4]
 [5 6]]
[[1 2 5]
 [3 4 6]]


In [71]:
a=np.array([1,2,3,4])
b=np.array([5,6,7,8])
#hstack , vstack
c=np.hstack((a,b))
print(c)
c=np.vstack((a,b))
print(c)

[1 2 3 4 5 6 7 8]
[[1 2 3 4]
 [5 6 7 8]]


In [74]:
x= np.array([[1,2,3],[3,4,5],[7,5,3]])
b=np.array([1,0,2])
print(x)
y= x+b
print(y)

[[1 2 3]
 [3 4 5]
 [7 5 3]]
[[2 2 5]
 [4 4 7]
 [8 5 5]]


In [82]:
y= np.array([[8,7,9,10],[7,12,11,13]])
print(y)
print("sum all values:",y.sum(axis=None))
print("vertically sum:",y.sum(axis=0))
print("horizontally sum:",y.sum(axis=1))

[[ 8  7  9 10]
 [ 7 12 11 13]]
sum all values: 77
vertically sum: [15 19 20 23]
horizontally sum: [34 43]


In [86]:
print(x)
print("sum all values:",x.sum(axis=None))
print("sum all values:",np.sum(x,axis=None)) # we can use respectively mean,median,average,std, etd,.
print("vertically sum:",x.sum(axis=0))
print("horizontally sum:",x.sum(axis=1))

[[1 2 3]
 [3 4 5]
 [7 5 3]]
sum all values: 33
sum all values: 33
vertically sum: [11 11 11]
horizontally sum: [ 6 12 15]


# copy

In [87]:
a = np.array([4,5,6,9])
b=a
print(b)
print(a)
b[0]=42
print(b)
print(a)

[4 5 6 9]
[4 5 6 9]
[42  5  6  9]
[42  5  6  9]


In [88]:
a = np.array([4,5,6,9])
b=a.copy()
print(b)
print(a)
b[0]=42
print(b)
print(a)

[4 5 6 9]
[4 5 6 9]
[42  5  6  9]
[4 5 6 9]


# Generating Data, Arrays

In [98]:
z1= np.zeros((2,3))
print(z1)
print(z1.dtype)
z2= np.zeros(10)
print(z2)
print(z2.dtype)

o1= np.ones((2,3),dtype=int)
print(o1)
print(o1.dtype)
o2= np.ones(10)
print(o2)
print(o2.dtype)

[[0. 0. 0.]
 [0. 0. 0.]]
float64
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
float64
[[1 1 1]
 [1 1 1]]
int32
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
float64


In [105]:
# array with specific values
sp=np.full((2,3),5.0,dtype=int)
print(sp)
print(sp.dtype)

[[5 5 5]
 [5 5 5]]
int32


# idnetity Matrix

In [118]:
i=np.eye(4)
print(i)
i=np.eye(4,M=5)
print("\n",i)
i=np.eye(4,k=2,dtype=int)
print("\n",i)
i=np.eye(4,M=5,k=1,dtype=int)
print("\n",i)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]

 [[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

 [[0 0 1 0]
 [0 0 0 1]
 [0 0 0 0]
 [0 0 0 0]]

 [[0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]]


In [126]:
a=np.linspace(1,13,4)
print(a)
a=np.linspace(0,(3,4),4)
print(a)

[ 1.  5.  9. 13.]
[[0.         0.        ]
 [1.         1.33333333]
 [2.         2.66666667]
 [3.         4.        ]]


# Random number

In [141]:
a = np.random.random((2,3)) #0-1
print(a)
print(a.mean(), a.var())

rn = np.random.randn(3,4) #Normal/ Gaussion Distribution
print("\n",rn)

rin = np.random.randint(11,size=(3,3)) #Normal
print("\n",rin)

cho = np.random.choice(5,size=10) #Normal
print("\n",cho)
cho = np.random.choice([-1,-4,-3],size=10) #Normal
print("\n",cho)

[[0.45341369 0.61155535 0.15835023]
 [0.19495165 0.23042858 0.92873799]]
0.4295729163411855 0.07518651863869359

 [[ 0.88933378  0.19102618  0.68599647 -0.79892228]
 [ 1.04910157 -0.91993924 -0.94873901  0.18853671]
 [ 0.66634195 -1.77024592  1.01162087  1.13768857]]

 [[ 7  5  2]
 [10  3  5]
 [ 2 10  5]]

 [2 2 1 4 3 1 2 0 0 1]

 [-1 -1 -3 -4 -4 -3 -3 -1 -1 -1]


In [163]:
 b = np.random.randint(1,65,(8,8))
 b = np.asarray(b,dtype=float)
 b

array([[12., 63., 48., 42., 61., 45., 20., 10.],
       [24., 17.,  2., 57., 22., 61.,  2., 44.],
       [16., 11., 29.,  7., 42.,  2.,  7., 38.],
       [29., 42., 51., 39., 10., 64., 57., 25.],
       [ 4.,  8., 57.,  3.,  5., 14., 44., 46.],
       [ 1., 11., 41.,  2., 46., 36., 64., 44.],
       [43., 29., 18., 37., 12., 38.,  1., 49.],
       [47., 19., 40., 40., 57., 48., 15., 51.]])

In [173]:
b = np.ones((8,8))
print("\n",b)
b[0,:]=2
print("\n",b)
b[[0,1,2,3,4],[0,1,2,3,4]]=[[9,8,7,6,5]]
print("\n",b)


 [[1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]]

 [[2. 2. 2. 2. 2. 2. 2. 2.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]]

 [[9. 2. 2. 2. 2. 2. 2. 2.]
 [1. 8. 1. 1. 1. 1. 1. 1.]
 [1. 1. 7. 1. 1. 1. 1. 1.]
 [1. 1. 1. 6. 1. 1. 1. 1.]
 [1. 1. 1. 1. 5. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]]


# EigenValue, it apply for PCA 

In [144]:
a=np.array([[1,2],[3,4]])
eigenvalues, eigenvectors =np.linalg.eig(a)
print("eigenvalues:\n",eigenvalues)
print("eigenvectors\n",eigenvectors) #column vectors!

eigenvalues:
 [-0.37228132  5.37228132]
eigenvectors
 [[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]


In [148]:
# justify
# e_vec*e_val=A* e_vec
b= eigenvectors[:,0]*eigenvalues[0]
print("\n",b)
c=a @ eigenvectors[:,0]
print("\n",c)

#print("\n",b==c)
print("\n",np.allclose(b,c))


 [ 0.30697009 -0.21062466]

 [ 0.30697009 -0.21062466]

 True


# solving linear systems

Q: the addmission fee at a small fair is tk 1.50  for 
children and tk 4.00 for adults.on a certain day, 2200 people enter the fair and tk 5050 is collected. How many children and how many adults attended? 

sol:-> 2 Unknowns/ 2 Equations
    - x1 + x2 = 2200
    - 1.5*x1 + 4.0*x2 = 5050

here, x=([x1,x2]), A=([[1,1],[1.5,4]]),b=([2200,5050])

Ax = b
=> x = A^-1 * b

In [152]:
A=np.array([[1,1],[1.5,4.0]])
b=np.array([2200,5050])
x=np.linalg.inv(A).dot(b) 
print("\n",x)

y=np.linalg.solve(A,b)
print("\n",y)


 [1500.  700.]

 [1500.  700.]


# Load Data

two ways to load data
- np.loadtxt
- np.genfromtxt

In [157]:
data= np.loadtxt(r"F:\\thesis\\diabetes\kaggle_diabetes.csv", delimiter=",", dtype=np.float)

ValueError: could not convert string to float: 'Pregnancies'