# Principle Component Analysis 
This is a very common method to reduce dimensinality of the data set given with the help of eigen vectors and there projection to the plane containing eigen vectors.

In [1]:
# importing some important libraries 
import numpy as np # for matrix maths 
from sklearn import datasets # datasets for implementing PCA
from sklearn.preprocessing import StandardScaler # method to make our dataset standard 

## Preparing our dataset 
This setup involves preparing our data set i.e. is loading and standarization of our data set 

In [2]:
# loading our data set 
data = datasets.load_iris()
data

 'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [3]:
# segegrating the data as per our need 

# features
X = data['data']

# output or target 
y = data['target']

In [5]:
# standarization of data 
X_std = StandardScaler().fit_transform(X)
X_std

array([[-9.00681170e-01,  1.03205722e+00, -1.34127240e+00,
        -1.31297673e+00],
       [-1.14301691e+00, -1.24957601e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-1.38535265e+00,  3.37848329e-01, -1.39813811e+00,
        -1.31297673e+00],
       [-1.50652052e+00,  1.06445364e-01, -1.28440670e+00,
        -1.31297673e+00],
       [-1.02184904e+00,  1.26346019e+00, -1.34127240e+00,
        -1.31297673e+00],
       [-5.37177559e-01,  1.95766909e+00, -1.17067529e+00,
        -1.05003079e+00],
       [-1.50652052e+00,  8.00654259e-01, -1.34127240e+00,
        -1.18150376e+00],
       [-1.02184904e+00,  8.00654259e-01, -1.28440670e+00,
        -1.31297673e+00],
       [-1.74885626e+00, -3.56360566e-01, -1.34127240e+00,
        -1.31297673e+00],
       [-1.14301691e+00,  1.06445364e-01, -1.28440670e+00,
        -1.44444970e+00],
       [-5.37177559e-01,  1.49486315e+00, -1.28440670e+00,
        -1.31297673e+00],
       [-1.26418478e+00,  8.00654259e-01, -1.22754100e+00,
      

## Covariance of Data Matrix
In this step we are going to find covariance of matrix which further would be used to find eigen vectors and eigen values 

In [6]:
# mean of data 
mean_vec = np.mean(X_std)

# finding covariance matrix 
cov_mat = (X_std - mean_vec).T.dot((X_std - mean_vec)) / (X_std.shape[0] - 1)

# print cov matrix 
print("Covariance matrix :\n{}".format(cov_mat))

Covariance matrix :
[[ 1.00671141 -0.11010327  0.87760486  0.82344326]
 [-0.11010327  1.00671141 -0.42333835 -0.358937  ]
 [ 0.87760486 -0.42333835  1.00671141  0.96921855]
 [ 0.82344326 -0.358937    0.96921855  1.00671141]]


In [7]:
# using np to find covariance matrix 
cov_mat = np.cov(X_std.T)
print("Cov Matirx :\n{}".format(cov_mat))

Cov Matirx :
[[ 1.00671141 -0.11010327  0.87760486  0.82344326]
 [-0.11010327  1.00671141 -0.42333835 -0.358937  ]
 [ 0.87760486 -0.42333835  1.00671141  0.96921855]
 [ 0.82344326 -0.358937    0.96921855  1.00671141]]


## Finding eigen values  
This helps in determing the values of matrixes and help us make a projection matrix

In [8]:
# finding eigen values and vectors 
eig_vals, eig_vecs = np.linalg.eig(cov_mat)

# printing the values of eigen vectors and eigen_values 
print("Eigen Values : \n{}".format(eig_vals))
print("Eigen Vectors : \n{}".format(eig_vecs))

Eigen Values : 
[2.93035378 0.92740362 0.14834223 0.02074601]
Eigen Vectors : 
[[ 0.52237162 -0.37231836 -0.72101681  0.26199559]
 [-0.26335492 -0.92555649  0.24203288 -0.12413481]
 [ 0.58125401 -0.02109478  0.14089226 -0.80115427]
 [ 0.56561105 -0.06541577  0.6338014   0.52354627]]


In [13]:
# creating a list of eigen values and eigen vectors 
eig_pair = [(np.abs(eig_vals[i]), eig_vecs[0:,i]) for i in range(len(eig_vals))]

# sorting the list based on reverse order of eigen values 
eig_pair.sort(key= lambda x : x[0], reverse=True)

# printing the values of eigen values in reverse order 
for i in eig_pair:
    print(i[0])

2.9303537755893183
0.9274036215173416
0.1483422264816397
0.02074601399559604


## Projection Matrix
This step involves creation of projection matrix in order to reduce dimensionality of our data matrix

In [27]:
# creating projection matrix 
matrix_w = np.hstack((eig_pair[0][1].reshape(X_std.shape[1],1),
                    eig_pair[1][1].reshape(X_std.shape[1],1)))
matrix_w

array([[ 0.52237162, -0.37231836],
       [-0.26335492, -0.92555649],
       [ 0.58125401, -0.02109478],
       [ 0.56561105, -0.06541577]])

In [28]:
# reducing dimension of matrix 
X_new = X.dot(matrix_w)
X_new

array([[ 2.66923088, -5.18088722],
       [ 2.69643401, -4.6436453 ],
       [ 2.4811633 , -4.75218345],
       [ 2.57151243, -4.62661492],
       [ 2.59065822, -5.23621104],
       [ 3.00809881, -5.68221692],
       [ 2.49094166, -4.90871397],
       [ 2.70145461, -5.05320922],
       [ 2.46158369, -4.36493047],
       [ 2.67166282, -4.73176885],
       [ 2.83139678, -5.47980351],
       [ 2.65510568, -4.98085502],
       [ 2.58763574, -4.59987189],
       [ 2.15207373, -4.40738428],
       [ 2.78696275, -5.90006937],
       [ 2.91688204, -6.25247172],
       [ 2.77559721, -5.67377901],
       [ 2.72579198, -5.1874288 ],
       [ 3.13458468, -5.6948152 ],
       [ 2.70491091, -5.46720523],
       [ 3.02665406, -5.20635552],
       [ 2.78780751, -5.38119115],
       [ 2.14920797, -5.07884578],
       [ 3.06596138, -5.02172909],
       [ 2.82948189, -4.98718345],
       [ 2.86492198, -4.6850961 ],
       [ 2.87270222, -5.06840185],
       [ 2.77959344, -5.22022854],
       [ 2.74780353,