In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Toy dataset: six samples (rows) described by two numeric features (columns).
data = np.array(
    [
        [3, 7],
        [-4, -6],
        [1, -1],
        [7, 8],
        [-4, -1],
        [-3, -7],
    ]
)

# Attach column labels so each feature can be addressed by name.
dataframe = pd.DataFrame(data=data, columns=['feature1', 'feature2'])
dataframe

Unnamed: 0,feature1,feature2
0,3,7
1,-4,-6
2,1,-1
3,7,8
4,-4,-1
5,-3,-7


1. Make sure the original data is in standard normal form (i.e. standardized).

Standard normal form (SNF) requires every feature to satisfy the following constraints:
```text
mean = 0
standard deviation = 1
```

In [None]:
# Summary statistics per feature; the `mean` and `std` rows are the ones to
# compare against the standard-normal-form target (mean 0, std 1).
dataframe.describe()

Unnamed: 0,feature1,feature2
count,6.0,6.0
mean,0.0,0.0
std,4.472136,6.324555
min,-4.0,-7.0
25%,-3.75,-4.75
50%,-1.0,-1.0
75%,2.5,5.0
max,7.0,8.0


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn each column's mean and standard deviation, then rescale the same data.
# NOTE: the result is a plain NumPy array, so `dataframe` no longer carries
# column labels after this cell.
scaler = StandardScaler()
dataframe = scaler.fit(dataframe).transform(dataframe)

In [None]:
# Check what the scaling step returned: a NumPy array rather than a DataFrame.
type(dataframe)

numpy.ndarray

In [None]:
# Wrap the scaled array in a DataFrame again so the feature names are restored.
dataframe = pd.DataFrame(dataframe, columns=['feature1', 'feature2'])
dataframe

Unnamed: 0,feature1,feature2
0,0.734847,1.212436
1,-0.979796,-1.03923
2,0.244949,-0.173205
3,1.714643,1.385641
4,-0.979796,-0.173205
5,-0.734847,-1.212436


In [None]:
# Re-check the summary statistics after scaling.
# describe() reports the *sample* standard deviation (ddof=1), whereas
# StandardScaler divides by the *population* standard deviation (ddof=0).
# With 6 rows the `std` row therefore reads sqrt(6/5) ~= 1.095 instead of exactly 1.
dataframe.describe()

Unnamed: 0,feature1,feature2
count,6.0,6.0
mean,1.850372e-17,0.0
std,1.095445,1.095445
min,-0.9797959,-1.212436
25%,-0.9185587,-0.822724
50%,-0.244949,-0.173205
75%,0.6123724,0.866025
max,1.714643,1.385641


2. Compute the covariance matrix of the two standardized features (feature1 and feature2).

In [None]:
# Sample covariance computed by hand: X^T X / (n - 1).
# This shortcut is valid only because every column already has zero mean
# (see the describe() output of the scaled data).
# The divisor is derived from the data instead of being hard-coded as 5, so
# the cell stays correct if the number of rows changes.
n_samples = len(dataframe)
covariance_matrix = dataframe.T @ dataframe / (n_samples - 1)
covariance_matrix

Unnamed: 0,feature1,feature2
feature1,1.2,1.06066
feature2,1.06066,1.2


In [None]:
# Cross-check with NumPy: each argument is treated as one variable, and the
# default normalisation is (n - 1), matching the manual computation.
# The result is a plain ndarray that replaces the DataFrame version.
covariance_matrix = np.cov(dataframe['feature1'], dataframe['feature2'])
covariance_matrix

array([[1.2       , 1.06066017],
       [1.06066017, 1.2       ]])

3. Compute the eigenvalues and eigenvectors of the covariance matrix.

In [None]:
# Eigen-decomposition of the covariance matrix.
# Column i of `eigenVectors` is the eigenvector paired with eigenValues[i].
eigenValues, eigenVectors = np.linalg.eig(covariance_matrix)

# np.linalg.eig does not promise any particular ordering of the eigenvalues,
# but the following cells treat column 0 as the first principal component.
# Sort the pairs by descending eigenvalue so that assumption always holds.
descending = np.argsort(eigenValues)[::-1]
eigenValues = eigenValues[descending]
eigenVectors = eigenVectors[:, descending]

print(eigenValues)
print(eigenVectors)

[2.26066017 0.13933983]
[[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]


4. Compute the new extracted features (principal components) by projecting the data onto the eigenvectors.

In [None]:
# Project every standardised sample onto the eigenvector held in column 0.
PC1 = dataframe.dot(eigenVectors[:, 0])
PC1

Unnamed: 0,0
0,1.376937
1,-1.427667
2,0.050731
3,2.192231
4,-0.815295
5,-1.376937


In [None]:
# Project every standardised sample onto the eigenvector held in column 1.
PC2 = dataframe.dot(eigenVectors[:, 1])
PC2

Unnamed: 0,0
0,0.337706
1,-0.042027
2,-0.29568
3,-0.23264
4,0.570346
5,-0.337706


How PCA is implemented in practice (industry-style code using scikit-learn)

In [None]:
from sklearn.datasets import load_iris

# Load the Iris dataset and unpack it into the feature matrix X and the
# label vector y.
iris = load_iris()
X, y = iris.data, iris.target

X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [None]:
# Class labels returned by load_iris: integer codes 0, 1 and 2.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

Standard Normal Form

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the Iris features, then apply it to the same data so each
# column is standardised before PCA.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
from sklearn.decomposition import PCA

# No n_components is passed, so all components are kept (four for this data,
# as the output of the following cells shows).
pca = PCA()
# Fit on the standardised features and project them in a single call.
X_pca = pca.fit_transform(X_scaled)

In [None]:
# The samples expressed in principal-component coordinates
# (one row per sample, one column per component).
X_pca

array([[-2.26470281e+00,  4.80026597e-01,  1.27706022e-01,
        -2.41682039e-02],
       [-2.08096115e+00, -6.74133557e-01,  2.34608854e-01,
        -1.03006775e-01],
       [-2.36422905e+00, -3.41908024e-01, -4.42014848e-02,
        -2.83770534e-02],
       [-2.29938422e+00, -5.97394508e-01, -9.12901063e-02,
         6.59555596e-02],
       [-2.38984217e+00,  6.46835383e-01, -1.57381957e-02,
         3.59228133e-02],
       [-2.07563095e+00,  1.48917752e+00, -2.69682944e-02,
        -6.60818022e-03],
       [-2.44402884e+00,  4.76441976e-02, -3.35470401e-01,
         3.67755572e-02],
       [-2.23284716e+00,  2.23148073e-01,  8.86954979e-02,
         2.46120962e-02],
       [-2.33464048e+00, -1.11532768e+00, -1.45076864e-01,
         2.68592208e-02],
       [-2.18432817e+00, -4.69013561e-01,  2.53765567e-01,
         3.98992877e-02],
       [-2.16631010e+00,  1.04369065e+00,  2.68681102e-01,
        -1.67313672e-02],
       [-2.32613087e+00,  1.33078335e-01, -9.37592444e-02,
      

In [None]:
# Fraction of the total variance captured by each principal component.
evr = pca.explained_variance_ratio_

In [None]:
# The first two components together account for roughly 96% of the variance
# (0.730 + 0.229), so they retain most of the information in the data.
evr

array([0.72962445, 0.22850762, 0.03668922, 0.00517871])