<a href="https://colab.research.google.com/github/tofighi/MachineLearning/blob/master/PCA_Numerical_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PCA

In [1]:
from io import StringIO
import csv
import pandas as pd
import numpy as np
from numpy import array
from numpy import mean
from numpy import cov
from numpy.linalg import eig

#
data ="""
x,y
2.5,2.4
0.5,0.7
2.2,2.9
1.9,2.2
3.1,3
2.3,2.7
2,1.6
1,1.1
1.5,1.6
1.1,0.9
"""

df = pd.read_csv(StringIO(data))
df

Unnamed: 0,x,y
0,2.5,2.4
1,0.5,0.7
2,2.2,2.9
3,1.9,2.2
4,3.1,3.0
5,2.3,2.7
6,2.0,1.6
7,1.0,1.1
8,1.5,1.6
9,1.1,0.9


# Data Matrix (D)

In [2]:
# Define Data numeric values: D matrix)
D = df.values
print(D)

[[2.5 2.4]
 [0.5 0.7]
 [2.2 2.9]
 [1.9 2.2]
 [3.1 3. ]
 [2.3 2.7]
 [2.  1.6]
 [1.  1.1]
 [1.5 1.6]
 [1.1 0.9]]


# Zero centered features (Z matrix)

In [3]:
# calculate the mean of each column
M = mean(D.T, axis=1)
print('Mean of each column:')
print(M)

# center columns by subtracting column means
print('\nZero centered features: Z Matrix:')
Z = D - M
print(Z)

Mean of each column:
[1.81 1.91]

Zero centered features: Z Matrix:
[[ 0.69  0.49]
 [-1.31 -1.21]
 [ 0.39  0.99]
 [ 0.09  0.29]
 [ 1.29  1.09]
 [ 0.49  0.79]
 [ 0.19 -0.31]
 [-0.81 -0.81]
 [-0.31 -0.31]
 [-0.71 -1.01]]


# Covariance Matrix (COV)

In [4]:
# calculate covariance matrix of centered matrix
print('\nZero centered matrix Z:')
print(Z)

print('\nTranspose of Zero centered matrix Z.T:')
print(Z.T)

print('\nCovarince Matrix: COV(from library):')
COV = cov(Z.T)
print(COV)

print('\nCovarince Matrix: COV(from formula):')

# Dividing by number of records (len(C)) - 1
COV= (1/(len(Z)-1)) * np.matmul(Z.T,Z)
print(COV)


Zero centered matrix Z:
[[ 0.69  0.49]
 [-1.31 -1.21]
 [ 0.39  0.99]
 [ 0.09  0.29]
 [ 1.29  1.09]
 [ 0.49  0.79]
 [ 0.19 -0.31]
 [-0.81 -0.81]
 [-0.31 -0.31]
 [-0.71 -1.01]]

Transpose of Zero centered matrix Z.T:
[[ 0.69 -1.31  0.39  0.09  1.29  0.49  0.19 -0.81 -0.31 -0.71]
 [ 0.49 -1.21  0.99  0.29  1.09  0.79 -0.31 -0.81 -0.31 -1.01]]

Covarince Matrix: COV(from library):
[[0.61655556 0.61544444]
 [0.61544444 0.71655556]]

Covarince Matrix: COV(from formula):
[[0.61655556 0.61544444]
 [0.61544444 0.71655556]]


In [5]:
# eigendecomposition of covariance matrix
values, vectors = eig(COV)
print(f'Eigen values:\n {values}')
print(f'\nEigen vectors:\n {vectors}')

Eigen values:
 [0.0490834  1.28402771]

Eigen vectors:
 [[-0.73517866 -0.6778734 ]
 [ 0.6778734  -0.73517866]]


# Sorting Eigen vectors based on |Eigen values| (absolute values)

In [6]:
sort_index = np.argsort(np.abs(values))

print(f'Sorting the eigen vectors: {sort_index}')

Sorting the eigen vectors: [0 1]


# Calculating Transform Matrix

In [7]:
transform_mat = []
for i in sort_index:
  transform_mat.append(vectors[i])

transform_mat= np.column_stack(transform_mat)  
print(f'Transform matrix:\n {transform_mat}')


Transform matrix:
 [[-0.73517866  0.6778734 ]
 [-0.6778734  -0.73517866]]


# Calculating Principal components

In [8]:
PC = np.matmul(Z,transform_mat)
print('\nZero centered matrix Z:')
print(Z)
print(f'\nPrincipal Components:\n {PC}')


Zero centered matrix Z:
[[ 0.69  0.49]
 [-1.31 -1.21]
 [ 0.39  0.99]
 [ 0.09  0.29]
 [ 1.29  1.09]
 [ 0.49  0.79]
 [ 0.19 -0.31]
 [-0.81 -0.81]
 [-0.31 -0.31]
 [-0.71 -1.01]]

Principal Components:
 [[-8.39431238e-01  1.07495104e-01]
 [ 1.78331085e+00  1.55202114e-03]
 [-9.57814340e-01 -4.63456244e-01]
 [-2.62749365e-01 -1.52193204e-01]
 [-1.68726247e+00  7.31119496e-02]
 [-8.95757526e-01 -2.48633173e-01]
 [ 7.04568090e-02  3.56701329e-01]
 [ 1.14457216e+00  4.64172582e-02]
 [ 4.38046137e-01  1.77646297e-02]
 [ 1.20662898e+00  2.61240329e-01]]
