In [1]:
import numpy as np
from numpy import array, mean, std, cov
from numpy.linalg import eig
from scipy import stats

In [2]:
# define a matrix
A = array([[1, 5, 3, 1], [4, 2, 6, 3], [1, 4, 3, 2], [4, 4, 1, 1], [5, 5, 2, 3]])
print(A)

[[1 5 3 1]
 [4 2 6 3]
 [1 4 3 2]
 [4 4 1 1]
 [5 5 2 3]]


# Step 1 - Standardize data

In [3]:
# calculate the mean of each column
M = mean(A)
print(M)

3.0


In [4]:
# calculate the standard deviation for each column
S = std(A)
print(S)

1.5491933384829668


In [5]:
# normalize the data
standardized_data = (A-M)/S
print(standardized_data)

[[-1.29099445  1.29099445  0.         -1.29099445]
 [ 0.64549722 -0.64549722  1.93649167  0.        ]
 [-1.29099445  0.64549722  0.         -0.64549722]
 [ 0.64549722  0.64549722 -1.29099445 -1.29099445]
 [ 1.29099445  1.29099445 -0.64549722  0.        ]]


In [None]:
# standardized = stats.zscore(A)
# print(standardized)

# Step 2 - Compute covariance

In [6]:
# calculate covariance matrix of centered matrix
V = cov(standardized_data.T)
print(V) # relation if any features to itself should be grater then any other relation

[[ 1.45833333 -0.20833333 -0.10416667  0.41666667]
 [-0.20833333  0.625      -0.72916667 -0.20833333]
 [-0.10416667 -0.72916667  1.45833333  0.41666667]
 [ 0.41666667 -0.20833333  0.41666667  0.41666667]]


In [8]:
V.shape

(4, 4)

# Step 3 - Calculate eigenvalues

In [11]:
# eigendecomposition of covariance matrix
values, vectors = eig(V)

In [12]:
values

array([2.05162406, 1.57796252, 0.07038105, 0.2583657 ])

In [None]:
print(vectors)

[[-0.26087912  0.91114239 -0.30581868 -0.09075555]
 [ 0.4801165   0.0378551  -0.5085857   0.71371964]
 [-0.77212094 -0.3639535  -0.48334366  0.19428332]
 [-0.32443718  0.18953731  0.64357939  0.66679959]]


In [None]:
print(values)

[2.05162406 1.57796252 0.07038105 0.2583657 ]


# Step 4 - Project PCA

In [None]:
# project data
PCA = vectors.T.dot(standardized_data.T)
print(PCA.T)

[[ 1.37546783 -1.37210066 -1.09262852  0.17773843]
 [-1.97351639 -0.1410884  -0.80510541 -0.14305846]
 [ 0.85613066 -1.27419021 -0.34890915  0.14745167]
 [ 1.55716757  0.83774557 -0.7325592  -0.70953168]
 [ 0.78143616  1.46008146 -0.73939455  0.67883383]]


In [None]:
np.sum(PCA.T, axis=0)

array([ 2.59668583, -0.48955224, -3.71859684,  0.15143379])

In [None]:
# delete irrelevent features if you want