## Manually Calculate Principal Component Analysis (PCA)

In [1]:
import numpy as np

# Data set: 10 observations (rows) of 2 features (columns).
A = np.array([
    [2.5, 2.4],
    [0.5, 0.7],
    [2.2, 2.9],
    [1.9, 2.2],
    [3.1, 3.0],
    [2.3, 2.7],
    [2.0, 1.6],
    [1.0, 1.1],
    [1.5, 1.6],
    [1.1, 0.9],
])

In [2]:
# Mean of each feature (column) of A. Reducing over axis=0 collapses the
# observations directly, so no transpose of A is needed.
M = A.mean(axis=0)
M

array([1.81, 1.91])

In [3]:
# Centre the data: M holds one mean per column, and broadcasting subtracts
# it from every row of A so that each column of C has zero mean.
C = A - M
C

array([[ 0.69,  0.49],
       [-1.31, -1.21],
       [ 0.39,  0.99],
       [ 0.09,  0.29],
       [ 1.29,  1.09],
       [ 0.49,  0.79],
       [ 0.19, -0.31],
       [-0.81, -0.81],
       [-0.31, -0.31],
       [-0.71, -1.01]])

In [4]:
# Covariance matrix of the features. np.cov treats rows as variables by
# default; rowvar=False tells it that the variables are the columns of C.
V = np.cov(C, rowvar=False)
print('\n Covariance matrix \n', V)


 Covariance matrix 
 [[0.61655556 0.61544444]
 [0.61544444 0.71655556]]


In [5]:
# Eigen-decomposition of the covariance matrix.
# Column i of `eigen_vectors` is the eigenvector paired with eigen_values[i].
# np.linalg.eig does not promise any particular ordering of the eigenvalues,
# which is why they are sorted explicitly afterwards.
eigen_values, eigen_vectors = np.linalg.eig(V)
print('\n Eigen vectors \n', eigen_vectors)
print('\n Eigen values \n', eigen_values)


 Eigen vectors 
 [[-0.73517866 -0.6778734 ]
 [ 0.6778734  -0.73517866]]

 Eigen values 
 [0.0490834  1.28402771]


In [6]:
# Rank the components from largest to smallest eigenvalue:
# argsort gives ascending positions, reversing them gives descending order.
sorted_index = np.argsort(eigen_values)[::-1]
sorted_eigenvalue = eigen_values[sorted_index]

# Eigenvectors are stored as columns, so apply the same ordering to the
# columns to keep each vector aligned with its eigenvalue.
sorted_eigenvectors = eigen_vectors[:, sorted_index]

print('\n Sorted Eigen Value \n', sorted_eigenvalue)
print('\n Sorted Eigen Vectors \n', sorted_eigenvectors)



 Sorted Eigen Value 
 [1.28402771 0.0490834 ]

 Sorted Eigen Vectors 
 [[-0.6778734  -0.73517866]
 [-0.73517866  0.6778734 ]]


In [7]:
# Keep the leading n_components eigenvectors (columns); n_components is the
# dimensionality of the reduced data.
n_components = 2  # useful range is 1..number of features (2 for this data)
eigenvector_subset = sorted_eigenvectors[:, :n_components]
eigenvector_subset

array([[-0.6778734 , -0.73517866],
       [-0.73517866,  0.6778734 ]])

In [8]:
# Project the centred data onto the selected eigenvectors.
# C has one observation per row and eigenvector_subset one component per
# column, so C @ eigenvector_subset yields one row of component scores per
# observation. This equals (E^T . C^T)^T without the three transposes.
A_reduced = C @ eigenvector_subset
A_reduced

array([[-0.82797019, -0.17511531],
       [ 1.77758033,  0.14285723],
       [-0.99219749,  0.38437499],
       [-0.27421042,  0.13041721],
       [-1.67580142, -0.20949846],
       [-0.9129491 ,  0.17528244],
       [ 0.09910944, -0.3498247 ],
       [ 1.14457216,  0.04641726],
       [ 0.43804614,  0.01776463],
       [ 1.22382056, -0.16267529]])

## Calculate Principal Component Analysis using the PCA() class in the scikit-learn library

In [9]:
# Principal Component Analysis
from numpy import array
from sklearn.decomposition import PCA

# Show the input matrix A (defined in the first cell; it is not redefined here)
print('Given matrix',A)

Given matrix [[2.5 2.4]
 [0.5 0.7]
 [2.2 2.9]
 [1.9 2.2]
 [3.1 3. ]
 [2.3 2.7]
 [2.  1.6]
 [1.  1.1]
 [1.5 1.6]
 [1.1 0.9]]


In [10]:
# Build the scikit-learn PCA estimator, keeping n_components components.
n_components = 2
pca = PCA(n_components=n_components)

In [11]:
# Fit the PCA model on the raw matrix A (not the manually centred C).
# The value returned by fit() is the cell's last expression, so its repr is
# displayed as the cell output.
pca.fit(A)

PCA(n_components=2)

In [12]:
# Show the fitted principal axes (one component per row of components_)
# followed by the variance explained by each component.
print(pca.components_, pca.explained_variance_, sep='\n')

[[-0.6778734  -0.73517866]
 [-0.73517866  0.6778734 ]]
[1.28402771 0.0490834 ]


In [13]:
# Project A onto the fitted principal components.
# NOTE: this rebinds A_reduced, replacing the manually computed projection
# assigned earlier in the notebook; the printed values can be compared with
# that earlier cell's output.
A_reduced = pca.transform(A)
print(A_reduced)

[[-0.82797019 -0.17511531]
 [ 1.77758033  0.14285723]
 [-0.99219749  0.38437499]
 [-0.27421042  0.13041721]
 [-1.67580142 -0.20949846]
 [-0.9129491   0.17528244]
 [ 0.09910944 -0.3498247 ]
 [ 1.14457216  0.04641726]
 [ 0.43804614  0.01776463]
 [ 1.22382056 -0.16267529]]
