# Principle Component Analysis - The PCA

In [49]:
import numpy as np
A = np.array([
    [0,3,4],
    [1,2,4],
    [3,4,5]
])

In [50]:
cov_mat = np.cov(A)
cov_mat

array([[4.33333333, 2.83333333, 2.        ],
       [2.83333333, 2.33333333, 1.5       ],
       [2.        , 1.5       , 1.        ]])

In [51]:
corr_mat = np.corrcoef(A)
corr_mat

array([[1.        , 0.89104211, 0.96076892],
       [0.89104211, 1.        , 0.98198051],
       [0.96076892, 0.98198051, 1.        ]])

# How PCA Works?
### The orthogonal projection of data from high dimensions to lower dimensions such that:

# Maximizes the variance of the projected line (purple)
# Minimizes the MSE between the data points and projections (blue)

In [62]:
A = np.array([
    [1,2,3],
    [2,3,2]
])
A = np.array([
    [1,-1],
    [0,1],
    [-1,0]
])

# step_1:Mean center the data 

In [63]:
A_Mean = np.mean(A)
print('Total_Mean:',A_Mean)
A_Mean = np.mean(A, axis = 0)
print('Mean Along Columns:',A_Mean)

Total_Mean: 0.0
Mean Along Columns: [0. 0.]


In [64]:
# Mean Center Data(MCD)
A_MCD = A- A_Mean
A_MCD

array([[ 1., -1.],
       [ 0.,  1.],
       [-1.,  0.]])

# Step_2: Calculate Covariance Matrix: Variance in data

In [65]:
A_cov = np.cov(A_MCD, rowvar = False)
A_cov = np.matmul(A_MCD.T,A_MCD)
A_cov

array([[ 2., -1.],
       [-1.,  2.]])

# Step_3:Calculate Eigen Values and Eigen Vectors

In [66]:
eigen_val_A, eigen_vec_A = np.linalg.eigh(A_cov)
eigen_val_A, eigen_vec_A

(array([1., 3.]),
 array([[-0.70710678, -0.70710678],
        [-0.70710678,  0.70710678]]))

# Step_4. Sort Eigenvalues in descending order

In [67]:
sort_idx = np.argsort(eigen_val_A)[::-1]
sort_idx

array([1, 0], dtype=int64)

In [68]:
Sorted_eigen_vals = eigen_val_A[sort_idx]
Sorted_eigen_vals, eigen_val_A

(array([3., 1.]), array([1., 3.]))

In [69]:
Sorted_eigen_vectors = eigen_vec_A[sort_idx]
Sorted_eigen_vectors, eigen_vec_A

(array([[-0.70710678,  0.70710678],
        [-0.70710678, -0.70710678]]),
 array([[-0.70710678, -0.70710678],
        [-0.70710678,  0.70710678]]))

# Step_5:5. Select a subset from the rearranged Eigenvalue matrix or Number of PCA components

In [70]:
n_comp = 1
eigen_vector_subset = Sorted_eigen_vectors[:,0:n_comp]
eigen_vector_subset

array([[-0.70710678],
       [-0.70710678]])

# Step_6: Transform the data with reduced dimensions

In [71]:
#Transform the data 
A_reduced = np.dot(eigen_vector_subset.transpose(),A_MCD.transpose()).transpose()
A_reduced 

array([[ 0.        ],
       [-0.70710678],
       [ 0.70710678]])