Quelle: https://github.com/TiongSun/PCA_manual_calculation

In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scipy.linalg as la
from sklearn.decomposition import PCA

'''
Data
'''
# Three observations (rows) of three features (columns).  A plain ndarray is
# used because NumPy no longer recommends the np.matrix class.
data = np.array([[1, 2, 4],
                 [4, 1, 2],
                 [5, 4, 8]])

df = pd.DataFrame(data)

'''
Manual calculation
'''

# Standardize every column to zero mean and unit standard deviation.
# pandas' std() defaults to the sample estimate (ddof=1).
standardized_data = (df - df.mean()) / df.std()

# Covariance matrix of the standardized columns (features x features).
# NOTE: bias=1 normalizes by N, whereas the standardization above used N-1,
# so the diagonal is (N-1)/N instead of 1.  That only rescales the
# eigenvalues by a constant; the eigenvectors, and therefore the projected
# scores below, are unaffected.
covarance = np.cov(standardized_data.T, bias=1)

# Eigen-decomposition.  The covariance matrix is symmetric, so eigh is the
# appropriate solver: it always returns real values.  It yields eigenvalues
# in ascending order, while PCA wants the largest variance first, so the
# pairs are explicitly re-ordered (np.linalg.eig gives no ordering guarantee
# at all, which made the column slice below unreliable).
eigenvalue, eigenvectors = np.linalg.eigh(covarance)
descending_order = np.argsort(eigenvalue)[::-1]
eigenvalue = eigenvalue[descending_order]
eigenvectors = eigenvectors[:, descending_order]

# Number of principal components to keep.
n_components = 3

# Project the standardized observations onto the eigenvectors.  The sign of
# each component is arbitrary, so columns may differ from another
# implementation's result by a factor of -1.
pca_manual = np.matmul(np.array(standardized_data), eigenvectors)

# Keep only the leading n_components columns (largest eigenvalues first).
pca_manual = pca_manual[:, :n_components]

'''
calculate using SKlearn
'''

# Reference projection: let scikit-learn's PCA fit and transform the same
# standardized data, keeping the same number of components.
pca_sklearn = PCA(n_components=n_components).fit_transform(standardized_data)

# Report every intermediate result as "title / rounded value / blank line".
# Each entry is (title, object to show, number of decimals).
for section_title, section_value, section_digits in (
    ('Standardized data', standardized_data, 2),
    ('Covariance', covarance, 2),
    ('eigen_value', eigenvalue, 4),
    ('eigen_vector', eigenvectors, 4),
    ('PCA manually calculated', pca_manual, 2),
):
    print(section_title)
    print(section_value.round(section_digits))
    print('')

# The final section is printed without a trailing blank line.
print('PCA - sklearn')
print(pca_sklearn.round(2))

Standardized data
      0     1     2
0 -1.12 -0.22 -0.22
1  0.32 -0.87 -0.87
2  0.80  1.09  1.09

Covariance
[[0.67 0.28 0.28]
 [0.28 0.67 0.67]
 [0.28 0.67 0.67]]

eigen_value
[ 1.5171  0.4829 -0.    ]

eigen_vector
[[-0.4215 -0.9068  0.    ]
 [-0.6412  0.2981 -0.7071]
 [-0.6412  0.2981  0.7071]]

PCA manually calculated
[[ 0.75  0.89 -0.  ]
 [ 0.98 -0.81  0.  ]
 [-1.74 -0.08 -0.  ]]

PCA - sklearn
[[-0.75  0.89  0.  ]
 [-0.98 -0.81  0.  ]
 [ 1.74 -0.08  0.  ]]
