ref. https://scentellegher.github.io/machine-learning/2020/01/27/pca-loadings-sklearn.html

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn import decomposition
from sklearn import datasets
from sklearn.preprocessing import scale

# Load the iris dataset and keep the class labels alongside it.
iris = datasets.load_iris()
y = iris.target

# Standardize every feature, then project the standardized data onto the
# first two principal components. The standardized matrix gets its own name
# so it is not lost when the PCA scores are stored in `X`.
X_scaled = scale(iris.data)
pca = decomposition.PCA(n_components=2)
X = pca.fit_transform(X_scaled)

In [None]:
# Fraction of the total variance captured by each retained component.
var_ratio = pca.explained_variance_ratio_

# Report every component in a loop rather than indexing [0] and [1] by hand,
# so the report stays complete (and never raises IndexError) when the number
# of components changes. Labels are left-padded to 11 characters to keep the
# colons aligned with the "PC SUM" row.
for idx, ratio in enumerate(var_ratio, start=1):
    label = f"PC {idx}"
    print(f"{label:<11}: {ratio * 100:.2f}")
print(f"{'PC SUM':<11}: {sum(var_ratio) * 100:.2f}")

PC 1       : 72.96
PC 2       : 22.85
PC SUM     : 95.81


Weights

In [None]:
# `pca.components_` holds the unit-length eigenvectors (principal axes), one
# per row. Transposed, each column gives the weight of every original feature
# in that principal component. These are *weights*, not loadings: loadings are
# the eigenvectors scaled by the square root of the corresponding eigenvalue.
pc_names = [f"PC{i}" for i in range(1, pca.n_components_ + 1)]
weights = pd.DataFrame(pca.components_.T, columns=pc_names, index=iris.feature_names)
print(weights)

                        PC1       PC2
sepal length (cm)  0.521066  0.377418
sepal width (cm)  -0.269347  0.923296
petal length (cm)  0.580413  0.024492
petal width (cm)   0.564857  0.066942


Loading Matrix

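- Another useful way to interpret PCA is to compute the correlations between the original variables and the principal components. These correlations form the loading matrix: each eigenvector is scaled by the square root of its eigenvalue (the standard deviation of that component).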

In [None]:
# Loadings = eigenvectors scaled by the standard deviation of each component.
# For standardized features they equal the correlation between each original
# feature and each principal component.
#
# The features were standardized with `scale()`, which divides by the
# population standard deviation (ddof=0), whereas `pca.explained_variance_`
# is computed with the n - 1 denominator (ddof=1). Using it unchanged inflates
# every loading by sqrt(n / (n - 1)), so the values are no longer exact
# correlations. Rescale the eigenvalues to the ddof=0 convention first.
n_samples = pca.n_samples_
component_std = np.sqrt(pca.explained_variance_ * (n_samples - 1) / n_samples)
loadings = pca.components_.T * component_std

# Derive the column labels from the fitted model instead of hard-coding two.
pc_names = [f"PC{i}" for i in range(1, pca.n_components_ + 1)]
loading_matrix = pd.DataFrame(loadings, columns=pc_names, index=iris.feature_names)
print("loading matrix")
print(loading_matrix)

loading matrix
                        PC1       PC2
sepal length (cm)  0.893151  0.362039
sepal width (cm)  -0.461684  0.885673
petal length (cm)  0.994877  0.023494
petal width (cm)   0.968212  0.064214
