In [2]:
import numpy as np
import numpy.linalg as linalg
from sklearn.datasets import make_classification

In [6]:
# Toy dataset: each row is one sample, each column is one feature.
X = np.array([[1, 2], [3, 4], [5, 6]])
# Read both dimensions off the array itself so they can never drift out of
# sync with X (previously the feature count was a hard-coded literal).
n_samples, n_features = X.shape

In [9]:
# Center the data: subtract each feature's (column's) mean so that every
# column averages to zero. keepdims leaves the means as a single row, which
# broadcasting then subtracts from every row of X.
X_centered = X - X.mean(axis=0, keepdims=True)
X_centered

array([[-2., -2.],
       [ 0.,  0.],
       [ 2.,  2.]])

In [12]:
# Sample covariance matrix: the scatter matrix of the centered data divided
# by (n_samples - 1). Same result as np.cov(X_centered.T).
cov_matrix = (X_centered.T @ X_centered) / (n_samples - 1)
cov_matrix

array([[4., 4.],
       [4., 4.]])

In [16]:
# Eigendecomposition of the covariance matrix. cov_matrix is symmetric, so
# use eigh: it is the solver for symmetric/Hermitian matrices and always
# returns real eigenvalues, whereas the general-purpose eig can produce
# complex-typed results from floating-point round-off.
eigenvalues, eigenvectors = linalg.eigh(cov_matrix)
# Sort eigenvalues in descending order and permute the eigenvector columns
# with the same index so each column stays paired with its eigenvalue.
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]
# Projection matrix: the eigenvectors (as columns) belonging to the
# n_features largest eigenvalues.
W = eigenvectors[:, 0:n_features]
# Transform the samples onto the new subspace. The product has one row per
# component and one column per sample, hence the transpose when printing.
# Note: an eigenvector is only defined up to its sign, so a transformed
# column may come out negated compared with another solver's result.
X_transformed = np.dot(W.T, X_centered.T)
print("Original dataset\n", X)
print("Transformed dataset\n", X_transformed.T)

Original dataset
 [[1 2]
 [3 4]
 [5 6]]
Transformed dataset
 [[-2.82842712  0.        ]
 [ 0.          0.        ]
 [ 2.82842712  0.        ]]


In [17]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Cross-check the manual computation against scikit-learn's PCA, which
# centers the input internally (it does not scale it).
pca = PCA()
# fit_transform fits the model and projects X in a single pass; calling
# pca.fit(X) first would only repeat the same decomposition.
X_transformed = pca.fit_transform(X)
# Fitted attributes: the variance explained by each component and the
# principal axes (one component per row).
eigenvalues_pca = pca.explained_variance_
eigenvectors_pca = pca.components_
print("Original dataset\n", X)
print("Transformed dataset\n", X_transformed)

Original dataset
 [[1 2]
 [3 4]
 [5 6]]
Transformed dataset
 [[-2.82842712e+00  4.74426853e-17]
 [ 0.00000000e+00  0.00000000e+00]
 [ 2.82842712e+00  4.74426853e-17]]
