In [213]:
import numpy as np
# Print floats with 4 digits of precision and in fixed-point (no scientific
# notation) so the small matrices below are easy to compare by eye.
np.set_printoptions(precision=4, suppress=True)

# centering the data

In [214]:
# Five observations, each a 3-dimensional feature vector.
x1, x2, x3, x4, x5 = (
    np.array(sample)
    for sample in ([1, 5, 2], [6, 2, 1], [2, 0, 0], [5, 2, 0], [0, 2, 1])
)

# Place each observation in its own column: rows are features, columns are samples.
X = np.stack((x1, x2, x3, x4, x5), axis=1)  # d * n

X
X.shape

array([[1, 6, 2, 5, 0],
       [5, 2, 0, 2, 2],
       [2, 1, 0, 0, 1]])

(3, 5)

In [215]:
# Mean of every feature (row) taken across the samples (columns).
# keepdims=True leaves the result as a (d, 1) column so it broadcasts against X.
X_mean = np.mean(X, axis=1, keepdims=True)

X_mean
X_mean.shape

array([[2.8],
       [2.2],
       [0.8]])

(3, 1)

In [216]:
# Small broadcasting demo: how the shape of `b` decides what gets subtracted.
a = np.tile([10, 20, 30], (3, 1))

# Shape (3,): treated as a row, so b[j] is subtracted from column j.
b = np.array([9, 2, 2])
a - b

# Shape (1, 3): an explicit row vector, same result as the 1-D case.
b = b.reshape(1, 3)
a - b

# Shape (3, 1): a column vector, so b[i] is subtracted from every entry of row i.
b = b.reshape(3, 1)
a - b

array([[ 1, 18, 28],
       [ 1, 18, 28],
       [ 1, 18, 28]])

array([[ 1, 18, 28],
       [ 1, 18, 28],
       [ 1, 18, 28]])

array([[ 1, 11, 21],
       [ 8, 18, 28],
       [ 8, 18, 28]])

In [217]:
# Remove the per-feature mean from every sample; the (d, 1) mean column is
# broadcast across all n columns of X.
X_centered = np.subtract(X, X_mean)

X_centered
X_centered.shape

array([[-1.8,  3.2, -0.8,  2.2, -2.8],
       [ 2.8, -0.2, -2.2, -0.2, -0.2],
       [ 1.2,  0.2, -0.8, -0.8,  0.2]])

(3, 5)

# calculating `C`, the covariance matrix

In [218]:
# Shape check for the two possible products: (d, n) @ (n, d) yields a d x d
# matrix (the feature covariance computed next), whereas (n, d) @ (d, n) would
# yield an n x n matrix.
X_centered.shape, X_centered.T.shape
X_centered.T.shape, X_centered.shape

((3, 5), (5, 3))

((5, 3), (3, 5))

In [219]:
# Population covariance of the features: C = (1/n) * Xc @ Xc.T, with Xc the
# centered (d, n) data matrix. The sample count is read from the data instead
# of being hard-coded, so the cell stays correct if samples are added/removed.
# Normalising by n (not n - 1) matches np.cov(X, bias=True).
n_samples = X_centered.shape[1]
C = (1 / n_samples) * X_centered @ X_centered.T

C
C.shape

array([[ 5.36, -0.76, -0.64],
       [-0.76,  2.56,  1.04],
       [-0.64,  1.04,  0.56]])

(3, 3)

# calculating eigenvalues and eigenvectors

In [220]:
# C is real and symmetric, so use the symmetric solver: np.linalg.eigh
# guarantees real eigenvalues and orthonormal eigenvectors. The reconstruction
# step later on (W.T @ X_transformed) relies on that orthonormality, which the
# general-purpose np.linalg.eig does not promise.
# Eigenvector signs are arbitrary; a flipped sign flips the matching projected
# coordinates but leaves the reconstruction unchanged.
eigenvalues, eigenvectors = np.linalg.eigh(C)

# Order the components by decreasing eigenvalue (variance). The solver returns
# eigenvectors as columns, so transpose first: afterwards row i of
# `eigenvectors` is the i-th principal direction.
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors.T[sorted_indices]

eigenvalues
eigenvectors

array([5.7087, 2.6712, 0.1001])

array([[ 0.9424, -0.2852, -0.1748],
       [-0.3293, -0.8828, -0.335 ],
       [ 0.0587, -0.3733,  0.9259]])

In [221]:
# Report each principal component together with its share of the total variance.
# The total is computed once, and the loop follows the actual number of
# components rather than a hard-coded 3.
total_variance = np.sum(eigenvalues)
for rank, (eigenvalue, eigenvector) in enumerate(zip(eigenvalues, eigenvectors), start=1):
    print(f"Eigenvalue {rank}: {eigenvalue:.4f}")
    print(f"Eigenvector {rank}: {eigenvector}")
    print(f"Variance explained: {eigenvalue / total_variance * 100:.2f}%\n")

Eigenvalue 1: 5.7087
Eigenvector 1: [ 0.9424 -0.2852 -0.1748]
Variance explained: 67.32%

Eigenvalue 2: 2.6712
Eigenvector 2: [-0.3293 -0.8828 -0.335 ]
Variance explained: 31.50%

Eigenvalue 3: 0.1001
Eigenvector 3: [ 0.0587 -0.3733  0.9259]
Variance explained: 1.18%



# projecting the data on PC1

In [222]:
# Keep only the first principal component. Indexing with a list ([[0]]) keeps
# W two-dimensional, shape (1, d), so it can be used directly as a projection
# matrix against the (d, n) centered data.
W = eigenvectors[[0]]

W
W.shape
X_centered.shape

array([[ 0.9424, -0.2852, -0.1748]])

(1, 3)

(3, 5)

In [223]:
# Project the centered samples onto the selected component: (1, d) @ (d, n) -> (1, n).
X_transformed = np.matmul(W, X_centered)

X_transformed
X_transformed.shape

array([[-2.7046,  3.0378,  0.0133,  2.2701, -2.6166]])

(1, 5)

#### reconstructing the original data

In [224]:
# Shape check before reconstructing: W.T is (d, k) and X_transformed is (k, n),
# so their product has the same (d, n) shape as X.
W.shape
X_transformed.shape
X.shape

(1, 3)

(1, 5)

(3, 5)

In [225]:
# Map the 1-D scores back into feature space, then add the mean back to undo
# the centering.
X_reconstructed = np.matmul(W.T, X_transformed) + X_mean

X_reconstructed
X_reconstructed.shape

# Original data, shown for side-by-side comparison.
X
X.shape

array([[0.2512, 5.6628, 2.8125, 4.9394, 0.3341],
       [2.9713, 1.3337, 2.1962, 1.5526, 2.9462],
       [1.2726, 0.2691, 0.7977, 0.4033, 1.2573]])

(3, 5)

array([[1, 6, 2, 5, 0],
       [5, 2, 0, 2, 2],
       [2, 1, 0, 0, 1]])

(3, 5)

In [226]:
# Mean squared reconstruction error over all entries of X.
np.mean(np.square(X - X_reconstructed))

np.float64(0.9237776494600244)

# projecting the data on PC1 and PC2

In [227]:
# Keep the first two rows of `eigenvectors`, i.e. the two components with the
# largest eigenvalues; W has shape (2, d).
# NOTE(review): the select / project / reconstruct / MSE sequence is repeated
# for k = 1, 2, 3 components; a helper parameterised by k would remove the
# duplication.
W = eigenvectors[:2]

W
W.shape
X_centered.shape

array([[ 0.9424, -0.2852, -0.1748],
       [-0.3293, -0.8828, -0.335 ]])

(2, 3)

(3, 5)

In [228]:
# Project the centered samples onto the two selected components: (2, d) @ (d, n) -> (2, n).
X_transformed = np.matmul(W, X_centered)

X_transformed
X_transformed.shape

array([[-2.7046,  3.0378,  0.0133,  2.2701, -2.6166],
       [-2.2812, -0.9441,  2.4736, -0.2798,  1.0315]])

(2, 5)

#### reconstructing the original data

In [229]:
# Shape check before reconstructing: W.T is (d, k) and X_transformed is (k, n),
# so their product has the same (d, n) shape as X.
W.shape
X_transformed.shape
X.shape

(2, 3)

(2, 5)

(3, 5)

In [230]:
# Map the 2-D scores back into feature space, then add the mean back to undo
# the centering.
X_reconstructed = np.matmul(W.T, X_transformed) + X_mean

X_reconstructed
X_reconstructed.shape

# Original data, shown for side-by-side comparison.
X
X.shape

array([[ 1.0023,  5.9737,  1.998 ,  5.0315, -0.0056],
       [ 4.9851,  2.1671,  0.0125,  1.7996,  2.0356],
       [ 2.0369,  0.5855, -0.0311,  0.497 ,  0.9117]])

(3, 5)

array([[1, 6, 2, 5, 0],
       [5, 2, 0, 2, 2],
       [2, 1, 0, 0, 1]])

(3, 5)

In [231]:
# Mean squared reconstruction error over all entries of X.
np.mean(np.square(X - X_reconstructed))

np.float64(0.03336569308673175)

# projecting the data on PC1, PC2, and PC3

In [232]:
# Keep every component: W is the full (d, d) matrix whose rows are the sorted
# eigenvectors, so no direction is discarded by the projection.
W = eigenvectors

W
W.shape
X_centered.shape

array([[ 0.9424, -0.2852, -0.1748],
       [-0.3293, -0.8828, -0.335 ],
       [ 0.0587, -0.3733,  0.9259]])

(3, 3)

(3, 5)

In [233]:
# Express the centered samples in the full eigenvector basis: (d, d) @ (d, n) -> (d, n).
X_transformed = np.matmul(W, X_centered)

X_transformed
X_transformed.shape

array([[-2.7046,  3.0378,  0.0133,  2.2701, -2.6166],
       [-2.2812, -0.9441,  2.4736, -0.2798,  1.0315],
       [-0.0399,  0.4477,  0.0336, -0.5368,  0.0954]])

(3, 5)

#### reconstructing the original data

In [234]:
# Shape check before reconstructing: W.T is (d, k) and X_transformed is (k, n),
# so their product has the same (d, n) shape as X.
W.shape
X_transformed.shape
X.shape

(3, 3)

(3, 5)

(3, 5)

In [235]:
# Map the scores back into feature space, then add the mean back to undo the
# centering.
X_reconstructed = np.matmul(W.T, X_transformed) + X_mean

X_reconstructed
X_reconstructed.shape

# Original data, shown for side-by-side comparison.
X
X.shape

array([[ 1.,  6.,  2.,  5., -0.],
       [ 5.,  2.,  0.,  2.,  2.],
       [ 2.,  1., -0.,  0.,  1.]])

(3, 5)

array([[1, 6, 2, 5, 0],
       [5, 2, 0, 2, 2],
       [2, 1, 0, 0, 1]])

(3, 5)

In [236]:
# Mean squared reconstruction error over all entries of X.
np.mean(np.square(X - X_reconstructed))

np.float64(5.012553668591846e-32)