In [2]:
import numpy as np
# Display floats with 4 digits of precision and suppress scientific notation,
# so the small matrices printed below are easy to compare by eye.
np.set_printoptions(precision=4, suppress=True)

In [3]:
# Three observations, each described by d = 3 features.
x1 = np.array([10, 5, 2])
x2 = np.array([6, 2, 1])
x3 = np.array([2, 0, 0])

# Data matrix with one observation per column, i.e. shape (d, n).
X = np.stack([x1, x2, x3], axis=1)

X
X.shape

array([[10,  6,  2],
       [ 5,  2,  0],
       [ 2,  1,  0]])

(3, 3)

In [4]:
# Per-feature mean: average every row over its sample columns.
# keepdims=True keeps the result as a (d, 1) column so it broadcasts against X.
X_mean = X.mean(axis=1, keepdims=True)
X_mean
X_mean.shape

array([[6.    ],
       [2.3333],
       [1.    ]])

(3, 1)

In [7]:
# Remove each feature's mean from its row; the (d, 1) mean column is
# broadcast across all sample columns.
X_centered = np.subtract(X, X_mean)
X_centered  # each feature (row) now averages to zero
X_centered.shape

array([[ 4.    ,  0.    , -4.    ],
       [ 2.6667, -0.3333, -2.3333],
       [ 1.    ,  0.    , -1.    ]])

(3, 3)

In [8]:
# Number of samples (columns of the d x n data matrix).
n = X.shape[1]

# Covariance matrix of the features, shape (d, d).
# `@` is matrix multiplication (not a cross product). Dividing by n (rather
# than n - 1) gives the population covariance. The product is grouped
# explicitly so the result does not rely on `*` and `@` sharing precedence.
C = (X_centered @ X_centered.T) / n
C
C.shape

array([[10.6667,  6.6667,  2.6667],
       [ 6.6667,  4.2222,  1.6667],
       [ 2.6667,  1.6667,  0.6667]])

(3, 3)

In [9]:
# C is a symmetric covariance matrix, so use eigh: unlike the general-purpose
# eig it guarantees real eigenvalues and orthonormal eigenvectors.
# Note: the sign of each eigenvector is arbitrary and may differ from what eig
# returned; projections may flip sign, reconstructions are unaffected.
eigenvalues, eigenvectors = np.linalg.eigh(C)

# Order the eigenpairs by decreasing eigenvalue (largest variance first).
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors.T[sorted_indices]  # transpose so each ROW is one eigenvector

# Share of the total variance carried by each eigenvector, in percent.
explained_variance_pct = eigenvalues / np.sum(eigenvalues) * 100

# Iterate over the eigenpairs themselves: there is one per feature (d of them),
# which equals the sample count n only by coincidence in this example.
for i, (eigenvalue, eigenvector) in enumerate(zip(eigenvalues, eigenvectors), start=1):
  print(f"Eigenvalue {i}:   {eigenvalue:.4f}")
  print(f"Eigenvector {i}:    {eigenvector}")
  print(f"Variance explained by eigenvector {i}: {explained_variance_pct[i - 1]:.2f}%\n")

Eigenvalue 1:   15.5150
Eigenvector 1:    [-0.8288 -0.5198 -0.2072]
Variance explained by eigenvector 1: 99.74%

Eigenvalue 2:   0.0406
Eigenvector 2:    [-0.5043  0.8543 -0.1261]
Variance explained by eigenvector 2: 0.26%

Eigenvalue 3:   -0.0000
Eigenvector 3:    [-0.2425  0.      0.9701]
Variance explained by eigenvector 3: -0.00%



In [10]:
# Project the centered data onto the first eigenvector only.
# eigenvectors[:1] keeps it as a 1-row matrix, so the result is (1, n).
X_projected = np.matmul(eigenvectors[:1], X_centered)
print(f"Projected data on 1st eigenvector: {X_projected}")

Projected data on 1st eigenvector: [[-4.9085  0.1733  4.7352]]


In [11]:
# Map the 1-D projection back into feature space, then add the feature means
# that were removed during centering.
X_reconstructed = X_mean + eigenvectors[:1].T @ X_projected

print("Reconstructed data from 1st eigenvector:")
print(X_reconstructed)

print("\nOriginal data:")
print(X)

# Frobenius norm of the residual: overall distance between original and reconstruction.
reconstruction_error = np.linalg.norm(X - X_reconstructed, "fro")
print(f"\nReconstruction error: {reconstruction_error:.4f}")

Reconstructed data from 1st eigenvector:
[[10.0679  5.8564  2.0757]
 [ 4.8849  2.2433 -0.1282]
 [ 2.017   0.9641  0.0189]]

Original data:
[[10  6  2]
 [ 5  2  0]
 [ 2  1  0]]

Reconstruction error: 0.3489


In [13]:
# Plain string: the message has no placeholders, so no f-prefix is needed.
print("Projected data on 1st and 2nd eigenvectors:")

# Project the centered data onto the first two eigenvectors (rows 0 and 1);
# the result has one row per retained eigenvector.
X_projected_1_2 = eigenvectors[:2] @ X_centered
X_projected_1_2
X_projected_1_2.shape

Projected data on 1st and 2nd eigenvectors:


array([[-4.9085,  0.1733,  4.7352],
       [ 0.1347, -0.2848,  0.1501]])

(2, 3)

In [14]:
# Map the 2-D projection back into feature space, then add the feature means
# that were removed during centering.
X_reconstructed_1_2 = X_mean + eigenvectors[:2].T @ X_projected_1_2

print("Reconstructed data from 1st and 2nd eigenvectors:")
print(X_reconstructed_1_2)

print("\nOriginal data:")
print(X)

# Frobenius norm of the residual: overall distance between original and reconstruction.
reconstruction_error_1_2 = np.linalg.norm(X - X_reconstructed_1_2, "fro")
print(f"\nReconstruction error: {reconstruction_error_1_2:.4f}")

Reconstructed data from 1st and 2nd eigenvectors:
[[10.  6.  2.]
 [ 5.  2.  0.]
 [ 2.  1. -0.]]

Original data:
[[10  6  2]
 [ 5  2  0]
 [ 2  1  0]]

Reconstruction error: 0.0000
