In [28]:
import numpy as np

# Given dataset
X = np.array([[1, 2, 2], [1, 2, 1], [3, 2, 1], [1, 2, 3]])
# Step 1: Calculate the mean vector
mean_vector = np.mean(X, axis=0)

# Step 2: Subtract the mean vector from each data point
centered_data = X - mean_vector
centered_data


array([[-0.5 ,  0.  ,  0.25],
       [-0.5 ,  0.  , -0.75],
       [ 1.5 ,  0.  , -0.75],
       [-0.5 ,  0.  ,  1.25]])

In [29]:
# Step 3: Calculate the transpose of the centered data matrix
centered_data_T = centered_data.T
centered_data_T

array([[-0.5 , -0.5 ,  1.5 , -0.5 ],
       [ 0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.25, -0.75, -0.75,  1.25]])

In [30]:
# Step 4: Multiply the centered data matrix by its transpose
covariance_matrix = np.dot(centered_data_T, centered_data)
covariance_matrix

array([[ 3.  ,  0.  , -1.5 ],
       [ 0.  ,  0.  ,  0.  ],
       [-1.5 ,  0.  ,  2.75]])

In [31]:
# Step 5: Divide the result by (n-1)
n = X.shape[1]  # number of samples
n

3

In [32]:
covariance_matrix_normalized = covariance_matrix / (n - 1)

print("Covariance Matrix normalized by (n-1):")
print(covariance_matrix_normalized)

Covariance Matrix normalized by (n-1):
[[ 1.5    0.    -0.75 ]
 [ 0.     0.     0.   ]
 [-0.75   0.     1.375]]


In [34]:
# Eigen decomposition of the covariance matrix
eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix_normalized)

In [35]:
eigenvalues

array([2.19009966, 0.68490034, 0.        ])

In [36]:
eigenvectors

array([[ 0.73588229,  0.67710949,  0.        ],
       [ 0.        ,  0.        ,  1.        ],
       [-0.67710949,  0.73588229,  0.        ]])

In [37]:
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

In [38]:
# Select first k eigenvectors (here, let's say k=2 for simplicity)
k = 2
projection_matrix = eigenvectors[:, :k]

In [39]:
projection_matrix

array([[ 0.73588229,  0.67710949],
       [ 0.        ,  0.        ],
       [-0.67710949,  0.73588229]])

In [40]:
principal_components = eigenvectors
variances = eigenvalues

In [41]:
print("\nPrincipal Components:\n", principal_components)
print("\nVariances:\n", variances)


Principal Components:
 [[ 0.73588229  0.67710949  0.        ]
 [ 0.          0.          1.        ]
 [-0.67710949  0.73588229  0.        ]]

Variances:
 [2.19009966 0.68490034 0.        ]
