Principal Component Analysis (PCA) on a given dataset containing two features: X with values 4, 8, 13, 7 and Y with values 11, 4, 5, 14.

In [3]:
import numpy as np

# PCA walk-through on a tiny two-feature dataset: build the data matrix,
# centre it, form the sample covariance, eigen-decompose it, and express the
# centred observations in the eigenvector basis.

# Raw observations: four samples for each of the two features.
X = np.array([4, 8, 13, 7])
Y = np.array([11, 4, 5, 14])

# Stack the features as rows, then transpose so that each row is one
# observation and each column is one feature -> shape (4, 2).
data = np.stack([X, Y]).T

# Per-feature means, taken column-wise over the data matrix.
mean_vector = data.mean(axis=0)
mean_X, mean_Y = mean_vector

# Remove the mean so every feature is centred on zero.
centered_data = data - mean_vector

# Sample covariance matrix (normalised by n - 1, not n).
cov_matrix = centered_data.T.dot(centered_data) / (X.size - 1)

# Eigen-decomposition; column i of `eig_vectors` pairs with `eig_values[i]`.
# NOTE(review): the covariance matrix is symmetric, so np.linalg.eigh would
# also apply here; it may return differently signed eigenvectors.
eig_values, eig_vectors = np.linalg.eig(cov_matrix)

# Reorder so the largest eigenvalue (first principal component) comes first.
sorted_indices = np.flip(np.argsort(eig_values))
eig_values, eig_vectors = eig_values[sorted_indices], eig_vectors[:, sorted_indices]

# Coordinates of the centred observations along the principal components.
projected_data = np.matmul(centered_data, eig_vectors)

# Report every intermediate result.
for _label, _value in (
    ("Mean Vector:", mean_vector),
    ("Covariance Matrix:\n", cov_matrix),
    ("Eigenvalues:", eig_values),
    ("Eigenvectors:\n", eig_vectors),
    ("Projected Data:\n", projected_data),
):
    print(_label, _value)

Mean Vector: [8.  8.5]
Covariance Matrix:
 [[ 14. -11.]
 [-11.  23.]]
Eigenvalues: [30.38486432  6.61513568]
Eigenvectors:
 [[ 0.55738997 -0.83025082]
 [-0.83025082 -0.55738997]]
Projected Data:
 [[-4.30518692  1.92752836]
 [ 3.73612869  2.50825486]
 [ 5.69282771 -2.20038921]
 [-5.12376947 -2.23539401]]
