In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mp

In [4]:
data = {
    'F1_Sepal_Length': [5.1, 4.9, 4.7, 4.6, 5.0],
    'F2_Sepal_Width': [3.5, 3.0, 3.2, 3.1, 3.6],
    'F3_Petal_Length': [1.4, 1.4, 1.3, 1.5, 1.4],
    'F4_Petal_Width': [0.2, 0.2, 0.2, 0.2, 0.2]
}

In [5]:
df = pd.DataFrame(data)
X = df.values

In [6]:
print(f"Original Shape: {X.shape}")

Original Shape: (5, 4)


In [7]:
mean_vec = np.mean(X, axis = 0)
std_dev_vec = np.std(X, axis = 0)

In [8]:
std_dev_vec[std_dev_vec == 0] = 1

X_std = (X - mean_vec)/std_dev_vec

In [9]:
print("\nStep 1: Data Standardized (X_std)")
print(X_std[:2])



Step 1: Data Standardized (X_std)
[[ 1.29399328  0.95025527  0.          0.        ]
 [ 0.21566555 -1.2094158   0.          0.        ]]


In [12]:
cov_matrix = np.cov(X_std, rowvar = False)
print("\nStep 2: Covariance Matrix (C)")
print(cov_matrix)


Step 2: Covariance Matrix (C)
[[ 1.25        0.85002412 -0.21312323  0.        ]
 [ 0.85002412  1.25       -0.17073699  0.        ]
 [-0.21312323 -0.17073699  1.25        0.        ]
 [ 0.          0.          0.          0.        ]]


In [13]:
eigen_values, eigen_vectors = np.linalg.eig(cov_matrix)
print("\nStep 3a: Eigenvalues (Variance explained by each Principal Component)")
print(eigen_values)
print("\nStep 3b: Eigenvectors (Principal Component Directions)")
print(eigen_vectors)


Step 3a: Eigenvalues (Variance explained by each Principal Component)
[2.17934293 0.39886389 1.17179318 0.        ]

Step 3b: Eigenvectors (Principal Component Directions)
[[-0.68205368 -0.7107912   0.17198446  0.        ]
 [-0.67537211  0.7024253   0.22465801  0.        ]
 [ 0.28049117 -0.03707531  0.95914031  0.        ]
 [ 0.          0.          0.          1.        ]]


In [14]:
eigen_pairs = [(np.abs(eigen_values[i]), eigen_vectors[:, i]) for i in range(len(eigen_values))]

In [15]:
eigen_pairs.sort(key=lambda x: x[0], reverse=True)

In [16]:
total_variance = sum(eigen_values)
explained_variance_ratio = [(i / total_variance) * 100 for i in sorted(eigen_values, reverse=True)]
print("\nStep 4a: Explained Variance Ratio (%)")
for i, ratio in enumerate(explained_variance_ratio):
    print(f"PC{i+1}: {ratio:.2f}%")


Step 4a: Explained Variance Ratio (%)
PC1: 58.12%
PC2: 31.25%
PC3: 10.64%
PC4: 0.00%


In [17]:
k = 2 
print(f"\nChoosing k={k} components.")


Choosing k=2 components.


In [18]:
W = np.hstack([eigen_pairs[i][1].reshape(X.shape[1], 1) for i in range(k)])

print(f"Projection Matrix (W) Shape: {W.shape}")
print(W)

Projection Matrix (W) Shape: (4, 2)
[[-0.68205368  0.17198446]
 [-0.67537211  0.22465801]
 [ 0.28049117  0.95914031]
 [ 0.          0.        ]]


In [19]:
X_pca = X_std.dot(W)

print(f"\nStep 4c: Final Reduced Data (X_pca) Shape: {X_pca.shape}")
print("First 5 rows of the projected data (Z):")
print(X_pca[:5])


Step 4c: Final Reduced Data (X_pca) Shape: (5, 2)
First 5 rows of the projected data (Z):
[[-1.52434878  0.43602919]
 [ 0.66971022 -0.23461382]
 [ 0.3782595  -1.74252845]
 [ 1.92470547  1.10077422]
 [-1.4483264   0.44033886]]
