In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml

In [2]:
# PCA from scratch
class PCA_Scratch:
    """Principal component analysis via eigendecomposition of the covariance matrix.

    Parameters
    ----------
    n_components : int
        Number of leading principal directions to keep. Must satisfy
        ``1 <= n_components <= n_features`` at fit time.

    Attributes
    ----------
    components : ndarray of shape (n_features, n_components) or None
        Columns are the covariance eigenvectors, ordered by decreasing
        eigenvalue. ``None`` until ``fit`` has been called.
    mean : ndarray of shape (n_features,) or None
        Per-feature mean of the training data, subtracted before projecting.
    explained_variance : ndarray of shape (n_components,) or None
        Eigenvalues matching the kept components, i.e. the sample variance
        (ddof=1, the ``np.cov`` default) of the data along each component.
    """

    def __init__(self, n_components):
        self.n_components = n_components
        self.components = None
        self.mean = None
        self.explained_variance = None

    def fit(self, X):
        """Learn the mean and the top principal directions of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        self : PCA_Scratch
            The fitted instance, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has fewer than two samples, or
            ``n_components`` is not an integer in ``[1, n_features]``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim}-D input"
            )
        n_samples, n_features = X.shape
        if n_samples < 2:
            # np.cov divides by (n_samples - 1); one sample yields NaNs.
            raise ValueError("at least two samples are required to estimate covariance")

        k = self.n_components
        if not isinstance(k, (int, np.integer)) or not 1 <= k <= n_features:
            # Without this check the slice below silently truncates (k too
            # large) or drops trailing components (k negative).
            raise ValueError(
                f"n_components must be an integer in [1, {n_features}]; got {k!r}"
            )

        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # np.cov collapses to a 0-d array for a single feature; eigh needs 2-D.
        cov_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))

        # eigh is the right solver for a symmetric matrix: real eigenvalues,
        # orthonormal eigenvectors (returned as columns).
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

        # Largest-variance directions first.
        sorted_idx = np.argsort(eigenvalues)[::-1][:k]
        self.components = eigenvectors[:, sorted_idx]
        self.explained_variance = eigenvalues[sorted_idx]
        return self

    def transform(self, X):
        """Project ``X`` onto the fitted principal components.

        Parameters
        ----------
        X : array-like whose last axis has length n_features

        Returns
        -------
        ndarray
            ``(X - mean) @ components``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.components is None or self.mean is None:
            raise RuntimeError("PCA_Scratch.transform called before fit")
        X_centered = np.asarray(X, dtype=float) - self.mean
        return np.dot(X_centered, self.components)

    def fit_transform(self, X):
        """Fit on ``X`` and return its projection in one call."""
        return self.fit(X).transform(X)

In [None]:
# Load and preprocess MNIST
N_SAMPLES = 1000   # small subset keeps the eigendecomposition and the plot fast
PIXEL_MAX = 255.0  # divisor used to normalize pixel values
N_DIGITS = 10      # digit classes 0-9, used to discretize the colormap

# as_frame=False pins the return type to NumPy arrays. Without it, whether
# `mnist.data` is a DataFrame (and so has `.values`) depends on the
# scikit-learn version's default.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)
X = mnist.data[:N_SAMPLES]
y = mnist.target[:N_SAMPLES].astype(int)  # slice first, then cast only what is used
X_scaled = X / PIXEL_MAX  # Normalize pixel values

# Apply PCA
pca = PCA_Scratch(n_components=2)
pca.fit(X_scaled)
X_pca = pca.transform(X_scaled)

# Plot first two principal components
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    X_pca[:, 0],
    X_pca[:, 1],
    c=y,
    cmap='tab10',          # qualitative map: digit labels are categories, not a scale
    vmin=-0.5,
    vmax=N_DIGITS - 0.5,   # centers each integer label in its own color bin
    edgecolor='k',
    s=50,
)
fig.colorbar(points, ax=ax, ticks=range(N_DIGITS), label='Digit Label')
ax.set_title('PCA: First Two Principal Components of MNIST')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
plt.show()