# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset
# Forward slashes are accepted on both Windows and POSIX and keep backslash
# escape sequences out of the path literals.
# sep=r'\s+' splits fields on runs of whitespace; header=None because the
# files are read as having no header row.
X = pd.read_csv('train/X_train.txt', sep=r'\s+', header=None).values
y = pd.read_csv('train/y_train.txt', sep=r'\s+', header=None).values.ravel()

# Step 1: Mean Centering
def center_data(X):
    """Subtract each column's mean from that column.

    Parameters
    ----------
    X : array-like of shape (m, n)
        One row per sample, one column per feature.

    Returns
    -------
    X_centered : np.ndarray of shape (m, n)
        ``X`` with every column shifted to have zero mean (float64).
    means : np.ndarray of shape (n,)
        The per-column means that were subtracted.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or contains no rows.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (samples x features), got {X.ndim}-D")
    if X.shape[0] == 0:
        raise ValueError("X must contain at least one sample")

    means = X.mean(axis=0)
    # Broadcasting subtracts the (n,) mean vector from every row.
    return X - means, means

# Step 2: Covariance Matrix (Manual)
def compute_covariance_matrix(X):
    """Return the sample covariance matrix of already-centered data.

    Computes ``X.T @ X / (m - 1)``; no mean is subtracted here, so the
    caller is expected to pass column-centered data.

    Parameters
    ----------
    X : array-like of shape (m, n)
        One row per sample, one column per feature.

    Returns
    -------
    np.ndarray of shape (n, n)
        Entry ``[i, j]`` is ``sum(X[:, i] * X[:, j]) / (m - 1)``.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or has fewer than two rows (the ``m - 1``
        denominator would be zero or negative).
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (samples x features), got {X.ndim}-D")
    m, _ = X.shape
    if m < 2:
        raise ValueError("at least two samples are required to compute a covariance matrix")

    # One matrix product yields every pairwise column dot product at once.
    return (X.T @ X) / (m - 1)

# Step 3: Power Iteration (manual eigenvalue/eigenvector extraction)
def power_iteration(A, num_iterations=1000, tolerance=1e-10, seed=None):
    """Estimate the dominant eigenpair of a square matrix by power iteration.

    Parameters
    ----------
    A : np.ndarray of shape (n, n)
        Matrix whose largest-magnitude eigenvalue is sought.
    num_iterations : int
        Maximum number of multiply-and-normalize steps.
    tolerance : float
        Iteration stops once successive unit vectors differ (up to sign)
        by less than this Euclidean distance.
    seed : int or None
        When given, the random start vector is drawn from
        ``np.random.default_rng(seed)`` so results are reproducible.
        When ``None`` the global ``np.random`` state is used.

    Returns
    -------
    eigenvalue : float
        Rayleigh quotient ``b . (A b)`` of the final unit vector ``b``.
    b_k : np.ndarray of shape (n,)
        The final unit-norm iterate.
    """
    n = A.shape[1]
    if seed is None:
        b_k = np.random.rand(n)
    else:
        b_k = np.random.default_rng(seed).random(n)
    b_k = b_k / np.linalg.norm(b_k)

    for _ in range(num_iterations):
        b_k1 = np.dot(A, b_k)
        b_k1_norm = np.linalg.norm(b_k1)
        if b_k1_norm == 0:
            # A maps the current vector to zero; keep it (eigenvalue 0).
            break
        b_k_new = b_k1 / b_k1_norm

        # With a negative dominant eigenvalue the iterate flips sign every
        # step, so compare against both +b_k_new and -b_k_new.
        change = min(np.linalg.norm(b_k - b_k_new), np.linalg.norm(b_k + b_k_new))
        # Adopt the newest iterate before testing, so it is what gets returned.
        b_k = b_k_new
        if change < tolerance:
            break

    eigenvalue = np.dot(b_k, np.dot(A, b_k))
    return eigenvalue, b_k

# Step 4: Get Top-k Eigenvectors using Deflation
def get_top_k_components(cov_matrix, k):
    """Extract the k leading eigenpairs by repeated power iteration and deflation.

    After each eigenpair ``(lam, v)`` is found, ``lam * outer(v, v)`` is
    subtracted from the working matrix so the next power iteration
    converges to the next component. The input matrix is not modified.

    Parameters
    ----------
    cov_matrix : np.ndarray of shape (n, n)
        Matrix to decompose; the rank-one deflation used here presumes it
        is symmetric.
    k : int
        Number of components to extract, ``0 <= k <= n``.

    Returns
    -------
    eigenvalues : np.ndarray of shape (k,)
        Eigenvalue estimates in the order they were extracted.
    eigenvectors : np.ndarray of shape (n, k)
        Corresponding unit vectors, one per column.

    Raises
    ------
    ValueError
        If ``k`` is negative or exceeds the matrix dimension.
    """
    n = cov_matrix.shape[0]
    if not 0 <= k <= n:
        raise ValueError(f"k must be between 0 and {n}, got {k}")

    # Preallocating keeps the (n, k) shape even when k == 0.
    eigenvalues = np.empty(k)
    eigenvectors = np.empty((n, k))
    A = np.array(cov_matrix, dtype=float)  # working copy; the caller's matrix is untouched

    for idx in range(k):
        eigval, eigvec = power_iteration(A)
        eigenvalues[idx] = eigval
        eigenvectors[:, idx] = eigvec
        # Deflation step: remove the component just found.
        A = A - eigval * np.outer(eigvec, eigvec)

    return eigenvalues, eigenvectors

# Step 5: Project data onto new k-dimensional subspace
def project_data(X, eigenvectors):
    """Return the dot product of ``X`` with ``eigenvectors``.

    With ``X`` of shape (m, n) and ``eigenvectors`` of shape (n, k), the
    result has shape (m, k): each row of ``X`` expressed in the basis
    given by the columns of ``eigenvectors``.
    """
    projected = np.dot(X, eigenvectors)
    return projected

# Step 6: Visualize in 2D
def visualize_2d(X_reduced, y):
    """Scatter-plot the first two columns of ``X_reduced``, grouped by label.

    Parameters
    ----------
    X_reduced : array-like of shape (m, >=2)
        Column 0 is drawn on the x-axis and column 1 on the y-axis.
    y : array-like of length m
        One label per row of ``X_reduced``. Labels 1-6 are shown with
        their activity names; any other value is shown as its string form.

    Opens a matplotlib figure and displays it with ``plt.show()``;
    returns nothing.
    """
    activity_labels = {
        1: 'WALKING',
        2: 'WALKING_UPSTAIRS',
        3: 'WALKING_DOWNSTAIRS',
        4: 'SITTING',
        5: 'STANDING',
        6: 'LAYING'
    }
    # Coerce to arrays so `y == label` is always an element-wise mask,
    # and flatten y so a column vector indexes rows correctly.
    X_reduced = np.asarray(X_reduced)
    y = np.asarray(y).ravel()

    plt.figure(figsize=(10, 8))
    for label in np.unique(y):
        indices = y == label
        # Unknown labels fall back to their raw value instead of raising KeyError.
        legend_name = activity_labels.get(label, str(label))
        plt.scatter(X_reduced[indices, 0], X_reduced[indices, 1], label=legend_name, alpha=0.5)
    plt.xlabel("Principal Component 1")
    plt.ylabel("Principal Component 2")
    plt.title("PCA Projection (2D) - From Scratch")
    plt.legend()
    plt.grid(True)
    plt.show()

# MAIN PIPELINE
X_centered, means = center_data(X)
cov_matrix = compute_covariance_matrix(X_centered)
k = 2
eigenvalues, eigenvectors = get_top_k_components(cov_matrix, k)
X_reduced = project_data(X_centered, eigenvectors)

# Explained Variance
# The total variance is the trace of the covariance matrix (the sum of the
# per-feature variances, equal to the sum of ALL eigenvalues). Dividing by the
# sum of only the k retained eigenvalues would make the ratios total 100%.
total_variance = np.trace(cov_matrix)
for i, val in enumerate(eigenvalues):
    print(f"Principal Component {i+1} explains {val / total_variance:.2%} of the variance.")

# Visualize
visualize_2d(X_reduced, y)
