In [31]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
import time

In [None]:
# Load the training and validation splits from their CSV files (same order as
# the targets on the left-hand side).
dftrain, dfval = map(
    pd.read_csv,
    ('/content/MNIST_train.csv', '/content/MNIST_validation.csv'),
)

In [None]:
# Remove the 'even' column so it is not picked up as a feature column later.
# errors='ignore' keeps this cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
dftrain = dftrain.drop(columns='even', errors='ignore')

In [None]:
# Remove the 'even' column from the validation frame as well.
# errors='ignore' keeps this cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
dfval = dfval.drop(columns='even', errors='ignore')

In [None]:
# Every column except the target is treated as an input feature.
targetcol = 'label'
featurecols = [*dftrain.columns]
featurecols.remove(targetcol)  # raises ValueError if the target column is missing

print('length of featurecolumns is', len(featurecols))


In [None]:
# Build NumPy feature matrices and label vectors for both splits.
# Features are divided by 255 (presumably 8-bit pixel intensities, which would
# map them to [0, 1] -- TODO confirm against the CSV contents).
Xtrain, Xval = (np.array(frame[featurecols]) / 255 for frame in (dftrain, dfval))
ytrain, yval = (np.array(frame[targetcol]) for frame in (dftrain, dfval))

In [33]:
class PCAModel:
    """Principal component analysis via eigendecomposition of the covariance matrix.

    After ``fit`` the following attributes are populated:

    * ``mean`` -- per-feature mean of the fitted data, shape ``(n_features,)``.
    * ``components`` -- eigenvectors stored as columns, shape
      ``(n_features, n_components)``, ordered by decreasing eigenvalue.
    * ``explained_variance`` -- the matching eigenvalues, shape ``(n_components,)``.
    """

    def __init__(self, n_components):
        """Store the number of components to keep; nothing is computed here."""
        self.n_components = n_components
        self.mean = None
        self.components = None
        self.explained_variance = None

    def fit(self, X):
        """Estimate the mean and the leading principal components of ``X``.

        Args:
            X: array-like of shape ``(n_samples, n_features)``.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if ``X`` is not 2-D, has fewer than two samples, or
                ``n_components`` is not an integer in ``[1, n_features]``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")

        n_samples, n_features = X.shape
        if n_samples < 2:
            raise ValueError("PCA needs at least two samples to estimate a covariance.")

        k = self.n_components
        # A negative k would silently slice off trailing components and an
        # oversized k would silently truncate, so reject both explicitly.
        if (
            isinstance(k, bool)
            or not isinstance(k, (int, np.integer))
            or not 1 <= k <= n_features
        ):
            raise ValueError(
                f"n_components must be an integer in [1, {n_features}], got {k!r}."
            )

        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # np.cov collapses the single-feature case to a 0-d array; eigh needs 2-D.
        cov_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

        # Order the eigenpairs from largest to smallest eigenvalue.
        sorted_idx = np.argsort(eigenvalues)[::-1][:k]

        # Round-off can make near-zero eigenvalues slightly negative; a
        # variance cannot be negative, so clamp at zero.
        self.explained_variance = np.maximum(eigenvalues[sorted_idx], 0.0)
        self.components = eigenvectors[:, sorted_idx]
        return self

    def predict(self, X):
        """Project ``X`` onto the fitted components.

        Args:
            X: array-like of shape ``(n_samples, n_features)``.

        Returns:
            ndarray of shape ``(n_samples, n_components)``.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if self.mean is None:
            raise ValueError("PCA is not fitted yet.")

        X_centered = np.asarray(X, dtype=float) - self.mean
        return np.dot(X_centered, self.components)

    def reconstruct(self, X):
        """Project ``X`` to the component space and map it back to feature space.

        Returns:
            ndarray with the same shape as ``X``; equal to ``X`` (up to
            round-off) only when all components are kept.
        """
        Z = self.predict(X)
        return np.dot(Z, self.components.T) + self.mean

In [34]:
class SoftmaxRegression:
    """Multinomial logistic (softmax) regression trained by full-batch gradient descent.

    Args:
        learning_rate: step size of each gradient-descent update.
        epochs: number of full passes (one update per pass) over the data.
        random_state: optional seed for the weight initialisation. ``None``
            (the default) draws from NumPy's global random state, exactly as
            before, so ``np.random.seed`` still controls it.
        verbose: when true (the default), print the loss every 100 epochs.

    After ``fit``: ``W`` has shape ``(n_features, n_classes)`` and ``b`` has
    shape ``(1, n_classes)``.
    """

    def __init__(self, learning_rate=0.1, epochs=1000, random_state=None, verbose=True):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.random_state = random_state
        self.verbose = verbose
        self.W = None
        self.b = None

    def _softmax(self, z):
        """Row-wise softmax; the row maximum is subtracted first to avoid overflow."""
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def _one_hot(self, y, num_classes):
        """Return a ``(len(y), num_classes)`` indicator matrix for integer labels ``y``."""
        return np.eye(num_classes)[y]

    def _cross_entropy_loss(self, y_true, y_pred):
        """Mean cross-entropy; 1e-15 is added inside the log to avoid ``log(0)``."""
        return -np.mean(np.sum(y_true * np.log(y_pred + 1e-15), axis=1))

    def _check_is_fitted(self):
        """Raise ValueError when the parameters have not been learned yet."""
        if self.W is None or self.b is None:
            raise ValueError("SoftmaxRegression is not fitted yet.")

    def fit(self, X, y):
        """Learn ``W`` and ``b`` from features ``X`` and integer class labels ``y``.

        Args:
            X: array-like of shape ``(n_samples, n_features)``.
            y: array-like of shape ``(n_samples,)`` with non-negative integer
                labels; the number of classes is taken as ``max(y) + 1``.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: on empty or wrongly shaped input, or labels that are
                not non-negative integers.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features).")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be a 1-D array with one label per row of X.")

        # Labels are used as indices into an identity matrix, so they must be
        # integers; accept integer-valued floats (e.g. 3.0) by casting.
        labels = y.astype(int)
        if not np.array_equal(labels, y) or labels.min() < 0:
            raise ValueError("y must contain non-negative integer class labels.")

        num_samples, num_features = X.shape
        num_classes = int(labels.max()) + 1

        # Use a private generator only when a seed is given; otherwise keep
        # drawing from the global state as the original implementation did.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)
        self.W = rng.randn(num_features, num_classes) * 0.01
        self.b = np.zeros((1, num_classes))

        Y_onehot = self._one_hot(labels, num_classes)

        for epoch in range(self.epochs):
            logits = np.dot(X, self.W) + self.b
            probs = self._softmax(logits)

            # Gradient of the mean cross-entropy with respect to the logits.
            grad_logits = (1. / num_samples) * (probs - Y_onehot)
            grad_W = np.dot(X.T, grad_logits)
            grad_b = np.sum(grad_logits, axis=0, keepdims=True)

            self.W -= self.learning_rate * grad_W
            self.b -= self.learning_rate * grad_b

            # The loss is only needed for reporting, so compute it on demand.
            # It is evaluated on the pre-update probabilities of this epoch.
            if self.verbose and epoch % 100 == 0:
                loss = self._cross_entropy_loss(Y_onehot, probs)
                print(f"Epoch {epoch}: Loss = {loss:.4f}")

        return self

    def predict_proba(self, X):
        """Return class probabilities of shape ``(n_samples, n_classes)``.

        Raises:
            ValueError: if the model has not been fitted.
        """
        self._check_is_fitted()
        logits = np.dot(np.asarray(X, dtype=float), self.W) + self.b
        return self._softmax(logits)

    def predict(self, X):
        """Return the most probable class index for every row of ``X``."""
        probs = self.predict_proba(X)
        return np.argmax(probs, axis=1)


In [39]:
# Baseline run: project both splits onto the top 100 principal components and
# train the softmax classifier on the projected features.
n_components = 100
pca = PCAModel(n_components=n_components)

print("\nFitting PCA...")
pca.fit(Xtrain)  # PCA is fitted on the training split only

Xtrain_pca = pca.predict(Xtrain)
Xval_pca = pca.predict(Xval)  # validation data reuses the fitted PCA

print("Shape after PCA:", Xtrain_pca.shape)
epochs = 1000
lr = 0.1

model = SoftmaxRegression(learning_rate=lr, epochs=epochs)

print("\nTraining Softmax Regression...")
start = time.time()
model.fit(Xtrain_pca, ytrain)
end = time.time()

print(f"\nTraining time: {end - start:.2f} seconds")

ypred_train = model.predict(Xtrain_pca)
# NOTE: despite the name, these are predictions on the validation split.
ypred_test = model.predict(Xval_pca)
train_acc = accuracy_score(ytrain, ypred_train)
val_acc = accuracy_score(yval, ypred_test)

train_f1 = f1_score(ytrain, ypred_train, average='weighted')
val_f1 = f1_score(yval, ypred_test, average='weighted')

print("\n----------------------------")
print("PCA + Softmax Regression RESULTS")
print("----------------------------")
print("Train Accuracy:", train_acc)
# Label spelling fixed ("Valdation" -> "Validation") to match the other cells.
print("Validation Accuracy:", val_acc)
print("Train F1:", train_f1)
print("Validation F1:", val_f1)

length of featurecolumns is 784

Fitting PCA...
Shape after PCA: (10002, 100)

Training Softmax Regression...
Epoch 0: Loss = 2.3020
Epoch 100: Loss = 0.6145
Epoch 200: Loss = 0.4954
Epoch 300: Loss = 0.4460
Epoch 400: Loss = 0.4173
Epoch 500: Loss = 0.3979
Epoch 600: Loss = 0.3836
Epoch 700: Loss = 0.3724
Epoch 800: Loss = 0.3634
Epoch 900: Loss = 0.3558

Training time: 20.01 seconds

----------------------------
PCA + Softmax Regression RESULTS
----------------------------
Train Accuracy: 0.9018196360727855
Valdation Accuracy: 0.9048
Train F1: 0.9013731446516896
Validation F1: 0.9043380506089935


Hyperparameter Tuning

In [41]:
# Sweep over the number of retained principal components. Each setting refits
# PCA on the training split and trains a fresh classifier on the projection.
Components = [600,500,400,300,200,100,50]

for c in Components:
    n_components = c
    # Optimiser settings are identical for every component count.
    epochs, lr = 1000, 0.1

    print("\n====================================")
    print("Number of components:", n_components)
    print("====================================")

    print("\nFitting PCA...")
    pca_c = PCAModel(n_components=n_components)
    pca_c.fit(Xtrain)

    # Both splits are projected with the PCA fitted on the training data.
    Xtrain_pca_c, Xval_pca_c = pca_c.predict(Xtrain), pca_c.predict(Xval)
    print("Shape after PCA:", Xtrain_pca_c.shape)

    model_c = SoftmaxRegression(learning_rate=lr, epochs=epochs)

    print("\nTraining Softmax Regression...")
    start = time.time()
    model_c.fit(Xtrain_pca_c, ytrain)
    end = time.time()
    print(f"\nTraining time: {end - start:.2f} seconds")

    ypred_train_c, ypred_val_c = (
        model_c.predict(feats) for feats in (Xtrain_pca_c, Xval_pca_c)
    )

    train_acc_c, val_acc_c = (
        accuracy_score(ytrain, ypred_train_c),
        accuracy_score(yval, ypred_val_c),
    )
    train_f1_c, val_f1_c = (
        f1_score(ytrain, ypred_train_c, average='weighted'),
        f1_score(yval, ypred_val_c, average='weighted'),
    )

    print(
        "\n----------------------------",
        "PCA + Softmax Regression RESULTS",
        "----------------------------",
        sep="\n",
    )
    print("Train Accuracy:", train_acc_c)
    print("Validation Accuracy:", val_acc_c)
    print("Train F1:", train_f1_c)
    print("Validation F1:", val_f1_c)



Number of components: 600

Fitting PCA...
Shape after PCA: (10002, 600)

Training Softmax Regression...
Epoch 0: Loss = 2.2876
Epoch 100: Loss = 0.6117
Epoch 200: Loss = 0.4917
Epoch 300: Loss = 0.4413
Epoch 400: Loss = 0.4116
Epoch 500: Loss = 0.3914
Epoch 600: Loss = 0.3762
Epoch 700: Loss = 0.3643
Epoch 800: Loss = 0.3545
Epoch 900: Loss = 0.3462

Training time: 46.21 seconds

----------------------------
PCA + Softmax Regression RESULTS
----------------------------
Train Accuracy: 0.9050189962007599
Validation Accuracy: 0.9048
Train F1: 0.9045975847822953
Validation F1: 0.9043213351107788

Number of components: 500

Fitting PCA...
Shape after PCA: (10002, 500)

Training Softmax Regression...
Epoch 0: Loss = 2.3108
Epoch 100: Loss = 0.6113
Epoch 200: Loss = 0.4914
Epoch 300: Loss = 0.4411
Epoch 400: Loss = 0.4115
Epoch 500: Loss = 0.3913
Epoch 600: Loss = 0.3762
Epoch 700: Loss = 0.3642
Epoch 800: Loss = 0.3544
Epoch 900: Loss = 0.3462

Training time: 38.95 seconds

---------------

In [42]:
# Epoch sweep: the PCA projection is fixed at 100 components and only the
# number of training epochs changes between runs.
n_components = 100
pca = PCAModel(n_components=n_components)

print("\nFitting PCA...")
pca.fit(Xtrain)

Xtrain_pca, Xval_pca = pca.predict(Xtrain), pca.predict(Xval)
print("Shape after PCA:", Xtrain_pca.shape)

n_epochs = [100, 200, 500, 1000, 2000]
lr = 0.1

for e in n_epochs:
    print(
        "\n==============================",
        f"Running epochs = {e}",
        "==============================",
        sep="\n",
    )

    # A new model per setting, so every run starts from a fresh initialisation.
    model_e = SoftmaxRegression(learning_rate=lr, epochs=e)

    print("\nTraining Softmax Regression...")
    start = time.time()
    model_e.fit(Xtrain_pca, ytrain)
    end = time.time()

    print(f"\nTraining time: {end - start:.2f} seconds")

    ypred_train_e, ypred_val_e = (
        model_e.predict(feats) for feats in (Xtrain_pca, Xval_pca)
    )

    train_acc_e, val_acc_e = (
        accuracy_score(ytrain, ypred_train_e),
        accuracy_score(yval, ypred_val_e),
    )
    train_f1_e, val_f1_e = (
        f1_score(ytrain, ypred_train_e, average='weighted'),
        f1_score(yval, ypred_val_e, average='weighted'),
    )

    print(
        "\n----------------------------",
        "PCA + Softmax Regression RESULTS",
        "----------------------------",
        sep="\n",
    )
    print("Train Accuracy:", train_acc_e)
    print("Validation Accuracy:", val_acc_e)
    print("Train F1:", train_f1_e)
    print("Validation F1:", val_f1_e)



Fitting PCA...
Shape after PCA: (10002, 100)

Running epochs = 100

Training Softmax Regression...
Epoch 0: Loss = 2.3170

Training time: 2.32 seconds

----------------------------
PCA + Softmax Regression RESULTS
----------------------------
Train Accuracy: 0.8525294941011797
Validation Accuracy: 0.8613
Train F1: 0.8512051077615719
Validation F1: 0.8597513496821423

Running epochs = 200

Training Softmax Regression...
Epoch 0: Loss = 2.2985
Epoch 100: Loss = 0.6140

Training time: 3.86 seconds

----------------------------
PCA + Softmax Regression RESULTS
----------------------------
Train Accuracy: 0.8723255348930214
Validation Accuracy: 0.88
Train F1: 0.871504161553392
Validation F1: 0.8790233054186843

Running epochs = 500

Training Softmax Regression...
Epoch 0: Loss = 2.3124
Epoch 100: Loss = 0.6146
Epoch 200: Loss = 0.4955
Epoch 300: Loss = 0.4461
Epoch 400: Loss = 0.4174

Training time: 6.34 seconds

----------------------------
PCA + Softmax Regression RESULTS
---------------

In [43]:
# Learning-rate sweep: 100 PCA components and 1000 epochs are held fixed while
# the step size varies.
n_components = 100
pca = PCAModel(n_components=n_components)

print("\nFitting PCA...")
pca.fit(Xtrain)

Xtrain_pca, Xval_pca = pca.predict(Xtrain), pca.predict(Xval)
print("Shape after PCA:", Xtrain_pca.shape)

learnr = [0.01, 0.02, 0.1, 0.2]
n_epochs = 1000  # note: a plain int here, not a list of epoch counts

for l in learnr:
    print(
        "\n==============================",
        f"Running learning rate = {l}",
        "==============================",
        sep="\n",
    )

    # A new model per setting, so every run starts from a fresh initialisation.
    model_l = SoftmaxRegression(learning_rate=l, epochs=n_epochs)

    print("\nTraining Softmax Regression...")
    start = time.time()
    model_l.fit(Xtrain_pca, ytrain)
    end = time.time()

    print(f"\nTraining time: {end - start:.2f} seconds")

    ypred_train_l, ypred_val_l = (
        model_l.predict(feats) for feats in (Xtrain_pca, Xval_pca)
    )

    train_acc_l, val_acc_l = (
        accuracy_score(ytrain, ypred_train_l),
        accuracy_score(yval, ypred_val_l),
    )
    train_f1_l, val_f1_l = (
        f1_score(ytrain, ypred_train_l, average='weighted'),
        f1_score(yval, ypred_val_l, average='weighted'),
    )

    print(
        "\n----------------------------",
        "PCA + Softmax Regression RESULTS",
        "----------------------------",
        sep="\n",
    )
    print("Train Accuracy:", train_acc_l)
    print("Validation Accuracy:", val_acc_l)
    print("Train F1:", train_f1_l)
    print("Validation F1:", val_f1_l)



Fitting PCA...
Shape after PCA: (10002, 100)

Running learning rate = 0.01

Training Softmax Regression...
Epoch 0: Loss = 2.3055
Epoch 100: Loss = 1.5317
Epoch 200: Loss = 1.1800
Epoch 300: Loss = 0.9939
Epoch 400: Loss = 0.8794
Epoch 500: Loss = 0.8014
Epoch 600: Loss = 0.7446
Epoch 700: Loss = 0.7012
Epoch 800: Loss = 0.6668
Epoch 900: Loss = 0.6388

Training time: 15.19 seconds

----------------------------
PCA + Softmax Regression RESULTS
----------------------------
Train Accuracy: 0.8533293341331734
Validation Accuracy: 0.8628
Train F1: 0.8519250350424009
Validation F1: 0.8612817078007639

Running learning rate = 0.02

Training Softmax Regression...
Epoch 0: Loss = 2.2986
Epoch 100: Loss = 1.1780
Epoch 200: Loss = 0.8783
Epoch 300: Loss = 0.7438
Epoch 400: Loss = 0.6662
Epoch 500: Loss = 0.6150
Epoch 600: Loss = 0.5783
Epoch 700: Loss = 0.5505
Epoch 800: Loss = 0.5285
Epoch 900: Loss = 0.5106

Training time: 15.64 seconds

----------------------------
PCA + Softmax Regression R

In [44]:
# Final configuration: 100 PCA components, 2000 epochs, learning rate 0.2.
n_components = 100
pca_best = PCAModel(n_components=n_components)

print("\nFitting PCA...")
pca_best.fit(Xtrain)

Xtrain_pca_best, Xval_pca_best = pca_best.predict(Xtrain), pca_best.predict(Xval)
print("Shape after PCA:", Xtrain_pca_best.shape)

n_epochs_best, learnr_best = 2000, 0.2

print(
    "\n==============================",
    f"Running learning rate = {learnr_best}",
    "==============================",
    sep="\n",
)

model_best = SoftmaxRegression(learning_rate=learnr_best, epochs=n_epochs_best)

print("\nTraining Softmax Regression...")
start = time.time()
model_best.fit(Xtrain_pca_best, ytrain)
end = time.time()

print(f"\nTraining time: {end - start:.2f} seconds")

ypred_train_best, ypred_val_best = (
    model_best.predict(feats) for feats in (Xtrain_pca_best, Xval_pca_best)
)

train_acc_best, val_acc_best = (
    accuracy_score(ytrain, ypred_train_best),
    accuracy_score(yval, ypred_val_best),
)
train_f1_best, val_f1_best = (
    f1_score(ytrain, ypred_train_best, average='weighted'),
    f1_score(yval, ypred_val_best, average='weighted'),
)

print(
    "\n----------------------------",
    "PCA + Softmax Regression RESULTS",
    "----------------------------",
    sep="\n",
)
print("Train Accuracy:", train_acc_best)
print("Validation Accuracy:", val_acc_best)
print("Train F1:", train_f1_best)
print("Validation F1:", val_f1_best)



Fitting PCA...
Shape after PCA: (10002, 100)

Running learning rate = 0.2

Training Softmax Regression...
Epoch 0: Loss = 2.3016
Epoch 100: Loss = 0.4940
Epoch 200: Loss = 0.4168
Epoch 300: Loss = 0.3833
Epoch 400: Loss = 0.3632
Epoch 500: Loss = 0.3493
Epoch 600: Loss = 0.3388
Epoch 700: Loss = 0.3305
Epoch 800: Loss = 0.3238
Epoch 900: Loss = 0.3181
Epoch 1000: Loss = 0.3133
Epoch 1100: Loss = 0.3091
Epoch 1200: Loss = 0.3055
Epoch 1300: Loss = 0.3022
Epoch 1400: Loss = 0.2993
Epoch 1500: Loss = 0.2967
Epoch 1600: Loss = 0.2943
Epoch 1700: Loss = 0.2922
Epoch 1800: Loss = 0.2902
Epoch 1900: Loss = 0.2884

Training time: 30.37 seconds

----------------------------
PCA + Softmax Regression RESULTS
----------------------------
Train Accuracy: 0.9166166766646671
Validation Accuracy: 0.9136
Train F1: 0.9163209985737785
Validation F1: 0.9133155312077162
