In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import os

# ========== Utility Functions ==========

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(x):
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)

def cross_entropy(y_true, y_pred):
    return -np.mean(np.sum(y_true * np.log(y_pred + 1e-8), axis=1))

def load_data(path):
    data = pd.read_csv(path).values
    X = data[:, 1:] / 255.0
    y = data[:, 0].reshape(-1, 1)
    encoder = OneHotEncoder(sparse_output=False)
    y_encoded = encoder.fit_transform(y)
    return X, y_encoded, y

# ========== Visualization ==========

def save_activation_image(vector, shape, filename, cmap='viridis'):
    arr = vector.reshape(shape)
    plt.imshow(arr, cmap=cmap)
    plt.axis('off')
    plt.tight_layout()
    plt.savefig(filename, bbox_inches='tight', pad_inches=0)
    plt.close()

# ========== Neural Network ==========

class NeuralNet:
    def __init__(self):
        self.W1 = np.random.randn(784, 121) * 0.01
        self.b1 = np.zeros((1, 121))
        self.W2 = np.random.randn(121, 25) * 0.01
        self.b2 = np.zeros((1, 25))
        self.W3 = np.random.randn(25, 10) * 0.01
        self.b3 = np.zeros((1, 10))

    def forward(self, X, store_images=False, epoch=None):
        self.A0 = X
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = relu(self.Z1)

        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = relu(self.Z2)

        self.Z3 = self.A2 @ self.W3 + self.b3
        self.A3 = softmax(self.Z3)

        if store_images and epoch is not None:
            folder = f"epoch_images/epoch_{epoch+1}"
            os.makedirs(folder, exist_ok=True)

            # Take the first sample only for visualization
            save_activation_image(self.A0[0], (28, 28), f"{folder}/input_28x28.png")
            save_activation_image(self.A1[0], (11, 11), f"{folder}/layer1_11x11.png")
            save_activation_image(self.A2[0], (5, 5), f"{folder}/layer2_5x5.png")
            save_activation_image(self.A3[0], (1, 10), f"{folder}/output_1x10.png", cmap='hot')

        return self.A3

    def backward(self, y, output, lr):
        m = y.shape[0]

        dZ3 = output - y
        dW3 = self.A2.T @ dZ3 / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m

        dA2 = dZ3 @ self.W3.T
        dZ2 = dA2 * relu_derivative(self.Z2)
        dW2 = self.A1.T @ dZ2 / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * relu_derivative(self.Z1)
        dW1 = self.A0.T @ dZ1 / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # Update weights
        self.W1 -= lr * dW1
        self.b1 -= lr * db1
        self.W2 -= lr * dW2
        self.b2 -= lr * db2
        self.W3 -= lr * dW3
        self.b3 -= lr * db3

    def train(self, X, y, epochs=10, lr=0.1):
        for epoch in range(epochs):
            output = self.forward(X, store_images=True, epoch=epoch)
            loss = cross_entropy(y, output)
            self.backward(y, output, lr)
            print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)

# ========== Run the Model ==========

X_train, y_train_encoded, y_train = load_data("mnist_train.csv")
X_test, y_test_encoded, y_test = load_data("mnist_test.csv")

model = NeuralNet()
model.train(X_train[:1000], y_train_encoded[:1000], epochs=10, lr=0.1)

preds = model.predict(X_test[:500])
acc = accuracy_score(y_test[:500], preds)
print(f"Test Accuracy: {acc * 100:.2f}%")


Epoch 1, Loss: 2.3026
Epoch 2, Loss: 2.3025
Epoch 3, Loss: 2.3024
Epoch 4, Loss: 2.3023
Epoch 5, Loss: 2.3022
Epoch 6, Loss: 2.3021
Epoch 7, Loss: 2.3021
Epoch 8, Loss: 2.3020
Epoch 9, Loss: 2.3019
Epoch 10, Loss: 2.3018
Test Accuracy: 9.80%
