# In [None]:
# Sec B - Aliza Ashfaq 22K-4566
# Sec C - Misha Imam   22K-4179


import numpy as np

# Load MNIST data (you should download and place these files)
def load_mnist_images(filename):
    """Load an IDX3 image file (the MNIST image format) scaled to [0, 1].

    The 16-byte header holds four big-endian 32-bit integers: the magic
    number (2051 for image files), the image count, the row count and the
    column count. The image size is read from the header instead of being
    assumed to be 28x28, so any IDX3 image set loads with the right shape.

    Args:
        filename: Path to an uncompressed IDX3 image file.

    Returns:
        A float32 array of shape (count, rows, cols) holding the pixel
        bytes divided by 255.

    Raises:
        ValueError: If the header is truncated, the magic number is not
            2051 (e.g. a label file or a still-gzipped download), or the
            pixel payload does not match the size given in the header.
    """
    with open(filename, 'rb') as f:
        header = f.read(16)
        data = np.frombuffer(f.read(), np.uint8)

    if len(header) != 16:
        raise ValueError(f"{filename}: truncated IDX3 header")

    magic, count, rows, cols = (
        int.from_bytes(header[k:k + 4], 'big') for k in range(0, 16, 4)
    )
    if magic != 2051:
        raise ValueError(
            f"{filename}: bad magic number {magic}, expected 2051 (IDX3 images)"
        )
    if data.size != count * rows * cols:
        raise ValueError(
            f"{filename}: expected {count * rows * cols} pixel bytes, "
            f"found {data.size}"
        )

    return data.reshape(count, rows, cols).astype(np.float32) / 255.0

def load_mnist_labels(filename):
    """Load an IDX1 label file (the MNIST label format) as uint8 values.

    The 8-byte header holds two big-endian 32-bit integers: the magic number
    (2049 for label files) and the label count. Both are checked so that a
    wrong or truncated file fails loudly instead of yielding garbage labels.

    Args:
        filename: Path to an uncompressed IDX1 label file.

    Returns:
        A 1-D uint8 array with one entry per label.

    Raises:
        ValueError: If the header is truncated, the magic number is not
            2049, or the number of label bytes differs from the header count.
    """
    with open(filename, 'rb') as f:
        header = f.read(8)
        labels = np.frombuffer(f.read(), np.uint8)

    if len(header) != 8:
        raise ValueError(f"{filename}: truncated IDX1 header")

    magic = int.from_bytes(header[:4], 'big')
    count = int.from_bytes(header[4:], 'big')
    if magic != 2049:
        raise ValueError(
            f"{filename}: bad magic number {magic}, expected 2049 (IDX1 labels)"
        )
    if labels.size != count:
        raise ValueError(
            f"{filename}: expected {count} labels, found {labels.size}"
        )

    return labels

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and `x`."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Element-wise ReLU gradient: 1.0 where `x` is positive, 0.0 elsewhere."""
    is_positive = x > 0
    return is_positive.astype(float)

def softmax(x):
    """Row-wise softmax of a 2-D score array.

    Each row's maximum is subtracted before exponentiating so that large
    scores do not overflow; the result is unchanged by that shift.
    """
    row_peak = np.max(x, axis=1, keepdims=True)
    unnormalized = np.exp(x - row_peak)
    row_total = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_total

def cross_entropy_loss(predictions, labels):
    """Mean negative log-probability assigned to the true class.

    `predictions` is indexed as [sample, class] and `labels` holds one class
    index per sample. A 1e-9 offset keeps the logarithm finite when a
    predicted probability is exactly 0.
    """
    n_samples = labels.shape[0]
    sample_rows = range(n_samples)
    true_class_probs = predictions[sample_rows, labels]
    log_likelihood = np.log(true_class_probs + 1e-9)
    return -np.mean(log_likelihood)

def cross_entropy_derivative(predictions, labels):
    """Gradient of the mean cross-entropy loss with respect to the logits.

    Works on a copy of `predictions` (the input is left untouched):
    subtracts 1 at each sample's true-class entry, then divides everything
    by the number of samples.
    """
    n_samples = labels.shape[0]
    shifted = predictions.copy()
    # probs - one_hot(labels), done in place on the copy
    shifted[range(n_samples), labels] -= 1
    return shifted / n_samples

def convolve2d(image, kernel, stride=1):
    """Slide `kernel` over `image` without padding and sum the products.

    The kernel is applied as-is (not flipped), so this is a "valid"
    cross-correlation. The window size comes from `kernel.shape` rather than
    a fixed 3x3, and output height and width are computed independently, so
    non-square images and kernels of any size are handled.

    Args:
        image: 2-D array of shape (H, W).
        kernel: 2-D array of shape (kH, kW).
        stride: Step between consecutive window positions on both axes.

    Returns:
        A float array of shape ((H - kH) // stride + 1,
        (W - kW) // stride + 1).
    """
    k_h, k_w = kernel.shape
    out_h = (image.shape[0] - k_h) // stride + 1
    out_w = (image.shape[1] - k_w) // stride + 1
    output = np.zeros((out_h, out_w))
    for i in range(out_h):
        top = i * stride
        for j in range(out_w):
            left = j * stride
            window = image[top:top + k_h, left:left + k_w]
            output[i, j] = np.sum(window * kernel)
    return output

def convolve_batch(batch, kernel):
    """Apply `convolve2d` with `kernel` to every image in `batch`.

    Returns the per-image results stacked into a single array, in the same
    order as the input images.
    """
    feature_maps = []
    for img in batch:
        feature_maps.append(convolve2d(img, kernel))
    return np.array(feature_maps)

def max_pool(image, size=2, stride=2):
    """Max-pool a 2-D array with a `size` x `size` window.

    The output size is derived from both `size` and `stride`, so overlapping
    (stride < size) and gapped (stride > size) pooling only visit windows
    that lie fully inside the image. With the defaults (size == stride == 2)
    the result shape is (H // 2, W // 2).

    Args:
        image: 2-D array of shape (H, W).
        size: Side length of the square pooling window.
        stride: Step between consecutive window positions on both axes.

    Returns:
        A float array of shape ((H - size) // stride + 1,
        (W - size) // stride + 1); a dimension is 0 when the image is
        smaller than the window along that axis.
    """
    # Clamp at 0 so an image smaller than the window yields an empty result.
    out_h = max((image.shape[0] - size) // stride + 1, 0)
    out_w = max((image.shape[1] - size) // stride + 1, 0)
    output = np.zeros((out_h, out_w))
    for i in range(out_h):
        top = i * stride
        for j in range(out_w):
            left = j * stride
            output[i, j] = np.max(image[top:top + size, left:left + size])
    return output

def max_pool_batch(batch):
    """Run `max_pool` (default window and stride) on every image in `batch`.

    Returns the pooled images stacked into a single array, in input order.
    """
    pooled_maps = []
    for img in batch:
        pooled_maps.append(max_pool(img))
    return np.array(pooled_maps)

def flatten(batch):
    """Collapse every axis after the first so each sample becomes one row."""
    n_samples = batch.shape[0]
    return batch.reshape((n_samples, -1))

# Training hyperparameters
learning_rate, epochs, batch_size = 0.01, 2, 32

# Load data (the four IDX files are read from the current working directory)
x_train, y_train = (
    load_mnist_images("train-images.idx3-ubyte"),
    load_mnist_labels("train-labels.idx1-ubyte"),
)
x_test, y_test = (
    load_mnist_images("t10k-images.idx3-ubyte"),
    load_mnist_labels("t10k-labels.idx1-ubyte"),
)

# Initialize weights: two 3x3 convolution filters with small random values
kernel_shape = (3, 3)
filter1 = 0.1 * np.random.randn(*kernel_shape)
filter2 = 0.1 * np.random.randn(*kernel_shape)

# Size of the flattened feature map fed to the dense layer:
# each unpadded 3x3 convolution trims 2 pixels per side length (28 -> 26 -> 24)
# and the 2x2 max-pool halves it (24 -> 12), giving 12 * 12 features.
conv_out_side = 28 - 2 * 2
pooled_side = conv_out_side // 2
num_channels = 1  # 1 filter output channel
input_size = pooled_side ** 2 * num_channels

# Fully connected layer mapping the flattened features to the 10 digit classes
num_classes = 10
weights_fc = 0.1 * np.random.randn(input_size, num_classes)
biases_fc = np.zeros(num_classes)

for epoch in range(epochs):
    # Walk the training set in consecutive mini-batches (no shuffling).
    for i in range(0, len(x_train), batch_size):
        batch_window = slice(i, i + batch_size)
        batch_imgs = x_train[batch_window]
        batch_labels = y_train[batch_window]

        # Forward pass: conv -> ReLU -> conv -> ReLU -> max-pool -> dense -> softmax
        conv1 = convolve_batch(batch_imgs, filter1)
        relu1 = relu(conv1)
        conv2 = convolve_batch(relu1, filter2)
        relu2 = relu(conv2)
        pool = max_pool_batch(relu2)
        flat = flatten(pool)
        logits = np.matmul(flat, weights_fc) + biases_fc
        probs = softmax(logits)
        loss = cross_entropy_loss(probs, batch_labels)

        # Backpropagation through the softmax/cross-entropy and dense layer
        dL_dlogits = cross_entropy_derivative(probs, batch_labels)
        dL_dw_fc = np.matmul(flat.T, dL_dlogits)
        dL_db_fc = dL_dlogits.sum(axis=0)
        # NOTE(review): dL_dflat is computed but not consumed below, so
        # filter1 and filter2 keep their initial random values.
        dL_dflat = np.matmul(dL_dlogits, weights_fc.T)

        # Update fully connected weights (plain SGD step, in place)
        biases_fc -= learning_rate * dL_db_fc
        weights_fc -= learning_rate * dL_dw_fc

    # Reports the loss of the last mini-batch processed in this epoch.
    print(f"Epoch {epoch+1}, Loss: {loss}")

# Evaluate accuracy on test set
# The whole test set goes through the same layer stack as training, in one pass.
test_conv1 = convolve_batch(x_test, filter1)
test_relu1 = relu(test_conv1)
test_conv2 = convolve_batch(test_relu1, filter2)
test_relu2 = relu(test_conv2)
test_pool = max_pool_batch(test_relu2)
test_flat = flatten(test_pool)
test_logits = np.matmul(test_flat, weights_fc) + biases_fc
test_probs = softmax(test_logits)

# Predicted digit = most probable class; accuracy = fraction matching y_test.
test_preds = test_probs.argmax(axis=1)
is_correct = test_preds == y_test
accuracy = np.mean(is_correct)
print(f"Test Accuracy: {accuracy}")
