In [1]:
import pickle
import os
import pandas as pd
import numpy as np


In [2]:
train_file = "/kaggle/input/fii-nn-2025-homework-2/extended_mnist_train.pkl"
test_file = "/kaggle/input/fii-nn-2025-homework-2/extended_mnist_test.pkl"


def _read_pickle(path):
    """Open `path` in binary mode and return the object unpickled from it."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against dataset files from a trusted source.
train = _read_pickle(train_file)
test = _read_pickle(test_file)

In [3]:
# Walk the training set once, pairing each flattened image with its label,
# then split the pairs into two parallel lists.
# Assumes every item of `train` is an (image, label) pair whose image exposes
# .flatten() (presumably a numpy array) — TODO confirm against the dataset.
flat_pairs = [(image.flatten(), label) for image, label in train]
train_data = [pixels for pixels, _ in flat_pairs]
train_labels = [target for _, target in flat_pairs]


In [4]:
# Flatten every test image; the second element of each pair is unpacked but
# not used here.
test_data = [image.flatten() for image, _ in test]


In [5]:
# Stack the per-sample lists into arrays and divide the pixel values by 255.
# (Presumably the raw intensities are in 0..255, giving a 0..1 range — TODO confirm.)
# NOTE(review): this cell rebinds `train_data` / `test_data` to their scaled
# versions, so running it a second time without re-running the cells above
# divides by 255 again.
train_data = np.array(train_data) / 255.0
train_labels = np.array(train_labels)
test_data = np.array(test_data) / 255.0

# m = number of training samples, n = number of features per sample.
m, n = train_data.shape
nr_classes = 10

# Weights start as Gaussian noise scaled by sqrt(1 / n). The scale uses the
# feature count measured from the data rather than a hard-coded 784, so the
# initialisation stays correct if the image size changes.
# NOTE(review): no random seed is set in this cell, so W (and therefore the
# trained model) differs from run to run.
W = np.random.randn(n, nr_classes) * np.sqrt(1 / n)
b = np.zeros((nr_classes,))


In [6]:
def softmax(z):
    """Return the softmax of `z` taken along axis 1 (one distribution per row).

    The per-row maximum is subtracted before exponentiating so that large
    scores do not overflow; this shift leaves the result unchanged.

    Args:
        z: 2-D array-like of scores, one row per sample.

    Returns:
        Array of the same shape as `z` whose rows each sum to 1.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    unnormalised = np.exp(z - row_peak)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total
def forward(X,W,b):
    """Compute class probabilities for a batch.

    Args:
        X: input matrix, one sample per row.
        W: weight matrix multiplied on the right of `X`.
        b: bias added to every row of `X @ W`.

    Returns:
        softmax(X·W + b), i.e. one probability row per sample.
    """
    logits = np.dot(X, W) + b
    return softmax(logits)

In [7]:
def one_hot_encode(y,nr_classes):
    """Turn a vector of integer class indices into a one-hot matrix.

    Args:
        y: 1-D integer array of class indices (needs a `.shape` attribute).
        nr_classes: number of columns in the result.

    Returns:
        Float array of shape (len(y), nr_classes) that is all zeros except for
        a 1 at column y[i] of row i.
    """
    sample_count = y.shape[0]
    encoded = np.zeros((sample_count, nr_classes))
    row_ids = np.arange(sample_count)
    # Fancy indexing sets exactly one entry per row.
    encoded[row_ids, y] = 1
    return encoded
def compute_loss(y_forward,y_true):
    """Mean cross-entropy between predicted probabilities and one-hot targets.

    Args:
        y_forward: predicted probabilities, same shape as `y_true`.
        y_true: target matrix (one-hot rows in this notebook).

    Returns:
        -sum(y_true * log(y_forward + 1e-10)) divided by the number of rows
        of `y_true`.
    """
    eps = 1e-10  # keeps log() finite when a predicted probability is exactly 0
    sample_count = y_true.shape[0]
    log_probs = np.log(y_forward + eps)
    return -np.sum(y_true * log_probs) / sample_count

In [8]:
def backward(X,y_forward,y_true,W,b,learning_rate):
    """Apply one gradient-descent step to `W` and `b`.

    The gradients are those of the batch-averaged softmax cross-entropy:
    the error term is simply `y_forward - y_true`.

    Args:
        X: batch inputs, one sample per row.
        y_forward: predicted probabilities for the batch.
        y_true: target matrix for the batch, same shape as `y_forward`.
        W: weight matrix; modified in place.
        b: bias vector; modified in place.
        learning_rate: step size applied to both gradients.

    Returns:
        The same `W` and `b` objects that were passed in, after the update.
    """
    batch_len = X.shape[0]
    error = y_forward - y_true

    grad_W = np.dot(X.T, error) / batch_len
    grad_b = np.sum(error, axis=0) / batch_len

    # Augmented assignment updates the caller's arrays rather than copies.
    W -= learning_rate * grad_W
    b -= learning_rate * grad_b
    return W, b

In [9]:
# Mini-batch gradient descent for the softmax-regression model.
# Hyperparameters: step size, number of passes over the data, samples per batch.
learning_rate = 0.18
epochs = 300
batch_size = 128

# One-hot targets are built once, outside the loop, and reshuffled with the inputs.
Y_train = one_hot_encode(train_labels, nr_classes)

for epoch in range(epochs):
    # Draw a fresh random ordering each epoch; the same permutation is applied
    # to inputs and targets so rows stay aligned.
    indices = np.random.permutation(train_data.shape[0])
    X_shuffled = train_data[indices]
    Y_shuffled = Y_train[indices]

    # Step through the shuffled data in slices of `batch_size`; the final
    # slice is shorter when the sample count is not a multiple of batch_size.
    for i in range(0, X_shuffled.shape[0], batch_size):
        X_batch = X_shuffled[i:i+batch_size]
        Y_batch = Y_shuffled[i:i+batch_size]

        y_forward = forward(X_batch, W, b)
        # NOTE(review): `loss` is computed for every batch but is not printed
        # or otherwise used anywhere in this cell.
        loss = compute_loss(y_forward, Y_batch)
        W, b = backward(X_batch, y_forward, Y_batch, W, b, learning_rate)

    # Every 10th epoch, report accuracy on the full training set
    # (this is training accuracy, not a held-out validation score).
    if (epoch + 1) % 10 == 0:
        y_train_pred = forward(train_data, W, b)
        y_pred_labels = np.argmax(y_train_pred, axis=1)
        train_acc = np.mean(y_pred_labels == train_labels) * 100
        print(f"Epoch {epoch+1}/{epochs} | Accuracy: {train_acc:.2f}%")


Epoch 10/300 | Accuracy: 92.45%
Epoch 20/300 | Accuracy: 92.81%
Epoch 30/300 | Accuracy: 92.87%
Epoch 40/300 | Accuracy: 93.15%
Epoch 50/300 | Accuracy: 93.29%
Epoch 60/300 | Accuracy: 93.26%
Epoch 70/300 | Accuracy: 93.35%
Epoch 80/300 | Accuracy: 93.45%
Epoch 90/300 | Accuracy: 93.43%
Epoch 100/300 | Accuracy: 93.46%
Epoch 110/300 | Accuracy: 93.46%
Epoch 120/300 | Accuracy: 93.59%
Epoch 130/300 | Accuracy: 93.62%
Epoch 140/300 | Accuracy: 93.61%
Epoch 150/300 | Accuracy: 93.64%
Epoch 160/300 | Accuracy: 93.66%
Epoch 170/300 | Accuracy: 93.61%
Epoch 180/300 | Accuracy: 93.67%
Epoch 190/300 | Accuracy: 93.72%
Epoch 200/300 | Accuracy: 93.69%
Epoch 210/300 | Accuracy: 93.68%
Epoch 220/300 | Accuracy: 93.73%
Epoch 230/300 | Accuracy: 93.79%
Epoch 240/300 | Accuracy: 93.75%
Epoch 250/300 | Accuracy: 93.72%
Epoch 260/300 | Accuracy: 93.79%
Epoch 270/300 | Accuracy: 93.70%
Epoch 280/300 | Accuracy: 93.83%
Epoch 290/300 | Accuracy: 93.86%
Epoch 300/300 | Accuracy: 93.73%


In [10]:
# Class probabilities for every test sample, then the index of the most
# probable class in each row.
y_forward_test = forward(test_data, W, b)
predictions = y_forward_test.argmax(axis=1)


In [11]:
# This is how you prepare a submission for the competition
# The columns are built directly from arrays instead of appending one row at a
# time in a Python loop: "ID" is the 0-based position of each prediction and
# "target" is the predicted class for that position.
predictions_csv = {
    "ID": np.arange(len(predictions)),
    "target": predictions,
}

df = pd.DataFrame(predictions_csv)
# index=False keeps the DataFrame's own row index out of the file, so the CSV
# contains only the ID and target columns.
df.to_csv("submission.csv", index=False)

In [12]:
# Accuracy of the trained model on the training set, as a percentage:
# predict a class per row, then take the fraction of rows that match the labels.
y_forward_train = forward(train_data, W, b)
y_pred_train = y_forward_train.argmax(axis=1)
accuracy = (y_pred_train == train_labels).mean() * 100

print(f"Accuracy: {accuracy: .2f}%")

Accuracy:  93.73%
