In [163]:
import pickle
import os
import pandas as pd
import numpy as np


In [164]:
# Directory holding the competition pickles. Change this single constant to
# run the notebook somewhere other than the Kaggle input mount.
DATA_DIR = "/kaggle/input/fii-nn-2025-homework-2"
train_file = os.path.join(DATA_DIR, "extended_mnist_train.pkl")
test_file = os.path.join(DATA_DIR, "extended_mnist_test.pkl")

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that come from a source you trust.
with open(train_file, "rb") as fp:
    train = pickle.load(fp)

with open(test_file, "rb") as fp:
    test = pickle.load(fp)

In [165]:
# Split the training samples into two parallel lists: the flattened image and
# the label of each (image, label) pair.
# NOTE: `train` here is the unpickled dataset. A later cell defines a function
# that is also called `train`, which rebinds the name, so this cell must run
# before that definition (as it does under Restart & Run All).
train_data, train_labels = [], []
for pixels, digit in train:
    train_data.append(pixels.flatten())
    train_labels.append(digit)


In [166]:
# Flatten every test image; the second element of each test pair is not used.
test_data = [pixels.flatten() for pixels, _ in test]


In [167]:
def softmax(z):
    """Numerically stable softmax along axis 1.

    Parameters
    ----------
    z : array-like
        Logits with at least two dimensions; normalisation runs along axis 1
        (one row per sample as used in this notebook).

    Returns
    -------
    np.ndarray
        Array of the same shape as ``z`` whose entries along axis 1 are
        non-negative and sum to 1.
    """
    # Subtracting the per-row maximum does not change the mathematical result
    # but keeps every exponent <= 0, so np.exp cannot overflow to inf.
    # Without the shift, large logits produce inf / inf = NaN rows.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)
    
def cross_entropy_loss(y_true, y_pred):
    """Cross-entropy between targets and predicted probabilities, averaged over rows.

    ``y_pred`` is clipped to [1e-12, 1.0] before taking the logarithm so that
    a probability of exactly 0 does not produce -inf. The summed loss is
    divided by ``y_true.shape[0]``.
    """
    n_samples = y_true.shape[0]
    log_probs = np.log(np.clip(y_pred, 1e-12, 1.0))
    total = np.sum(y_true * log_probs)
    return -total / n_samples
    
def one_hot_encode(y, n_classes=10):
    """Convert an array of integer class indices into a one-hot matrix.

    Parameters
    ----------
    y : np.ndarray
        Integer class indices; ``y.shape[0]`` gives the number of rows.
    n_classes : int
        Number of columns in the result.

    Returns
    -------
    np.ndarray
        Float array of shape ``(y.shape[0], n_classes)`` that is 1 at
        ``[i, y[i]]`` and 0 elsewhere.
    """
    n_samples = y.shape[0]
    row_idx = np.arange(n_samples)
    one_hot = np.zeros((n_samples, n_classes))
    # Fancy indexing sets exactly one entry per row.
    one_hot[row_idx, y] = 1
    return one_hot

In [168]:
def forward_propagation(X, W, b):
    """Apply the linear layer and softmax.

    Computes the logits ``X . W + b`` and passes them through ``softmax``.

    Returns
    -------
    tuple
        ``(probabilities, logits)``.
    """
    logits = np.dot(X, W) + b
    return softmax(logits), logits

def backward_propagation(X, y_true, y_pred, lr=0.01):
    """Compute the learning-rate-scaled parameter updates for one step.

    The residual is ``y_true - y_pred`` (target minus prediction), so the
    returned updates are meant to be *added* to the current parameters.

    Returns
    -------
    tuple
        ``(W_update, b_update)``; ``b_update`` keeps a leading axis of
        length 1 because the column sum uses ``keepdims=True``.
    """
    n_samples = X.shape[0]
    residual = y_true - y_pred
    W_update = lr * (np.dot(X.T, residual) / n_samples)
    b_update = lr * (np.sum(residual, axis=0, keepdims=True) / n_samples)
    return W_update, b_update

def train(X, y, lr_max=1, lr_min=0.01, epochs=150, n_classes=10, seed=None):
    """Fit a single-layer softmax classifier with full-batch updates.

    Parameters
    ----------
    X : np.ndarray
        2-D feature matrix; the second dimension is the number of features.
    y : np.ndarray
        Integer class indices, one per row of ``X``.
    lr_max, lr_min : float
        Upper and lower bounds of the cosine learning-rate schedule.
    epochs : int
        Number of full-batch update steps.
    n_classes : int
        Number of output classes (defaults to 10, the previous fixed value).
    seed : int or None
        If given, the weights are initialised from a dedicated generator
        seeded with this value, making the run reproducible. If None, NumPy's
        global RNG is used, so a prior ``np.random.seed`` call still applies.

    Returns
    -------
    tuple
        ``(W, b)``: weights of shape ``(n_features, n_classes)`` and bias of
        shape ``(1, n_classes)``.
    """
    _, n_features = X.shape
    if seed is None:
        W = np.random.randn(n_features, n_classes) * 0.05
    else:
        rng = np.random.default_rng(seed)
        W = rng.standard_normal((n_features, n_classes)) * 0.05
    b = np.zeros((1, n_classes))
    y_encoded = one_hot_encode(y, n_classes)
    for epoch in range(epochs):
        # Cosine schedule: starts at lr_max for epoch 0 and decays towards lr_min.
        lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + np.cos(np.pi * epoch / epochs))
        y_pred, _ = forward_propagation(X, W, b)
        loss = cross_entropy_loss(y_encoded, y_pred)
        W_step, b_step = backward_propagation(X, y_encoded, y_pred, lr)
        # The steps are built from (target - prediction), so they are added.
        W = W + W_step
        b = b + b_step
        if epoch % 10 == 0 or epoch == epochs - 1:
            # Loss and accuracy describe the parameters *before* this epoch's update.
            acc = np.mean(np.argmax(y_pred, axis=1) == y)
            print(f"Epoch {epoch:3d}|Loss: {loss:4f}|Acc: {acc*100:.2f}%")
    return W, b
    
def predict(X, W, b):
    """Return, for each row of ``X``, the index of the highest-probability class."""
    probabilities = forward_propagation(X, W, b)[0]
    return np.argmax(probabilities, axis=1)

In [169]:
# Divide by 255 (presumably the maximum pixel value; confirm against the
# dataset), then standardise each feature using training-set statistics only.
train_X = np.array(train_data) / 255.0
train_y = np.array(train_labels)
test_X = np.array(test_data) / 255.0
mean_img = np.mean(train_X, axis=0)
std_img = np.std(train_X, axis=0)
# Features that are constant over the training set have std == 0. Dividing
# them by a bare 1e-8 would multiply any non-zero test value at such a feature
# by 1e8, so use a divisor of 1 there. All other features keep std + 1e-8.
std_img = np.where(std_img > 0, std_img + 1e-8, 1.0)
train_X = (train_X - mean_img) / std_img
test_X = (test_X - mean_img) / std_img

# Seed NumPy's global RNG so the random weight initialisation is reproducible
# across Restart & Run All.
SEED = 42
np.random.seed(SEED)
W, b = train(train_X, train_y, lr_max=2, lr_min=0.01, epochs=200)

Epoch   0|Loss: 2.849151|Acc: 10.51%
Epoch  10|Loss: 0.546509|Acc: 84.95%
Epoch  20|Loss: 0.313964|Acc: 91.40%
Epoch  30|Loss: 0.284255|Acc: 92.12%
Epoch  40|Loss: 0.272995|Acc: 92.41%
Epoch  50|Loss: 0.266734|Acc: 92.61%
Epoch  60|Loss: 0.262494|Acc: 92.72%
Epoch  70|Loss: 0.259344|Acc: 92.81%
Epoch  80|Loss: 0.256921|Acc: 92.90%
Epoch  90|Loss: 0.255016|Acc: 92.96%
Epoch 100|Loss: 0.253504|Acc: 93.02%
Epoch 110|Loss: 0.252302|Acc: 93.06%
Epoch 120|Loss: 0.251354|Acc: 93.10%
Epoch 130|Loss: 0.250616|Acc: 93.12%
Epoch 140|Loss: 0.250054|Acc: 93.13%
Epoch 150|Loss: 0.249639|Acc: 93.13%
Epoch 160|Loss: 0.249343|Acc: 93.14%
Epoch 170|Loss: 0.249144|Acc: 93.14%
Epoch 180|Loss: 0.249016|Acc: 93.15%
Epoch 190|Loss: 0.248937|Acc: 93.15%
Epoch 199|Loss: 0.248888|Acc: 93.15%


In [170]:
# Predicted class index for every row of the standardised test matrix.
predictions = predict(X=test_X, W=W, b=b)

In [171]:
# Prepare the competition submission: one row per prediction, with its
# position in `predictions` as "ID" and the predicted class as "target".
predictions_csv = {
    "ID": list(range(len(predictions))),
    "target": list(predictions),
}

df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)