In [1]:
import pickle
import pandas as pd
import numpy as np

np.random.seed(74)
EPOCHS = 150
BATCH_SIZE = 64
learning_rate = 0.001

FEATURES_COUNT = 784
CLASSES_COUNT = 10

# prevent big values by multipling it with 0.01
W = np.random.randn(FEATURES_COUNT, CLASSES_COUNT) * 0.01

In [2]:
def one_hot_encode(labels):
    m = labels.shape[0]
    one_hot = np.zeros((m, CLASSES_COUNT))
    one_hot[np.arange(m), labels] = 1
    return one_hot

def softmax(scores):
    exp_scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True) 

def cross_entropy_loss(y_pred, y_true):
    epsilon = 1e-10
    sample_count = y_pred.shape[0]
    return -np.sum(y_true * np.log(y_pred + epsilon)) / sample_count

def forward_propagation(X, W, bias):
    z = np.dot(X, W) + bias
    return softmax(z)

def backward_propagation(X, W, bias, y_pred, y_true, learning_rate):
    sample_count = X.shape[0]
    gradient = y_true - y_pred

    # calculate how much we have to adjust the weight
    dW = np.dot(X.T, gradient) / sample_count
    W_new = W + learning_rate * dW

    # calculate how much we have to adjust the bias
    db = np.sum(gradient, axis=0) / sample_count
    bias_new = bias + learning_rate * db
    return W_new, bias_new

In [3]:
train_file = '/kaggle/input/fii-nn-2025-homework-2/extended_mnist_train.pkl'
test_file = '/kaggle/input/fii-nn-2025-homework-2/extended_mnist_test.pkl'

with open(train_file, "rb") as fp:
    train = pickle.load(fp)

with open(test_file, "rb") as fp:
    test = pickle.load(fp)

train_data = []
train_labels = []
for image, label in train:
    train_data.append(image.flatten())
    train_labels.append(label)
    
train_data = np.array(train_data)
train_labels = np.array(train_labels)

test_data = []
for image, label in test:
    test_data.append(image.flatten())
test_data = np.array(test_data)

In [4]:

bias = np.zeros(CLASSES_COUNT)
y_train_one_hot = one_hot_encode(train_labels)

for epoch in range(EPOCHS):
    samples = train_data.shape[0]
    indices = np.random.permutation(samples)

    x_shuffled = train_data[indices]
    y_shuffled = y_train_one_hot[indices]
    
    epoch_loss = 0
    for i in range(0, samples, BATCH_SIZE):
        x_batch = x_shuffled[i:i+BATCH_SIZE]
        y_batch = y_shuffled[i:i+BATCH_SIZE]

        y_pred = forward_propagation(x_batch, W, bias)

        batch_loss = cross_entropy_loss(y_pred, y_batch)
        epoch_loss += batch_loss

        W, bias = backward_propagation(x_batch, W, bias, y_pred, y_batch, learning_rate)
    
    n_batches = samples / BATCH_SIZE
    avg_loss = epoch_loss / n_batches

    if (epoch + 1) % 10 == 0:
        learning_rate = learning_rate + 0.0005   
        y_train_pred = forward_propagation(train_data, W, bias)
        train_predictions = np.argmax(y_train_pred, axis=1)
        train_accuracy = np.mean(train_predictions == train_labels)
        print(f"accuracy {train_accuracy:.4f}, loss {avg_loss:.4f}, learning rate {learning_rate:.4f}, epoch {epoch+1}")

accuracy 0.8881, loss 2.4492, learning rate 0.0015, epoch 10
accuracy 0.7333, loss 2.4872, learning rate 0.0020, epoch 20
accuracy 0.8905, loss 2.5515, learning rate 0.0025, epoch 30
accuracy 0.8849, loss 2.5049, learning rate 0.0030, epoch 40
accuracy 0.8848, loss 2.5329, learning rate 0.0035, epoch 50
accuracy 0.8920, loss 2.5326, learning rate 0.0040, epoch 60
accuracy 0.8823, loss 2.5247, learning rate 0.0045, epoch 70
accuracy 0.8923, loss 2.5159, learning rate 0.0050, epoch 80
accuracy 0.8881, loss 2.5057, learning rate 0.0055, epoch 90
accuracy 0.8369, loss 2.5131, learning rate 0.0060, epoch 100
accuracy 0.8906, loss 2.5196, learning rate 0.0065, epoch 110
accuracy 0.8876, loss 2.5452, learning rate 0.0070, epoch 120
accuracy 0.8157, loss 2.5435, learning rate 0.0075, epoch 130
accuracy 0.8719, loss 2.5003, learning rate 0.0080, epoch 140
accuracy 0.8484, loss 2.4974, learning rate 0.0085, epoch 150


In [5]:
y_train_pred = forward_propagation(train_data, W, bias)
train_predictions = np.argmax(y_train_pred, axis=1)
train_accuracy = np.mean(train_predictions == train_labels)
print(f"final accuracy {train_accuracy:.4f}")

y_test_pred = forward_propagation(test_data, W, bias)
test_predictions = np.argmax(y_test_pred, axis=1)
print(f"prediction {test_predictions[:10]}")

final accuracy 0.8484
prediction [9 4 3 0 6 1 2 6 5 5]


In [6]:
predictions_csv = {
    "ID": [],
    "target": [],
}

for i, label in enumerate(test_predictions):
    predictions_csv["ID"].append(i)
    predictions_csv["target"].append(label)

df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)
print('submission done')

submission done
