# Imports

In [None]:
import copy
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from micrograd.loss import CrossEntropyLoss
from micrograd.engine import Value
from micrograd.nn import MLP
from micrograd.optimizer import SGD
from micrograd.functional import softmax
from micrograd.dataloader import DataLoader

# Data utilities

In [None]:
def load_train_val_data(data_root, mode='train'):
    """Load the labelled CSV and return one side of a fixed train/val split.

    Reads ``train.csv`` from ``data_root`` and splits it with
    ``test_size=0.2`` and ``random_state=42``, so separate calls for
    ``'train'`` and ``'val'`` always see the same partition.

    Args:
        data_root: Directory that contains ``train.csv``.
        mode: ``'train'`` or ``'val'`` (case-insensitive).

    Returns:
        A list of ``(image, label)`` tuples. ``image`` is a float32 row built
        from every column except the first, divided by 255; ``label`` is the
        int32 value of the first column.

    Raises:
        AssertionError: If ``mode`` is not one of the accepted values.
    """
    valid_modes = ['train', 'val']
    # Normalise once so the validation and the branch below agree. Comparing
    # the raw string here used to let e.g. 'TRAIN' pass the check and then
    # silently select the validation split.
    mode = mode.lower()
    assert mode in valid_modes, f"Mode must be in {valid_modes}"

    data = pd.read_csv(os.path.join(data_root, 'train.csv'))
    train, val = train_test_split(data, random_state=42, test_size=0.2)
    data = train if mode == 'train' else val

    images = data.iloc[:, 1:].to_numpy(dtype=np.float32)
    images /= 255.
    labels = data.iloc[:, 0].to_numpy(dtype=np.int32)
    return list(zip(images, labels))

In [None]:
def load_test_data(data_root):
    """Load the unlabelled test images from ``test.csv`` under ``data_root``.

    Args:
        data_root: Directory that contains ``test.csv``.

    Returns:
        A float32 array holding every column of the CSV, divided by 255.
    """
    csv_path = os.path.join(data_root, 'test.csv')
    frame = pd.read_csv(csv_path)
    # Every column is treated as image data; this file has no label column
    # to strip off.
    pixels = frame.to_numpy(dtype=np.float32)
    pixels /= 255.
    return pixels

In [None]:
def shuffle_data(inputs, labels):
    """Shuffle ``inputs`` and ``labels`` with one shared random permutation.

    The same permutation is applied to both, so ``inputs[i]`` still
    corresponds to ``labels[i]`` afterwards.

    Args:
        inputs: Array of samples, indexed along its first axis.
        labels: Array of labels with the same length as ``inputs``.

    Returns:
        A ``(inputs, labels)`` tuple of newly allocated, shuffled arrays.
        The arguments themselves are left untouched.

    Raises:
        AssertionError: If the two arguments differ in length.
    """
    assert len(inputs) == len(labels), "Number of input samples must match number of labels"
    indices = np.random.permutation(len(inputs))
    # Index arrays produce copies, so the result has to be returned; rebinding
    # the parameter names (as before) was invisible to the caller.
    return inputs[indices], labels[indices]

# Misc utilities

In [None]:
def probs_to_label(probs):
    """Return, for each row of ``probs``, the index of its largest entry.

    Args:
        probs: Array-like that is reduced with ``np.argmax`` along axis 1.

    Returns:
        The per-row argmax indices.
    """
    return np.argmax(probs, axis=1)

In [None]:
def calculate_accuracy(label, pred):
    """Return the fraction of positions where ``label`` equals ``pred``.

    Args:
        label: Array-like of ground-truth labels.
        pred: Array-like of predicted labels, same shape as ``label``.

    Returns:
        The mean of the element-wise equality mask.
    """
    # Coerce to arrays so the comparison is element-wise: for plain Python
    # lists ``label == pred`` is a single bool, which made the result collapse
    # to 0.0 or 1.0 instead of the fraction of matches.
    label = np.asarray(label)
    pred = np.asarray(pred)
    return np.mean(label == pred)

# Train loop

In [None]:
# Filesystem locations: dataset directory, directory for the saved plots, and
# the file the best model's weights are written to.
DATA_ROOT = '/home/minh/datasets/MNIST-kaggle/'
LOG_ROOT = './logs/kaggle/softmax'
WEIGHTS_PATH = "mnist_mlp_kaggle.npz"

# Optimisation hyper-parameters.
LEARNING_RATE = 0.01
EPOCHS = 20
BATCH_SIZE = 128

# Each call re-reads train.csv and repeats the same seeded split, so the two
# datasets are complementary parts of one partition.
train_ds, val_ds = (
    load_train_val_data(DATA_ROOT, mode=split) for split in ('train', 'val')
)
# Only the training loader is asked to shuffle.
train_loader = DataLoader(train_ds, BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, BATCH_SIZE, shuffle=False)

# MLP with 784 inputs and layer widths 32, 16, 10
# (presumably 28x28 pixel images and 10 digit classes -- confirm against data).
model = MLP(nin=784, nouts=[32, 16, 10])
criterion = CrossEntropyLoss()
optimizer = SGD(model.parameters(), learning_rate=LEARNING_RATE)

# Per-batch metric history, appended to during training and plotted afterwards.
logs = {metric: [] for metric in ('train_acc', 'val_acc', 'train_loss')}

# Keep a snapshot of the model from the epoch with the highest validation
# accuracy; that snapshot (not the final model) is what gets saved.
best_model = None
best_val_acc = float('-inf')
for epoch in range(EPOCHS):
    # ---- Training pass -------------------------------------------------
    train_acc, train_loss = 0., 0.
    for inputs, labels in train_loader:
        inputs = Value(inputs)

        logits = model(inputs)
        loss = criterion(logits, labels)
        train_loss += loss.data

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy is measured on the logits computed before this step's
        # parameter update.
        train_probs = softmax(logits)
        train_preds = np.argmax(train_probs, axis=1)
        batch_train_acc = np.mean(labels == train_preds)
        train_acc += batch_train_acc

        logs['train_acc'].append(batch_train_acc)
        logs['train_loss'].append(loss.data)

    # Epoch metrics are the plain average of the per-batch values.
    num_train_batches = len(train_loader)
    train_acc /= num_train_batches
    train_loss /= num_train_batches
    print(f"Epoch: {epoch + 1}. Train loss={train_loss}. Train acc={train_acc * 100:.2f}%")

    # ---- Validation pass (forward only, no parameter updates) ----------
    val_acc = 0.
    for inputs, labels in val_loader:
        inputs = Value(inputs)

        logits = model(inputs)
        val_probs = softmax(logits)
        val_preds = np.argmax(val_probs, axis=1)
        batch_val_acc = np.mean(labels == val_preds)
        val_acc += batch_val_acc

        logs['val_acc'].append(batch_val_acc)

    val_acc /= len(val_loader)
    print(f"Epoch: {epoch + 1}. Val acc={val_acc * 100:.2f}%")

    # Deep-copy so later epochs cannot modify the stored snapshot.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model = copy.deepcopy(model)

best_model.save_weights(WEIGHTS_PATH)

# Save one line plot per logged metric into LOG_ROOT.
os.makedirs(LOG_ROOT, exist_ok=True)
plot_specs = (
    # (key in logs, legend label, title, output file name)
    ('train_acc', 'Train accuracy', 'Training accuracy graph', 'train_accuracy.jpg'),
    ('val_acc', 'Val accuracy', 'Val accuracy graph', 'val_accuracy.jpg'),
    ('train_loss', 'Train losses', 'Training loss graph', 'loss.jpg'),
)
for plot_index, (log_key, curve_label, plot_title, file_name) in enumerate(plot_specs):
    # Clear the figure between plots only; the last plot is left on the
    # current figure after it has been saved.
    if plot_index > 0:
        plt.clf()
    plt.plot(logs[log_key], label=curve_label)
    plt.xlabel('Iteration')
    plt.legend()
    plt.title(plot_title)
    plt.savefig(os.path.join(LOG_ROOT, file_name))

# Run on test dataset

In [None]:
# Rebuild the network with the same layout used for training and load the
# weights that the training cell saved.
DATA_ROOT = '/home/minh/datasets/MNIST-kaggle/'
model = MLP(nin=784, nouts=[32, 16, 10])
model.load_weights(WEIGHTS_PATH)

# The whole test set goes through the model in a single forward pass.
test_images = load_test_data(DATA_ROOT)
test_images = Value(test_images)
test_outputs = model(test_images)
# The argmax is taken over the raw model outputs; no softmax is applied here.
test_preds = probs_to_label(test_outputs.data)

# Submission rows are numbered from 1. The header must be spelled 'ImageId'
# to match the competition's sample submission ('ImageID' does not match).
test_preds = list(enumerate(test_preds, start=1))
test_preds = pd.DataFrame(test_preds, columns=['ImageId', 'Label'])
test_preds.to_csv('submission.csv', index=False)