In [10]:
import numpy as np
import pandas as pd
import pickle
from models.mlp_digit import MLP
from pathlib import Path


In [3]:

def train_val_split(X, y, val_ratio=0.2):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    val_size = int(len(indices) * val_ratio)

    val_idx = indices[:val_size]
    train_idx = indices[val_size:]

    return X[train_idx], X[val_idx], y[train_idx], y[val_idx]

def plot_confusion_matrix(y_true, y_pred, classes):
    matrix = np.zeros((len(classes), len(classes)), dtype=int)
    for t, p in zip(y_true, y_pred):
        matrix[t, p] += 1

    print("\nConfusion Matrix:")
    print("True \\ Pred | " + "  ".join([str(c) for c in classes]))
    print("-" * 50)
    for i, row in enumerate(matrix):
        print(f"      {classes[i]}     | " + "  ".join([f"{count:3d}" for count in row]))


In [None]:
model_path = "models/best_model_2.pkl"
train_path = "data/train.csv"

print("Loading data...")
data = pd.read_csv(train_path)
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

X = X / 255.0
X_train, X_val, y_train, y_val = train_val_split(X, y, val_ratio=0.2)

Loading data...


In [5]:

# base parameters
hidden_layer_best = [64, 64]
learning_rate_best = [0.01]
epochs_best = 15
batch_size_best = 64

# val accuracy
best_val_accuracy = 0

# all of the parameters
param_grid = {
    'hidden_layers': [[64, 64], [32], [64], [128], [256], [512], [32, 64], [64, 128], [128, 256],
                        [256, 512], [256, 128], [512], [256, 128, 64], [512, 256], [128, 64]],
    'learning_rate': [0.5, 0.1, 0.01, 0.001, 0.0001],
    'epochs': [10, 15, 20, 25, 30],
    'batch_size': [32, 64, 128]
}

In [6]:
# searching for optimal parameters started
for layers in param_grid["hidden_layers"]:
    for learning_rate in param_grid["learning_rate"]:
        for epochs in param_grid["epochs"]:
            for batch_size in param_grid["batch_size"]:
                model = MLP(hidden_layers=layers, learning_rate=learning_rate)
                model.train(X_train, y_train, epochs=epochs, batch_size=batch_size)
                preds = model.predict(X_val)
                acc = np.mean(preds == y_val)
                if acc > best_val_accuracy:
                    hidden_layer_best = layers
                    learning_rate_best = learning_rate
                    epochs_best = epochs
                    batch_size_best = batch_size
                    best_val_accuracy = acc


n_in : 784 , n_out : 64
n_in : 64 , n_out : 64
n_in : 64 , n_out : 10
Type of bs: <class 'int'>, and value: 32
M : 33600
Epoch: 0, Avg Loss: 0.3945
Epoch: 1, Avg Loss: 0.1828
Epoch: 2, Avg Loss: 0.1418
Epoch: 3, Avg Loss: 0.1175
Epoch: 4, Avg Loss: 0.1020
Epoch: 5, Avg Loss: 0.0954
Epoch: 6, Avg Loss: 0.0933
Epoch: 7, Avg Loss: 0.0821
Epoch: 8, Avg Loss: 0.0780
Epoch: 9, Avg Loss: 0.0755
n_in : 784 , n_out : 64
n_in : 64 , n_out : 64
n_in : 64 , n_out : 10
Type of bs: <class 'int'>, and value: 64
M : 33600
Epoch: 0, Avg Loss: 0.3932
Epoch: 1, Avg Loss: 0.1585
Epoch: 2, Avg Loss: 0.1160
Epoch: 3, Avg Loss: 0.0946
Epoch: 4, Avg Loss: 0.0720
Epoch: 5, Avg Loss: 0.0661
Epoch: 6, Avg Loss: 0.0534
Epoch: 7, Avg Loss: 0.0477
Epoch: 8, Avg Loss: 0.0424
Epoch: 9, Avg Loss: 0.0382
n_in : 784 , n_out : 64
n_in : 64 , n_out : 64
n_in : 64 , n_out : 10
Type of bs: <class 'int'>, and value: 128
M : 33600
Epoch: 0, Avg Loss: 0.4787
Epoch: 1, Avg Loss: 0.1730
Epoch: 2, Avg Loss: 0.1269
Epoch: 3, Avg L

In [7]:
print(f"Best model: hidden layer {hidden_layer_best}, lr: {learning_rate_best}, epochs: {epochs_best}, bs: {batch_size_best}")
print(f"Val accuracy: {best_val_accuracy}")

best_model = MLP(hidden_layers=hidden_layer_best, learning_rate= learning_rate_best)
best_model.train(X_train, y_train, epochs=epochs_best, batch_size=batch_size_best)
y_pred = best_model.predict(X_val)

plot_confusion_matrix(y_val, y_pred, classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


Best model: hidden layer [256, 512], lr: 0.5, epochs: 20, bs: 32
Val accuracy: 0.9821428571428571
n_in : 784 , n_out : 256
n_in : 256 , n_out : 512
n_in : 512 , n_out : 10
Type of bs: <class 'int'>, and value: 32
M : 33600
Epoch: 0, Avg Loss: 0.3077
Epoch: 1, Avg Loss: 0.1216
Epoch: 2, Avg Loss: 0.0819
Epoch: 3, Avg Loss: 0.0649
Epoch: 4, Avg Loss: 0.0494
Epoch: 5, Avg Loss: 0.0374
Epoch: 6, Avg Loss: 0.0312
Epoch: 7, Avg Loss: 0.0254
Epoch: 8, Avg Loss: 0.0177
Epoch: 9, Avg Loss: 0.0172
Epoch: 10, Avg Loss: 0.0233
Epoch: 11, Avg Loss: 0.0153
Epoch: 12, Avg Loss: 0.0099
Epoch: 13, Avg Loss: 0.0110
Epoch: 14, Avg Loss: 0.0157
Epoch: 15, Avg Loss: 0.0101
Epoch: 16, Avg Loss: 0.0024
Epoch: 17, Avg Loss: 0.0015
Epoch: 18, Avg Loss: 0.0002
Epoch: 19, Avg Loss: 0.0001

Confusion Matrix:
True \ Pred | 0  1  2  3  4  5  6  7  8  9
--------------------------------------------------
      0     | 805    0    2    0    0    0    6    0    0    1
      1     |   1  959    2    0    3    0    2    

In [12]:
model_path = Path("models/best_model_2.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)
with open(model_path, 'wb') as f:
    pickle.dump(best_model, f)
print(f"Model saved to {model_path}")


Model saved to models/best_model_2.pkl
