# Best Model

In [None]:
# Importing all the modules and functions related to constructing MLP network models

from network.net import Net                   # Net class, base class for constructing MLP networks
from network.layer import Linear              # Linear class, child class of parent class Layer 
from network.loss import CrossEntropyLoss     # CrossEntropyLoss class, child class of parent class Loss
from network.activ import ReLU, LeakyReLU     # ReLU, LeakyReLU classes, child classes of parent class Activation
from network.optim import SGD, Adam           # SGD, Adam classes, child classes of parent class Optimizer

In [None]:
# Importing all the modules and functions related to data processing including loaders for the assignment data

# Process module contains functions relating to data processing:
from network.loader.process import (
    train_test_split,        # Function to split data with chosen ratio, data can be shuffled
    normalize,               # Normalizes data to have mean of zero and unit variance
    standardize,             # Normalizes data to be between range 0-1, i.e. standardizes data
    one_hot,                 # One hot encoding: 100% prob of 2 is [0, 0, 1] with 3 classes
    pca                      # Reduces data to chosen K principal components
) 

# Data module for loading the assignment data
from network.dataset.source import (
    get_data_from_file,   # Loads assignment data from file (must be within main directory)
    get_data_from_url     # Loads assignment data from public GitHub repo that stores data
)

# Data loader module for automating processing of and loading of assignment data based on parameter selections
from network.loader.data_loader import load_train_val_test  # Parameter selections decide method of processing

import numpy as np
import matplotlib.pyplot as pl
import pandas as pd 
import seaborn as sns

# Seed NumPy's global random generator so repeated runs of the notebook start from the same state.
# NOTE(review): presumably the data shuffle and weight initialization draw from np.random — confirm.
np.random.seed(90)

### Plotting Helper Functions

In [None]:
def plot_results(stats):
    """Plot the loss and accuracy curves of a training run on a 2x2 grid.

    Parameters
    ----------
    stats : sequence of five items
        Unpacked as ``(ep, tl, ta, vl, va)``: the epoch reported in the figure
        title, followed by the training-loss, training-accuracy,
        validation-loss and validation-accuracy series. Each series is drawn
        on its own axes.
    """
    ep, tl, ta, vl, va = stats

    # The size is handed to subplots() itself: a separate pl.figure() call would
    # open a second, empty figure and leave the grid at the default size.
    fig, ((ax1, ax2), (ax3, ax4)) = pl.subplots(2, 2, figsize=(10, 7))
    fig.suptitle(f'Training Results, best model found @ Epoch {ep}')

    ax1.plot(tl)
    ax1.set_title('Training Loss')

    ax2.plot(vl, 'tab:orange')
    ax2.set_title('Validation Loss')

    ax3.plot(ta, 'tab:green')
    ax3.set_title('Training Accuracy')

    ax4.plot(va, 'tab:red')
    ax4.set_title('Validation Accuracy')

    # Keep tick labels only on the outer edge of the grid.
    for ax in fig.get_axes():
        ax.label_outer()

    pl.show()
    
def confusion_matrix(pred, label, n_classes=None):
    """Count how often each (predicted class, true class) pair occurs.

    Parameters
    ----------
    pred : array-like of int
        Predicted class index for every sample.
    label : array-like of int
        True class index for every sample; must have the same length as ``pred``.
    n_classes : int, optional
        Side length of the returned matrix. When omitted it is inferred as the
        largest class index found in ``pred`` or ``label`` plus one, so a class
        that is never predicted still gets its (all-zero) row.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_classes, n_classes)`` where entry ``[i, j]``
        is the number of samples predicted as class ``i`` whose true class is ``j``.

    Raises
    ------
    ValueError
        If ``pred`` and ``label`` do not contain the same number of samples.
    """
    pred = np.asarray(pred, dtype=int).ravel()
    label = np.asarray(label, dtype=int).ravel()
    if pred.shape != label.shape:
        raise ValueError("pred and label must have the same length")

    if n_classes is None:
        # Size by the largest index rather than by the number of distinct values:
        # counting distinct values shrinks the matrix whenever a class is absent.
        n_classes = int(max(pred.max(), label.max())) + 1 if pred.size else 0

    matrix = np.zeros((n_classes, n_classes))
    # add.at accumulates repeated (row, col) pairs, unlike plain fancy-index +=.
    np.add.at(matrix, (pred, label), 1)
    return matrix

## Loading Data

In [None]:
# The data is fetched from the URL source here, which is slower than reading it from a local file.
train_set, valid_set, test_set = load_train_val_test(
    source="url",
    method="standardize",
    pca_N=0,
    n_categories=10,
    ratio=0.2,
    shuffle=True,
)

## Model Initialization

##### Changed learning_rate = 0.01 (from 0.001)

In [None]:
# Configure the network: Adam optimizer, cross-entropy criterion, batch_norm switched on
# and an L2 regularization term of 0.001.
mlp = Net(
    optimizer = Adam(
        learning_rate = 0.01  # Changed from 0.001 per the section heading. NOTE(review): previously labelled "Default value" — confirm Adam's actual default
    ),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# Layer stack: 128 -> 1024 -> 512 -> 64 -> 16 -> 10, with a ReLU after every Linear layer
# except the last. Dropout is passed to every Linear layer except the final one.
# The 10 outputs match n_categories = 10 used when loading the data.
# NOTE(review): assumes the loaded samples have 128 features — confirm against the loader.
mlp.add(Linear(128, 1024, dropout=0.4))
mlp.add(ReLU())
mlp.add(Linear(1024, 512, dropout=0.2))
mlp.add(ReLU())
mlp.add(Linear(512, 64, dropout=0.2))
mlp.add(ReLU())
mlp.add(Linear(64, 16, dropout=0.2))
mlp.add(ReLU())
mlp.add(Linear(16, 10))  


# The name is reused later to build the path the model is reloaded from.
mlp.set_name("Best_model")
# Saved before any training has happened.
# NOTE(review): presumably training overwrites this file with the best model — confirm.
mlp.save_model()  # for reload

print(f"{mlp.model_name} is initialized and ready to be trained.")

## Train and Evaluate Model

In [None]:
# Train on the training split while tracking the validation split.
# The returned stats are unpacked by plot_results as
# (epoch, training loss, training accuracy, validation loss, validation accuracy).
stats = mlp.train_convergence(
    train_set=train_set,
    valid_set=valid_set,
    batch_size=500,
    planned_epochs=100,
    last_check=10,
    threshold=1e-25,
    report_interval=5,
)

{"tags": ["hide-output"]}

### Plotting Epoch-wise Loss & Accuracy Curve

In [None]:
plot_results(stats)  # draw the loss and accuracy curves returned by train_convergence

### Checking Accuracy of Best Model

In [None]:
# Loading best model found:
# The model is read back from the "model/" directory under the name set on mlp.
# NOTE(review): presumably train_convergence stores its best model at this path — confirm,
# since the only explicit save in this notebook happens before training.

best_model = Net.load_model("model/" + mlp.model_name)
# Evaluate the reloaded model on each split; the second argument is the text label passed along with the split.
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")

### Confusion Matrix

In [None]:
# Confusion matrix of the training data.

# The labels are one-hot encoded: their width is passed to predict() and their argmax is the class index.
pred = best_model.predict(train_set[0], train_set[1].shape[1])
pred_train_labels = np.argmax(pred, axis=1)

# confusion_matrix puts predictions on the rows and true labels on the columns.
matrix = confusion_matrix(pred_train_labels, np.argmax(train_set[1], axis=1))
matrix = pd.DataFrame(matrix, index=np.arange(10), columns=np.arange(10))

pl.figure(figsize=(10, 7))
# fmt=".0f": the counts are stored as floats, and the default annotation format
# would abbreviate large counts to scientific notation.
sns.heatmap(matrix, annot=True, fmt=".0f")
pl.xlabel("True class")
pl.ylabel("Predicted class")
pl.title("Confusion matrix (training data)")
pl.show()

In [None]:
# Confusion matrix of the test data.

# The labels are one-hot encoded: their width is passed to predict() and their argmax is the class index.
pred = best_model.predict(test_set[0], test_set[1].shape[1])
pred_test_labels = np.argmax(pred, axis=1)

# confusion_matrix puts predictions on the rows and true labels on the columns.
matrix = confusion_matrix(pred_test_labels, np.argmax(test_set[1], axis=1))
matrix_df = pd.DataFrame(matrix, index=np.arange(10), columns=np.arange(10))

pl.figure(figsize=(10, 7))
# Plot the labelled frame (it was previously built but never used).
# fmt=".0f": the counts are stored as floats, and the default annotation format
# would abbreviate large counts to scientific notation.
sns.heatmap(matrix_df, annot=True, fmt=".0f")
pl.xlabel("True class")
pl.ylabel("Predicted class")
pl.title("Confusion matrix (test data)")
pl.show()

### F1 Score

In [4]:
# Only used in evaluation
from sklearn.metrics import f1_score

# test_set[1] holds one-hot rows while pred_test_labels holds class indices;
# f1_score needs both arguments in the same representation, so the true labels
# are reduced to class indices first.
true_test_labels = np.argmax(test_set[1], axis=1)

# 'weighted' averages the per-class F1 scores, weighting each class by its number of true samples.
f1_class = f1_score(true_test_labels, pred_test_labels, average='weighted')

print("F1 Score:", np.round(f1_class, 5))

ModuleNotFoundError: No module named 'sklearn'