# Hyper-Parameter Tuning: Hidden Layers

In [1]:
# Importing all the modules and functions related to constructing MLP network models

from network.net import Net                   # Net class, base class for constructing MLP networks
from network.layer import Linear              # Linear class, child class of parent class Layer 
from network.loss import CrossEntropyLoss     # CrossEntropyLoss class, child class of parent class Loss
from network.activ import ReLU, LeakyReLU     # ReLU, LeakyReLU classes, child classes of parent class Activation
from network.optim import SGD, Adam           # SGD, Adam classes, child classes of parent class Optimizer

In [2]:
# Importing all the modules and functions related to data processing including loaders for the assignment data

# Process module contains functions relating to data processing:
from network.loader.process import (
    train_test_split,        # Function to split data with chosen ratio, data can be shuffled
    normalize,               # Normalizes data to have mean of zero and unit variance
    standardize,             # Normalizes data to be between range 0-1, i.e. standardizes data
    one_hot,                 # One hot encoding: 100% prob of 2 is [0, 0, 1] with 3 classes
    pca                      # Reduces data to chosen K principal components
) 

# Data module for loading the assignment data
from network.dataset.source import (
    get_data_from_file,   # Loads assignment data from file (must be within main directory)
    get_data_from_url     # Loads assignment data from public GitHub repo that stores data
)

# Data loader module for automating processing of and loading of assignment data based on parameter selections
from network.loader.data_loader import load_train_val_test  # Parameter selections decide method of processing

import numpy as np
import matplotlib.pyplot as pl
import pandas as pd 
import seaborn as sns

# Seed NumPy's global random generator with a fixed value so that anything
# drawing from it produces the same sequence on every fresh run of the notebook.
np.random.seed(123)

In [3]:
def plot_results(stats):
    """Plot training/validation loss and accuracy curves on a 2x2 grid.

    Args:
        stats: A 5-item sequence unpacked as
            (epoch, train_loss, train_acc, valid_loss, valid_acc).
            The first item is shown in the figure title; each of the other
            four is passed to ``Axes.plot`` on its own panel.
    """
    ep, tl, ta, vl, va = stats

    # Give the size to subplots() directly. A separate pl.figure(figsize=...)
    # call opens an extra, empty figure and the size never reaches this grid.
    fig, ((ax1, ax2), (ax3, ax4)) = pl.subplots(2, 2, figsize = (10, 7))
    fig.suptitle(f'Training Results, best model found @ Epoch {ep}')

    ax1.plot(tl)
    ax1.set_title('Training Loss')

    ax2.plot(vl, 'tab:orange')
    ax2.set_title('Validation Loss')

    ax3.plot(ta, 'tab:green')
    ax3.set_title('Training Accuracy')

    ax4.plot(va, 'tab:red')
    ax4.set_title('Validation Accuracy')

    # Keep tick labels only on the outer edge of the grid.
    for ax in fig.get_axes():
        ax.label_outer()

    pl.show()

# Fetching the data from the URL source is slower than reading it from a local file.
train_set, valid_set, test_set = load_train_val_test(
    source = "url",
    method = "standardize",
    pca_N = 0,
    n_categories = 10,
    ratio = 0.2,
    shuffle = True,
)

## Current Best Model

In [4]:
# Baseline network: Adam optimizer, cross-entropy loss, batch norm enabled, L2 term 0.001.
mlp = Net(
    optimizer = Adam(),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# (input dim, output dim, dropout) of each hidden Linear layer; a ReLU follows each one.
hidden_specs = [
    (128, 1024, 0.4),
    (1024, 512, 0.2),
    (512, 64, 0.2),
    (64, 16, 0.2),
]
for n_in, n_out, drop in hidden_specs:
    mlp.add(Linear(n_in, n_out, dropout=drop))
    mlp.add(ReLU())

# Output layer: 10 outputs, no dropout argument, no activation added after it.
mlp.add(Linear(16, 10))

mlp.set_name("Adam_network")
print(f"{mlp.model_name} is initialized and ready to be trained.")

Adam_network is initialized and ready to be trained.


## 0: Train and Evaluate Current Best Model

In [None]:
# Train the baseline network; the returned stats are what plot_results() consumes.
# NOTE(review): threshold = 1e-25 is extremely small; presumably a convergence
# tolerance - confirm against Net.train_convergence.
best = mlp.train_convergence(
    train_set = train_set, valid_set = valid_set,
    batch_size = 500, planned_epochs = 100,
    last_check = 10, threshold = 1e-25,
    report_interval = 5,
)

{"tags": ["hide-output"]}

  1%|█▍                                                                                                                                           | 1/100 [00:08<13:40,  8.29s/it]

Epoch: 0	Interval Time: 8.3 sec(s)	Training Loss: 2.213828		Training Accuracy: 0.226825
						Validation Loss:1.867397	Validation Accuracy: 0.350100


  4%|█████▋                                                                                                                                       | 4/100 [00:33<13:38,  8.52s/it]

In [None]:
# Plot the loss and accuracy curves returned by the baseline training run.
plot_results(best)

In [None]:
# Reload the model stored under "model/<baseline model name>" and evaluate it on
# the train, validation and test splits.
# NOTE(review): presumably this is the best checkpoint written by train_convergence - confirm.

best_model = Net.load_model("model/" + mlp.model_name)
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")

# 1: Removing a Layer

Note: when a layer is removed, the output dimension of the layer before it must be changed to match the input dimension of the layer after it.

### Scenario A

In [None]:
# Scenario A: same settings as the baseline network, with one hidden layer removed.
remA = Net(
    optimizer = Adam(),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# (input dim, output dim, dropout) of each hidden Linear layer; a ReLU follows each one.
# The baseline's Linear(64, 16) layer (and its ReLU) is removed, so the third
# layer now maps 512 -> 16 directly instead of 512 -> 64.
hidden_specs = [
    (128, 1024, 0.4),
    (1024, 512, 0.2),
    (512, 16, 0.2),
]
for n_in, n_out, drop in hidden_specs:
    remA.add(Linear(n_in, n_out, dropout=drop))
    remA.add(ReLU())

# Output layer.
remA.add(Linear(16, 10))

### Train and Evaluate Scenario A

In [None]:

# Name the Scenario A model, then report that name.
remA.set_name("remA")
# Print remA's own name: the previous version printed mlp.model_name, i.e. the
# name of the baseline network rather than the model trained in this cell.
print(f"{remA.model_name} is initialized and ready to be trained.")

# Train Scenario A with the same settings used for the baseline run.
remAT = remA.train_convergence(
    train_set = train_set,
    valid_set = valid_set,
    batch_size = 500,
    planned_epochs = 100,
    last_check = 10,
    threshold = 1e-25,
    report_interval = 5
)

{"tags": ["hide-output"]}

In [None]:
# Plot the loss and accuracy curves returned by the Scenario A (layer removed) training run.
plot_results(remAT)

In [None]:
# Reload the model stored under "model/<remA model name>" and evaluate it on
# the train, validation and test splits.
# NOTE(review): presumably this is the best checkpoint written by train_convergence - confirm.

best_model = Net.load_model("model/" + remA.model_name)
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")

### Scenario B

In [None]:
# Scenario B: same settings as the baseline network, with one hidden layer removed.
remB = Net(
    optimizer = Adam(),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# (input dim, output dim, dropout) of each hidden Linear layer; a ReLU follows each one.
# The baseline's Linear(512, 64) layer (and its ReLU) is removed, so the second
# layer now maps 1024 -> 64 directly instead of 1024 -> 512.
hidden_specs = [
    (128, 1024, 0.4),
    (1024, 64, 0.2),
    (64, 16, 0.2),
]
for n_in, n_out, drop in hidden_specs:
    remB.add(Linear(n_in, n_out, dropout=drop))
    remB.add(ReLU())

# Output layer.
remB.add(Linear(16, 10))

### Train and Evaluate Scenario B

In [None]:
# Name the Scenario B model, then report that name.
remB.set_name("remB")
# Print remB's own name: the previous version printed mlp.model_name, i.e. the
# name of the baseline network rather than the model trained in this cell.
print(f"{remB.model_name} is initialized and ready to be trained.")

# Train Scenario B with the same settings used for the baseline run.
remB_stat = remB.train_convergence(
    train_set = train_set,
    valid_set = valid_set,
    batch_size = 500,
    planned_epochs = 100,
    last_check = 10,
    threshold = 1e-25,
    report_interval = 5
)

{"tags": ["hide-output"]}

In [None]:
# Plot the loss and accuracy curves returned by the Scenario B (layer removed) training run.
plot_results(remB_stat)

In [None]:
# Reload the model stored under "model/<remB model name>" and evaluate it on
# the train, validation and test splits.
# NOTE(review): presumably this is the best checkpoint written by train_convergence - confirm.

best_model = Net.load_model("model/" + remB.model_name)
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")

### Scenario C

In [None]:
# Scenario C: same Net settings as the baseline network.
remC = Net(
    optimizer = Adam(),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# NOTE(review): as written, this layer stack (128 -> 1024 -> 64 -> 16 -> 10) is
# identical to Scenario B, so this scenario repeats that experiment rather than
# testing a different removal. The commented-out line below is also the same
# Linear(1024, 64) layer that is still added right after it.
# TODO: confirm which baseline layer Scenario C was meant to remove.
remC.add(Linear(128, 1024, dropout=0.4))
remC.add(ReLU())
#remC.add(Linear(1024, 64, dropout=0.2))   # removing this layer
#remC.add(ReLU())
remC.add(Linear(1024, 64, dropout=0.2))  
remC.add(ReLU())
remC.add(Linear(64, 16, dropout=0.2))   
remC.add(ReLU())
# Output layer.
remC.add(Linear(16, 10))

### Train and Evaluate Scenario C

In [None]:

# Name the Scenario C model and report that name.
remC.set_name("remC")
print(f"{remC.model_name} is initialized and ready to be trained.")

# Train Scenario C with the same settings used for the baseline run.
remC_stat = remC.train_convergence(
    train_set = train_set, valid_set = valid_set,
    batch_size = 500, planned_epochs = 100,
    last_check = 10, threshold = 1e-25,
    report_interval = 5,
)

{"tags": ["hide-output"]}

In [None]:
# Plot the loss and accuracy curves returned by the Scenario C (layer removed) training run.
plot_results(remC_stat)

In [None]:
# Reload the model stored under "model/<remC model name>" and evaluate it on
# the train, validation and test splits.
# NOTE(review): presumably this is the best checkpoint written by train_convergence - confirm.

best_model = Net.load_model("model/" + remC.model_name)
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")

# 2: Adding a Layer

For consistency, each added hidden layer is given an output dimension that is half the output dimension of the layer before it.

### Scenario A

In [None]:
# "Adding a layer", Scenario A: rebinds `mlp` to a new network with one extra hidden layer.
mlp = Net(
    optimizer = Adam(),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# (input dim, output dim, dropout) of each hidden Linear layer; a ReLU follows each one.
hidden_specs = [
    (128, 1024, 0.4),
    (1024, 512, 0.2),
    (512, 64, 0.2),
    (64, 32, 0.2),    # added layer: output is half of the 64 outputs above
    (32, 16, 0.2),    # input reduced from 64 to 32 to match the added layer
]
for n_in, n_out, drop in hidden_specs:
    mlp.add(Linear(n_in, n_out, dropout=drop))
    mlp.add(ReLU())

# Output layer.
mlp.add(Linear(16, 10))

# The training cell for this scenario sets the name again before training.
mlp.set_name("Adam_network")
print(f"{mlp.model_name} is initialized and ready to be trained.")

### Train and Evaluate Scenario A

In [None]:

# Name the added-layer Scenario A model and report that name.
mlp.set_name("add")
print(f"{mlp.model_name} is initialized and ready to be trained.")

# Train with the same settings used for the baseline run; stats are kept in addA.
addA = mlp.train_convergence(
    train_set = train_set, valid_set = valid_set,
    batch_size = 500, planned_epochs = 100,
    last_check = 10, threshold = 1e-25,
    report_interval = 5,
)

{"tags": ["hide-output"]}

In [None]:
# Plot the stats returned by the added-layer Scenario A training run. They are
# stored in `addA`; `add_stat` is never assigned in this notebook.
plot_results(addA)

In [None]:
# Loading best model found:

# Reload the model stored under "model/<current mlp model name>" and evaluate it
# on the train, validation and test splits.
# NOTE(review): presumably this is the best checkpoint written by train_convergence - confirm.
best_model = Net.load_model("model/" + mlp.model_name)
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")

### Scenario B

In [None]:
# "Adding a layer", Scenario B: rebinds `mlp` to a new network.
mlp = Net(
    optimizer = Adam(),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# NOTE(review): this layer stack (128 -> 1024 -> 512 -> 64 -> 32 -> 16 -> 10, same
# dropout values) is identical to the one built for added-layer Scenario A; only
# the position of the "adding layer" comment differs.
# TODO: confirm where Scenario B was meant to insert its extra layer.
mlp.add(Linear(128, 1024, dropout=0.4))
mlp.add(ReLU())
mlp.add(Linear(1024, 512, dropout=0.2))
mlp.add(ReLU())
# adding layer
mlp.add(Linear(512, 64, dropout=0.2))
mlp.add(ReLU())
mlp.add(Linear(64, 32, dropout=0.2))
mlp.add(ReLU())
mlp.add(Linear(32, 16, dropout=0.2))  
mlp.add(ReLU())
# Output layer.
mlp.add(Linear(16, 10))  


# The training cell for this scenario sets the name again before training.
mlp.set_name("Adam_network")
print(f"{mlp.model_name} is initialized and ready to be trained.")

### Train and Evaluate Scenario B

In [None]:

# Name the added-layer Scenario B model and report that name.
mlp.set_name("add2")
print(f"{mlp.model_name} is initialized and ready to be trained.")

# Train with the same settings used for the baseline run; stats are kept in addB.
addB = mlp.train_convergence(
    train_set = train_set, valid_set = valid_set,
    batch_size = 500, planned_epochs = 100,
    last_check = 10, threshold = 1e-25,
    report_interval = 5,
)

{"tags": ["hide-output"]}

In [None]:
# Plot the stats returned by the added-layer Scenario B training run. They are
# stored in `addB`; `add_stat` is never assigned in this notebook.
plot_results(addB)

In [None]:
# Loading best model found:

# Reload the model stored under "model/<current mlp model name>" and evaluate it
# on the train, validation and test splits.
# NOTE(review): presumably this is the best checkpoint written by train_convergence - confirm.
best_model = Net.load_model("model/" + mlp.model_name)
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")

### Scenario C

In [None]:
# "Adding a layer", Scenario C: rebinds `mlp` to a new network with five hidden layers.
mlp = Net(
    optimizer = Adam(),
    criterion = CrossEntropyLoss(),
    batch_norm = True,
    L2_reg_term = 0.001
)

# (input dim, output dim, dropout) of each hidden Linear layer; a ReLU follows each one.
# Relative to the baseline, the 1024 -> 512 layer uses dropout 0.4 (baseline 0.2)
# and 512 -> 64 -> 16 becomes 512 -> 128 -> 32 -> 16.
hidden_specs = [
    (128, 1024, 0.4),
    (1024, 512, 0.4),
    (512, 128, 0.2),
    (128, 32, 0.2),
    (32, 16, 0.2),
]
for n_in, n_out, drop in hidden_specs:
    mlp.add(Linear(n_in, n_out, dropout=drop))
    mlp.add(ReLU())

# Output layer.
mlp.add(Linear(16, 10))

# The training cell for this scenario sets the name again before training.
mlp.set_name("Adam_network")
print(f"{mlp.model_name} is initialized and ready to be trained.")

### Train and Evaluate Scenario C

In [None]:

# Name the added-layer Scenario C model and report that name.
mlp.set_name("add3")
print(f"{mlp.model_name} is initialized and ready to be trained.")

# Train with the same settings used for the baseline run; stats are kept in addC.
addC = mlp.train_convergence(
    train_set = train_set, valid_set = valid_set,
    batch_size = 500, planned_epochs = 100,
    last_check = 10, threshold = 1e-25,
    report_interval = 5,
)

{"tags": ["hide-output"]}

In [None]:
# Plot the stats returned by the added-layer Scenario C training run. They are
# stored in `addC`; `add_stat` is never assigned in this notebook.
plot_results(addC)

In [None]:
# Loading best model found:

# Reload the model stored under "model/<current mlp model name>" and evaluate it
# on the train, validation and test splits.
# NOTE(review): presumably this is the best checkpoint written by train_convergence - confirm.
best_model = Net.load_model("model/" + mlp.model_name)
best_model.test_network(train_set, "train data")
best_model.test_network(valid_set, "valid data")
best_model.test_network(test_set, "test data")