In [1]:
"""
Notebook that runs grid searches for different neural network sizes to find best hyperparameters
with basket (2-dim) option prices generated by both Merton Jump Diffusion and NIG models,
then trains the models with those parameters and evaluates the performance on unseen data
"""


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from tensorflow import keras
from tensorflow.random import set_seed
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from math import floor
import os

path = os.path.abspath(os.getcwd())
data_path = path + '\\OptionData\\'
result_path = path + '\\Results\\'
plot_path = path + '\\Plots\\'

np.random.seed(seed=123)
set_seed(1234)

# 1 Prepare Data

In [2]:
T = [0.3, 1, 3]

In [3]:
def get_train_test_data(model_name):
    df = pd.read_csv(data_path + '2_dim_{}_prices.csv'.format(model_name))
    X = np.array(df[['s1', 's2']])
    y = np.array(df[['0.3', '1.0', '3.0']])
    X_train, X_test, y_train_all, y_test_all = train_test_split(X, y, test_size=0.2)
    y_train_dict, y_test_dict = [{0.3: y_arr.T[0], 1: y_arr.T[1], 3: y_arr.T[2]} for y_arr in (y_train_all, y_test_all)]
    return X_train, X_test, y_train_dict, y_test_dict

In [4]:
"""
Merton model data
"""

X_train_mert, X_test_mert, y_train_mert, y_test_mert = get_train_test_data('merton')

In [5]:
"""
NIG model data
"""

X_train_nig, X_test_nig, y_train_nig, y_test_nig = get_train_test_data('nig')

# 2 Grid Search

## 2.1 Helper Functions

In [6]:
"""
Helper function to create model from given number of weights and hidden layers
"""

def create_model(hidden_layers=1, learning_rate=0.001, n_weights=30):
    # formulas derived from nWeights = sum (d(l-1)+1)*d(l) for all layers l with output dim d(l)
    
    if hidden_layers == 1: # 100% of neurons in first hidden layer
        neurons = [floor((n_weights - 1) / 4)]
    elif hidden_layers == 2: # 70% / 30% split of neurons
        x = 1/6 * (np.sqrt(84 * n_weights + 645) - 27)
        neurons = list(map(floor,[x, 3/7 * x]))
    elif hidden_layers == 3: # 50% / 30% / 20% split
        x = 5/21 * (np.sqrt(21 * n_weights + 100) - 11)
        neurons = list(map(floor, [x, 3/5 * x, 2/5 * x]))
    else:
        raise Exception('Only 1, 2 or 3 layers allowed')
        
    model = Sequential([Dense(neurons[0], activation='relu', input_dim=2)])
    for n in neurons[1:]:
        model.add(Dense(n, activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')

    return model


In [7]:
"""
Runs grid search for given train and test data with network of given size
"""

def run_grid_search(X_train, y_train, n_weights, n_epochs):
    
    batch_size = [64, 128, 256, 512]
    learning_rate = [0.1, 0.01, 0.001, 0.0001]
    hidden_layers = [1, 2, 3]

    p_grid = dict(hidden_layers=hidden_layers, batch_size=batch_size, learning_rate=learning_rate, epochs=n_epochs)
    
    def creator(hidden_layers, learning_rate): return create_model(hidden_layers, learning_rate, n_weights)
    
    model = KerasRegressor(creator, verbose=0)
    grid = GridSearchCV(estimator=model, param_grid=p_grid, n_jobs=-1, cv=4, verbose=3)
    grid_result = grid.fit(X_train, y_train)
    print('Best: {} using {}'.format(grid_result.best_score_, grid_result.best_params_))
    return grid_result

In [8]:
"""
Saves performance result for all parameters and best results for each network size
"""

def save_results(result_dict, pricer_name, maturity):

    columns = ['mean_fit_time', 'param_batch_size', 'param_hidden_layers', 'param_learning_rate', 
                'param_epochs', 'mean_test_score', 'rank_test_score']
    dfs = []
    best_dfs = []
    for n in result_dict.keys():
        df = pd.DataFrame(result_dict[n].cv_results_)[columns]
        df['n_weights'] = n
        df.columns = ['Training Time (s)', 'Batch Size', 'Hidden Layers', 'Learning Rate', 'Epochs',
                      'Validation MSE', 'Rank', 'Weights']
        df = df[['Weights', 'Validation MSE', 'Hidden Layers', 'Learning Rate', 'Batch Size',
                 'Epochs', 'Training Time (s)', 'Rank']]
        df['Validation MSE'] = (-df['Validation MSE']).apply(lambda x: '{:.3e}'.format(x))
        dfs.append(df)
        best_dfs.append(df[df['Rank'] == 1].drop(['Rank'], axis=1))
    result_df = pd.concat(dfs).reset_index(drop=True)
    best_result_df = pd.concat(best_dfs).reset_index(drop=True)
    result_df.to_csv(result_path + 'two_dim_{}_{}_grid_search_all_results.csv'.format(pricer_name, maturity), index=False)
    best_result_df.to_csv(result_path + 'two_dim_{}_{}_grid_search_overview.csv'.format(pricer_name, maturity), index=False)
    return best_result_df

## 2.2 Perform Grid Search

In [9]:
n_epochs = {30: [10, 25, 50, 100],
               60: [10, 25, 50, 100],
               100: [10, 25, 50, 100],
               300: [20, 50, 100, 150],
               600: [20, 50, 100, 150],
               1000: [20, 50, 100, 150],
               3000: [50, 100, 150, 200],
               6000: [50, 100, 150, 200],
               10000: [50, 100, 175, 300],
               30000: [100, 175, 300, 500]}

n_weights = n_epochs.keys()

In [10]:
"""
Merton Prices
"""

res_mert = {}
for t in T:
    print('T = {}'.format(t))
    res_mert[t] = {}
    for n in n_weights:
        print('Network Size = {}'.format(n))
        res_mert[t][n] = run_grid_search(X_train_mert, y_train_mert[t], n, n_epochs[n])
    res_mert[t] = save_results(res_mert[t], 'merton', t)
    display(res_mert[t])

T = 0.3
Network Size = 30
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -4.28140019721468e-05 using {'batch_size': 128, 'epochs': 10, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 60
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.4954877630989358e-05 using {'batch_size': 128, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.144497628364661e-06 using {'batch_size': 64, 'epochs': 25, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.252235375976852e-07 using {'batch_size': 256, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -7.96577817041566e-08 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 1000
Fitti

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,30,4.281e-05,1,0.1,128,10,2.005383
1,60,2.495e-05,1,0.01,128,50,3.394167
2,100,2.144e-06,1,0.01,64,25,3.031886
3,300,2.252e-07,1,0.01,256,100,3.532047
4,600,7.966e-08,1,0.01,256,150,5.472853
5,1000,4.64e-08,1,0.001,64,150,16.224579
6,3000,1.191e-08,1,0.01,256,150,12.98101
7,6000,2.583e-08,1,0.01,256,150,26.338263
8,10000,2.207e-08,1,0.0001,128,300,74.942685
9,30000,3.353e-09,1,0.01,256,500,348.025121


T = 1
Network Size = 30
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -6.956561219340074e-05 using {'batch_size': 128, 'epochs': 25, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 60
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.0809500256291358e-05 using {'batch_size': 512, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -5.243419764155988e-06 using {'batch_size': 128, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits




Best: -1.0794993965390631e-07 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -8.42718703708556e-08 using {'batch_size': 64, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.001}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -8.661898420747605e-08 using {'batch_size': 128, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.0893395629985037e-08 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 6000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.0724848897858408e-08 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 10000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.200740218

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,30,6.957e-05,1,0.1,128,25,2.121821
1,60,2.081e-05,1,0.1,512,100,2.22704
2,100,5.243e-06,1,0.01,128,50,3.362002
3,300,1.079e-07,1,0.01,256,150,4.33066
4,600,8.427e-08,1,0.001,64,100,11.096553
5,1000,8.662e-08,1,0.01,128,150,10.640773
6,3000,1.089e-08,1,0.01,256,150,10.955207
7,6000,1.072e-08,1,0.01,256,150,23.430544
8,10000,1.201e-08,1,0.0001,64,175,45.497238
9,30000,3.125e-09,1,0.0001,64,300,225.680773


T = 3
Network Size = 30
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -0.00011424766489653848 using {'batch_size': 128, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 60
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.6144598475875682e-05 using {'batch_size': 64, 'epochs': 10, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.775068537630432e-06 using {'batch_size': 128, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.459072281941644e-07 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.2413363403851463e-07 using {'batch_size': 64, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.001}
Network Size = 1000
F



Best: -4.1925267701969915e-09 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 10000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.859521702231916e-09 using {'batch_size': 256, 'epochs': 175, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 30000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -5.976799832829727e-09 using {'batch_size': 64, 'epochs': 300, 'hidden_layers': 1, 'learning_rate': 0.0001}


Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,30,0.0001142,1,0.01,128,100,4.921579
1,60,1.614e-05,1,0.1,64,10,2.036301
2,100,2.775e-06,1,0.01,128,100,7.279518
3,300,3.459e-07,1,0.01,256,150,4.854508
4,600,1.241e-07,1,0.001,64,150,14.851254
5,1000,9.9e-08,1,0.01,256,150,6.414583
6,3000,6.25e-09,1,0.01,256,150,11.259118
7,6000,4.193e-09,1,0.01,256,150,21.893657
8,10000,3.86e-09,1,0.01,256,175,48.79965
9,30000,5.977e-09,1,0.0001,64,300,224.870192


In [11]:
"""
NIG Prices
"""

np.random.seed(seed=123)
set_seed(1234)

res_nig = {}
for t in T:
    print('T = {}'.format(t))
    res_nig[t] = {}
    for n in n_weights:
        print('Network Size = {}'.format(n))
        res_nig[t][n] = run_grid_search(X_train_nig, y_train_nig[t], n, n_epochs[n])
    res_nig[t] = save_results(res_nig[t], 'nig', t)
    display(res_nig[t])   

T = 0.3
Network Size = 30
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.8798840023446246e-05 using {'batch_size': 256, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 60
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -6.639495666149742e-06 using {'batch_size': 128, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.345284400462333e-06 using {'batch_size': 64, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -7.453840353832675e-07 using {'batch_size': 64, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.001}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.9310833937756797e-07 using {'batch_size': 64, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.001}
Network Size = 1000
F

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,30,3.88e-05,1,0.1,256,100,3.513846
1,60,6.639e-06,1,0.1,128,50,2.934397
2,100,2.345e-06,1,0.01,64,100,9.676105
3,300,7.454e-07,1,0.001,64,150,15.232733
4,600,2.931e-07,1,0.001,64,100,12.276146
5,1000,2.077e-07,1,0.01,64,150,18.648093
6,3000,7.476e-08,1,0.01,256,150,11.895166
7,6000,3.177e-08,1,0.01,128,200,31.621373
8,10000,3.735e-08,2,0.0001,64,300,76.276116
9,30000,1.292e-08,3,0.0001,64,300,97.911999


T = 1
Network Size = 30
Fitting 4 folds for each of 192 candidates, totalling 768 fits




Best: -3.6615063891076716e-05 using {'batch_size': 512, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 60
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -9.39428036872414e-06 using {'batch_size': 512, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -4.2247732494615775e-06 using {'batch_size': 256, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.154393439785963e-07 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.8479046737240878e-07 using {'batch_size': 64, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.001}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.181364871540637e-0

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,30,3.662e-05,1,0.1,512,50,3.446028
1,60,9.394e-06,1,0.1,512,50,1.91662
2,100,4.225e-06,1,0.01,256,100,3.800829
3,300,3.154e-07,1,0.01,256,150,5.543166
4,600,1.848e-07,1,0.001,64,150,18.343409
5,1000,3.181e-08,1,0.01,256,150,8.025522
6,3000,2.647e-08,1,0.001,128,150,15.511238
7,6000,1.897e-08,1,0.01,256,150,27.427348
8,10000,2.73e-08,1,0.01,512,300,74.952927
9,30000,3.049e-09,1,0.0001,64,500,391.165895


T = 3
Network Size = 30
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.0983824470022228e-05 using {'batch_size': 64, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 60
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.7180125496452092e-05 using {'batch_size': 128, 'epochs': 10, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.962325851285641e-06 using {'batch_size': 256, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.354720542996347e-07 using {'batch_size': 512, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits




Best: -7.333988438773531e-08 using {'batch_size': 128, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -9.234348752329424e-08 using {'batch_size': 64, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.001}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -4.771984307705424e-09 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 6000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.3077761762403384e-08 using {'batch_size': 512, 'epochs': 200, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 10000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.3460788584349643e-09 using {'batch_size': 64, 'epochs': 300, 'hidden_layers': 1, 'learning_rate': 0.0001}
Network Size = 30000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.799477

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,30,2.098e-05,1,0.1,64,50,5.408276
1,60,1.718e-05,1,0.1,128,10,2.562891
2,100,2.962e-06,1,0.01,256,100,3.761932
3,300,2.355e-07,1,0.01,512,150,5.213297
4,600,7.334e-08,1,0.01,128,150,10.877888
5,1000,9.234e-08,1,0.001,64,100,9.568642
6,3000,4.772e-09,1,0.01,256,150,13.947673
7,6000,1.308e-08,1,0.01,512,200,27.157071
8,10000,3.346e-09,1,0.0001,64,300,80.847632
9,30000,2.799e-09,1,0.0001,128,500,366.398713


# 3 Run Models on Test Data

In [10]:
np.random.seed(seed=123)
set_seed(1234)

In [11]:
"""
Creates overview with best performing models for each weight and number of hidden layers from grid search
"""

def get_performance_overview(pricer, maturity):
    result_df = pd.read_csv(result_path + 'two_dim_{}_{}_grid_search_all_results.csv'.format(pricer, maturity))
    dfs = []
    for hidden in [1, 2, 3]:
        df = result_df[result_df['Hidden Layers'] == hidden]
        # filter best results for each number of weights
        idx = df.groupby(['Weights'])['Validation MSE'].transform(min) == df['Validation MSE']
        df = df[idx].sort_values('Weights')
        # filter for best run time for each number of weights (only if multiple models have same MSE)
        idx = df.groupby(['Weights'])['Training Time (s)'].transform(min) == df['Training Time (s)']
        df = df[idx].sort_values('Weights')
        dfs.append(df)
    return pd.concat(dfs).reset_index(drop=True)

In [12]:
"""
Trains and evaluates all models with best parameters (from grid search) for a given pricing model and maturity
with early stopping
"""
from time import time

def run_tests(df_overview, X_train, y_train, X_test, y_test, pricer, maturity):
    test_mse = []
    train_epochs = []
    train_time = []
    earlyStop = keras.callbacks.EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)    
    for weights, hidden, lr, batch in zip(df_overview['Weights'], df_overview['Hidden Layers'], 
                                          df_overview['Learning Rate'], df_overview['Batch Size']):
        model = create_model(hidden, float(lr), weights)
        start = time()
        hist = model.fit(X_train, y_train, epochs=5000, batch_size=batch, verbose=0, callbacks=[earlyStop])
        train_time.append(time() - start)
        test_mse.append(model.evaluate(X_test, y_test, verbose=0))
        train_epochs.append(hist.epoch[-1])
    df_overview['Test MSE'] = test_mse
    df_overview['Epochs (Early Stop)'] = train_epochs
    df_overview['Training Time (s)'] = train_time
    df_overview['Test MSE'] = df_overview['Test MSE'].apply(lambda x: '{:.3e}'.format(x))
    df_best = df_overview[df_overview['Rank'] == 1].sort_values('Weights')
    df_best = df_best[['Weights', 'Test MSE', 'Hidden Layers', 'Learning Rate', 'Batch Size',
                           'Epochs (Early Stop)', 'Training Time (s)']]
    df_overview.to_csv(result_path + 'two_dim_{}_{}_test_all_hidden.csv'.format(pricer, maturity), index=False)
    df_best.to_csv(result_path + 'two_dim_{}_{}_test_overview.csv'.format(pricer, maturity), index=False)
    return df_best

In [13]:
"""
Load all grid search overviews
"""

overviews = {}

for pricer in ['merton', 'nig']:
    overviews[pricer] = {}
    for t in T:
        overviews[pricer][t] = get_performance_overview(pricer, t)

In [14]:
"""
Run tests on Merton model data
"""

tests_mert = {}

for t in T:
    tests_mert[t] = run_tests(overviews['merton'][t], X_train_mert, y_train_mert[t], X_test_mert, y_test_mert[t],
                              'merton', t)
    display(tests_mert[t])

Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,30,0.0001355,1,0.1,128,12,0.619539
1,60,2.224e-05,1,0.01,128,46,1.243558
2,100,2.729e-06,1,0.01,64,21,1.245661
3,300,3.43e-07,1,0.01,256,43,0.833024
4,600,2.707e-07,1,0.01,256,39,0.852093
5,1000,3.351e-08,1,0.001,64,79,4.009097
6,3000,1.956e-07,1,0.01,256,43,1.243764
7,6000,6.592e-08,1,0.01,256,67,3.252337
8,10000,5.756e-08,1,0.0001,128,199,14.597436
9,30000,1.257e-07,1,0.01,256,51,6.695072


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,30,5.203e-05,1,0.1,128,13,0.622335
1,60,1.118e-05,1,0.1,512,69,0.754951
2,100,3.651e-06,1,0.01,128,41,1.185114
3,300,4.161e-07,1,0.01,256,57,1.111089
4,600,5.566e-08,1,0.001,64,74,3.765536
5,1000,1.589e-07,1,0.01,128,47,1.595018
6,3000,4.485e-08,1,0.01,256,52,1.408397
7,6000,7.079e-08,1,0.01,256,49,2.276167
8,10000,7.39e-08,1,0.0001,64,136,12.373397
9,30000,3.474e-08,1,0.0001,64,91,18.407783


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,30,0.002269,1,0.01,128,7,0.431964
1,60,1.774e-05,1,0.1,64,13,0.959408
2,100,7.417e-06,1,0.01,128,100,2.182676
3,300,3.74e-07,1,0.01,256,62,1.037662
4,600,1.133e-07,1,0.001,64,77,3.748373
5,1000,5.919e-08,1,0.01,256,151,2.6953
6,3000,9.214e-09,1,0.01,256,75,2.070772
7,6000,4.688e-08,1,0.01,256,51,2.271955
8,10000,2.177e-08,1,0.01,256,67,4.408187
9,30000,4.183e-08,1,0.0001,64,125,24.463598


In [15]:
"""
Run tests on NIG model data
"""

tests_nig = {}

for t in T:
    tests_nig[t] = run_tests(overviews['nig'][t], X_train_nig, y_train_nig[t], X_test_nig, y_test_nig[t],
                              'nig', t)
    display(tests_nig[t])

Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,30,4.788e-05,1,0.1,256,11,0.66522
1,60,3.449e-06,1,0.1,128,20,0.836762
2,100,2.787e-06,1,0.01,64,29,1.531902
3,300,1.085e-06,1,0.001,64,91,3.900599
4,600,2.506e-07,1,0.001,64,86,4.338402
5,1000,9.171e-07,1,0.01,64,20,1.487983
6,3000,2.265e-07,1,0.01,256,64,1.530908
7,6000,3.465e-07,1,0.01,128,40,1.721398
18,10000,2.565e-07,2,0.0001,64,149,6.966472
29,30000,2.255e-07,3,0.0001,64,91,5.888258


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,30,0.0001241,1,0.1,512,38,0.714119
1,60,3.895e-05,1,0.1,512,32,0.562497
2,100,7.899e-06,1,0.01,256,78,1.031388
3,300,9.381e-07,1,0.01,256,67,1.116016
4,600,3.727e-07,1,0.001,64,71,2.958172
5,1000,2.404e-07,1,0.01,256,53,0.987361
6,3000,7.643e-08,1,0.001,128,68,2.420529
7,6000,9.762e-08,1,0.01,256,67,3.239395
8,10000,9.696e-08,1,0.01,512,125,6.38493
9,30000,1.096e-07,1,0.0001,64,108,21.129513


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,30,3.562e-05,1,0.1,64,10,0.677197
1,60,6.427e-05,1,0.1,128,10,0.601362
2,100,2.979e-05,1,0.01,256,48,0.955443
3,300,9.236e-08,1,0.01,512,150,1.433222
4,600,5.177e-08,1,0.01,128,87,2.183419
5,1000,3.955e-08,1,0.001,64,70,3.395921
6,3000,2.155e-08,1,0.01,256,53,1.340416
7,6000,4.645e-08,1,0.01,512,93,3.449748
8,10000,3.238e-08,1,0.0001,64,131,9.805766
9,30000,3.124e-08,1,0.0001,128,200,33.052636
