In [1]:
"""
Notebook that runs grid searches for different neural network sizes to find best hyperparameters
with basket (30-dim) option prices generated by both Merton Jump Diffusion and NIG models,
then trains the models with those parameters and evaluates the performance on unseen data
"""


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from tensorflow import keras
from tensorflow.random import set_seed
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from math import floor
import os

path = os.path.abspath(os.getcwd())
data_path = path + '\\OptionData\\'
result_path = path + '\\Results\\'

np.random.seed(seed=123)
set_seed(1234)

# 1 Prepare Data

In [2]:
T = [0.3, 1, 3]

In [3]:
def get_train_test_data(model_name):
    df = pd.read_csv(data_path + '30_dim_{}_prices.csv'.format(model_name))
    X = np.array(df.drop(['0.3', '1.0', '3.0'], axis=1))
    y = np.array(df[['0.3', '1.0', '3.0']])
    X_train, X_test, y_train_all, y_test_all = train_test_split(X, y, test_size=0.2)
    y_train_dict, y_test_dict = [{0.3: y_arr.T[0], 1: y_arr.T[1], 3: y_arr.T[2]} for y_arr in (y_train_all, y_test_all)]
    return X_train, X_test, y_train_dict, y_test_dict

In [4]:
"""
Merton model data
"""

X_train_mert, X_test_mert, y_train_mert, y_test_mert = get_train_test_data('merton')

In [5]:
"""
NIG model data
"""

X_train_nig, X_test_nig, y_train_nig, y_test_nig = get_train_test_data('nig')

# 2 Grid Search

## 2.1 Helper Functions

In [6]:
"""
Helper function to create model from given number of weights and hidden layers
"""

def create_model(hidden_layers=1, learning_rate=0.001, n_weights=30):
    # formulas derived from nWeights = sum (d(l-1)+1)*d(l) for all layers l with output dim d(l)
    
    if hidden_layers == 1: # 100% of neurons in first hidden layer
        neurons = [floor((n_weights - 1) / 32)]
    elif hidden_layers == 2: # 70% / 30% split of neurons
        x = 1/6 * (np.sqrt(84 * n_weights + 49645) - 223)
        neurons = list(map(floor,[x, 3/7 * x]))
    elif hidden_layers == 3: # 50% / 30% / 20% split
        x = 5/21 * (np.sqrt(21 * n_weights + 6540) - 81)
        neurons = list(map(floor, [x, 3/5 * x, 2/5 * x]))
    else:
        raise Exception('Only 1, 2 or 3 layers allowed')
        
    model = Sequential([Dense(neurons[0], activation='relu', input_dim=30)])
    for n in neurons[1:]:
        model.add(Dense(n, activation='relu'))
    model.add(Dense(1))

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')

    return model


In [7]:
"""
Runs grid search for given train and test data with network of given size
"""

def run_grid_search(X_train, y_train, n_weights, n_epochs):
    
    batch_size = [64, 128, 256, 512]
    learning_rate = [0.1, 0.01, 0.001, 0.0001]
    hidden_layers = [1, 2, 3]

    p_grid = dict(hidden_layers=hidden_layers, batch_size=batch_size, learning_rate=learning_rate, epochs=n_epochs)
    
    def creator(hidden_layers, learning_rate): return create_model(hidden_layers, learning_rate, n_weights)
    
    model = KerasRegressor(creator, verbose=0)
    grid = GridSearchCV(estimator=model, param_grid=p_grid, n_jobs=-1, cv=4, verbose=3)
    grid_result = grid.fit(X_train, y_train)
    print('Best: {} using {}'.format(grid_result.best_score_, grid_result.best_params_))
    return grid_result

In [8]:
"""
Saves performance result for all parameters and best results for each network size
"""

def save_results(result_dict, pricer_name, maturity):

    columns = ['mean_fit_time', 'param_batch_size', 'param_hidden_layers', 'param_learning_rate', 
                'param_epochs', 'mean_test_score', 'rank_test_score']
    dfs = []
    best_dfs = []
    for n in result_dict.keys():
        df = pd.DataFrame(result_dict[n].cv_results_)[columns]
        df['n_weights'] = n
        df.columns = ['Training Time (s)', 'Batch Size', 'Hidden Layers', 'Learning Rate', 'Epochs',
                      'Validation MSE', 'Rank', 'Weights']
        df = df[['Weights', 'Validation MSE', 'Hidden Layers', 'Learning Rate', 'Batch Size',
                 'Epochs', 'Training Time (s)', 'Rank']]
        df['Validation MSE'] = (-df['Validation MSE']).apply(lambda x: '{:.3e}'.format(x))
        dfs.append(df)
        best_dfs.append(df[df['Rank'] == 1].drop(['Rank'], axis=1))
    result_df = pd.concat(dfs).reset_index(drop=True)
    best_result_df = pd.concat(best_dfs).reset_index(drop=True)
    result_df.to_csv(result_path + 'thirty_dim_{}_{}_grid_search_all_results.csv'.format(pricer_name, maturity), index=False)
    best_result_df.to_csv(result_path + 'thirty_dim_{}_{}_grid_search_overview.csv'.format(pricer_name, maturity), index=False)
    return best_result_df

## 2.2 Perform Grid Search

In [9]:
n_epochs = {100: [10, 25, 50, 100],
               300: [20, 50, 100, 150],
               600: [20, 50, 100, 150],
               1000: [20, 50, 100, 150],
               3000: [50, 100, 150, 200],
               6000: [50, 100, 150, 200],
               10000: [50, 100, 175, 300],
               30000: [100, 175, 300, 500]}

n_weights = n_epochs.keys()

In [10]:
"""
Merton Prices
"""

res_mert = {}
for t in T:
    print('T = {}'.format(t))
    res_mert[t] = {}
    for n in n_weights:
        print('Network Size = {}'.format(n))
        res_mert[t][n] = run_grid_search(X_train_mert, y_train_mert[t], n, n_epochs[n])
    res_mert[t] = save_results(res_mert[t], 'merton', t)
    display(res_mert[t])

T = 0.3
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -0.00013519172671294655 using {'batch_size': 512, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -7.47514729937393e-06 using {'batch_size': 64, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -6.8529977568232425e-06 using {'batch_size': 512, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -6.645222399015438e-07 using {'batch_size': 128, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -5.6336026510450665e-08 using {'batch_size': 256, 'epochs': 200, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 60

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,100,0.0001352,1,0.01,512,100,2.356949
1,300,7.475e-06,1,0.01,64,50,3.927251
2,600,6.853e-06,1,0.01,512,50,1.970482
3,1000,6.645e-07,1,0.01,128,50,3.102207
4,3000,5.634e-08,1,0.01,256,200,6.077503
5,6000,4.215e-08,2,0.001,64,100,9.178462
6,10000,2.998e-08,2,0.0001,64,300,28.005627
7,30000,2.72e-09,1,0.0001,64,500,72.039154


T = 1
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -0.00024730225413804874 using {'batch_size': 512, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.6507884424754593e-05 using {'batch_size': 512, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.6571483147108665e-06 using {'batch_size': 512, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -5.037265431440119e-07 using {'batch_size': 128, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits




Best: -3.80241687025773e-08 using {'batch_size': 64, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 6000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -5.718342244165342e-08 using {'batch_size': 256, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 10000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.3344429439807755e-08 using {'batch_size': 128, 'epochs': 175, 'hidden_layers': 2, 'learning_rate': 0.001}
Network Size = 30000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -6.1437884779635965e-09 using {'batch_size': 128, 'epochs': 175, 'hidden_layers': 2, 'learning_rate': 0.001}


Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,100,0.0002473,1,0.1,512,100,2.591322
1,300,1.651e-05,1,0.1,512,150,3.312144
2,600,2.657e-06,1,0.01,512,150,3.566714
3,1000,5.037e-07,1,0.01,128,100,5.718213
4,3000,3.802e-08,1,0.01,64,50,5.411533
5,6000,5.718e-08,1,0.01,256,100,4.399987
6,10000,3.334e-08,2,0.001,128,175,158.549869
7,30000,6.144e-09,2,0.001,128,175,17.711648


T = 3
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -0.0003125266666756943 using {'batch_size': 64, 'epochs': 10, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.146569866250502e-05 using {'batch_size': 128, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -4.279360098280449e-06 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 3, 'learning_rate': 0.001}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.0512271586549105e-06 using {'batch_size': 64, 'epochs': 100, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits




Best: -9.5084918605437e-08 using {'batch_size': 64, 'epochs': 200, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 6000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -7.373189347958942e-08 using {'batch_size': 128, 'epochs': 200, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 10000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.2589511977931807e-08 using {'batch_size': 64, 'epochs': 300, 'hidden_layers': 2, 'learning_rate': 0.0001}
Network Size = 30000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.5111262796629887e-08 using {'batch_size': 512, 'epochs': 500, 'hidden_layers': 2, 'learning_rate': 0.001}


Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,100,0.0003125,1,0.1,64,10,2.29686
1,300,1.147e-05,1,0.01,128,50,3.009704
2,600,4.279e-06,3,0.001,256,150,4.987917
3,1000,1.051e-06,1,0.01,64,100,7.714874
4,3000,9.508e-08,1,0.01,64,200,15.715235
5,6000,7.373e-08,1,0.01,128,200,9.939178
6,10000,3.259e-08,2,0.0001,64,300,27.866001
7,30000,1.511e-08,2,0.001,512,500,24.044717


In [11]:
"""
NIG Prices
"""

np.random.seed(seed=123)
set_seed(1234)

res_nig = {}
for t in T:
    print('T = {}'.format(t))
    res_nig[t] = {}
    for n in n_weights:
        print('Network Size = {}'.format(n))
        res_nig[t][n] = run_grid_search(X_train_nig, y_train_nig[t], n, n_epochs[n])
    res_nig[t] = save_results(res_nig[t], 'nig', t)
    display(res_nig[t])

T = 0.3
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -0.00016376167332055047 using {'batch_size': 128, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -7.963892699081043e-06 using {'batch_size': 512, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.2355839348620066e-06 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -8.485220632792334e-07 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -4.298360956056513e-08 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,100,0.0001638,1,0.01,128,50,2.905483
1,300,7.964e-06,1,0.01,512,50,1.767275
2,600,2.236e-06,1,0.01,256,150,4.122229
3,1000,8.485e-07,1,0.01,256,150,5.533207
4,3000,4.298e-08,1,0.01,256,150,4.810141
5,6000,4.89e-08,1,0.01,128,100,5.380864
6,10000,3.236e-08,1,0.001,64,175,13.389454
7,30000,9.437e-09,2,0.0001,64,500,74.785064


T = 1
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits




Best: -0.00010869177185668377 using {'batch_size': 64, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -9.664057074587618e-06 using {'batch_size': 512, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.943445553820311e-06 using {'batch_size': 64, 'epochs': 20, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.1306259430909904e-06 using {'batch_size': 256, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -6.9619661591247e-08 using {'batch_size': 512, 'epochs': 200, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 6000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -7.342146091104951e-0

Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,100,0.0001087,1,0.01,64,50,3.935478
1,300,9.664e-06,1,0.1,512,150,2.991252
2,600,1.943e-06,1,0.01,64,20,3.163044
3,1000,1.131e-06,1,0.01,256,150,4.463068
4,3000,6.962e-08,1,0.01,512,200,6.182472
5,6000,7.342e-08,2,0.001,64,200,16.690628
6,10000,3.439e-08,2,0.0001,64,300,28.325273
7,30000,4.314e-09,1,0.0001,64,500,70.893716


T = 3
Network Size = 100
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -0.00011215805966457992 using {'batch_size': 64, 'epochs': 10, 'hidden_layers': 1, 'learning_rate': 0.1}
Network Size = 300
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -2.5325010938104242e-05 using {'batch_size': 64, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 600
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -3.236826103147905e-06 using {'batch_size': 64, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 1000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.3283259079344134e-06 using {'batch_size': 64, 'epochs': 50, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 3000
Fitting 4 folds for each of 192 candidates, totalling 768 fits




Best: -1.9449969634877107e-07 using {'batch_size': 64, 'epochs': 150, 'hidden_layers': 1, 'learning_rate': 0.01}
Network Size = 6000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -5.213885945209995e-08 using {'batch_size': 128, 'epochs': 200, 'hidden_layers': 2, 'learning_rate': 0.001}
Network Size = 10000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -1.9251213068116613e-08 using {'batch_size': 128, 'epochs': 175, 'hidden_layers': 2, 'learning_rate': 0.001}
Network Size = 30000
Fitting 4 folds for each of 192 candidates, totalling 768 fits
Best: -8.194028566776979e-09 using {'batch_size': 512, 'epochs': 500, 'hidden_layers': 2, 'learning_rate': 0.001}


Unnamed: 0,Weights,Validation MSE,Hidden Layers,Learning Rate,Batch Size,Epochs,Training Time (s)
0,100,0.0001122,1,0.1,64,10,2.282897
1,300,2.533e-05,1,0.01,64,50,4.055408
2,600,3.237e-06,1,0.01,64,50,4.500967
3,1000,1.328e-06,1,0.01,64,50,3.91628
4,3000,1.945e-07,1,0.01,64,150,8.116301
5,6000,5.214e-08,2,0.001,128,200,9.737217
6,10000,1.925e-08,2,0.001,128,175,9.316592
7,30000,8.194e-09,2,0.001,512,500,23.200225


# 3 Run Models on Test Data

In [10]:
np.random.seed(seed=123)
set_seed(1234)

In [11]:
"""
Creates overview with best performing models for each weight and number of hidden layers from grid search
"""

def get_performance_overview(pricer, maturity):
    result_df = pd.read_csv(result_path + 'thirty_dim_{}_{}_grid_search_all_results.csv'.format(pricer, maturity))
    dfs = []
    for hidden in [1, 2, 3]:
        df = result_df[result_df['Hidden Layers'] == hidden]
        # filter best results for each number of weights
        idx = df.groupby(['Weights'])['Validation MSE'].transform(min) == df['Validation MSE']
        df = df[idx].sort_values('Weights')
        # filter for best run time for each number of weights (only if multiple models have same MSE)
        idx = df.groupby(['Weights'])['Training Time (s)'].transform(min) == df['Training Time (s)']
        df = df[idx].sort_values('Weights')
        dfs.append(df)
    return pd.concat(dfs).reset_index(drop=True)

In [12]:
"""
Trains and evaluates all models with best parameters (from grid search) for a given pricing model and maturity
with early stopping
"""
from time import time

def run_tests(df_overview, X_train, y_train, X_test, y_test, pricer, maturity):
    test_mse = []
    train_epochs = []
    train_time = []
    earlyStop = keras.callbacks.EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)    
    for weights, hidden, lr, batch in zip(df_overview['Weights'], df_overview['Hidden Layers'], 
                                          df_overview['Learning Rate'], df_overview['Batch Size']):
        model = create_model(hidden, float(lr), weights)
        start = time()
        hist = model.fit(X_train, y_train, epochs=5000, batch_size=batch, verbose=0, callbacks=[earlyStop])
        train_time.append(time() - start)
        test_mse.append(model.evaluate(X_test, y_test, verbose=0))
        train_epochs.append(hist.epoch[-1])
    df_overview['Test MSE'] = test_mse
    df_overview['Epochs (Early Stop)'] = train_epochs
    df_overview['Training Time (s)'] = train_time
    df_overview['Test MSE'] = df_overview['Test MSE'].apply(lambda x: '{:.3e}'.format(x))
    df_best = df_overview[df_overview['Rank'] == 1].sort_values('Weights')
    df_best = df_best[['Weights', 'Test MSE', 'Hidden Layers', 'Learning Rate', 'Batch Size',
                           'Epochs (Early Stop)', 'Training Time (s)']]
    df_overview.to_csv(result_path + 'thirty_dim_{}_{}_test_all_hidden.csv'.format(pricer, maturity), index=False)
    df_best.to_csv(result_path + 'thirty_dim_{}_{}_test_overview.csv'.format(pricer, maturity), index=False)
    return df_best

In [13]:
"""
Load all grid search overviews
"""

overviews = {}

for pricer in ['merton', 'nig']:
    overviews[pricer] = {}
    for t in T:
        overviews[pricer][t] = get_performance_overview(pricer, t)

In [14]:
"""
Run tests on Merton model data
"""

tests_mert = {}

for t in T:
    tests_mert[t] = run_tests(overviews['merton'][t], X_train_mert, y_train_mert[t], X_test_mert, y_test_mert[t],
                              'merton', t)
    display(tests_mert[t])

Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,100,0.0001479,1,0.01,512,58,0.610493
1,300,0.0001412,1,0.01,64,24,0.94076
2,600,1.419e-06,1,0.01,512,163,1.257532
3,1000,3.528e-07,1,0.01,128,41,0.990956
4,3000,7.962e-08,1,0.01,256,200,2.261756
13,6000,2.955e-08,2,0.001,64,69,2.104379
14,10000,8.749e-09,2,0.0001,64,210,5.836652
7,30000,5.835e-09,1,0.0001,64,302,11.972078


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,100,0.0435,1,0.1,512,14,0.364058
1,300,6.609e-06,1,0.1,512,48,0.509638
2,600,2.585e-05,1,0.01,512,142,1.022232
3,1000,1.736e-06,1,0.01,128,77,1.405243
4,3000,1.038e-08,1,0.01,64,31,1.069283
5,6000,3.695e-08,1,0.01,256,180,2.09363
14,10000,2.099e-08,2,0.001,128,68,1.928816
15,30000,1.13e-08,2,0.001,128,73,2.127311


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,100,1.059e-05,1,0.1,64,19,0.746041
1,300,7.062e-07,1,0.01,128,44,0.803865
18,600,0.05203,3,0.001,256,62,0.878765
3,1000,1.092e-06,1,0.01,64,38,1.148972
4,3000,2.676e-08,1,0.01,64,46,1.62604
5,6000,1.44e-07,1,0.01,128,92,1.818151
14,10000,1.758e-08,2,0.0001,64,203,6.202453
15,30000,2.791e-08,2,0.001,512,147,2.160224


In [15]:
"""
Run tests on NIG model data
"""

tests_nig = {}

for t in T:
    tests_nig[t] = run_tests(overviews['nig'][t], X_train_nig, y_train_nig[t], X_test_nig, y_test_nig[t],
                              'nig', t)
    display(tests_nig[t])

Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,100,0.0001846,1,0.01,128,18,0.548521
1,300,0.0001804,1,0.01,512,51,0.527781
2,600,1.788e-06,1,0.01,256,45,0.621364
3,1000,8.828e-07,1,0.01,256,118,1.246667
4,3000,2.049e-07,1,0.01,256,79,1.193963
5,6000,1.917e-08,1,0.01,128,85,1.540892
6,10000,4.575e-08,1,0.001,64,155,4.679589
15,30000,7.473e-08,2,0.0001,64,132,5.606055


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,100,0.05369,1,0.01,64,10,0.555517
1,300,0.0001873,1,0.1,512,26,0.567454
2,600,1.111e-06,1,0.01,64,32,1.174861
3,1000,7.045e-06,1,0.01,256,60,0.917546
4,3000,1.355e-07,1,0.01,512,253,1.797336
13,6000,1.272e-07,2,0.001,64,66,2.269943
14,10000,9.464e-08,2,0.0001,64,194,6.12674
7,30000,1.008e-07,1,0.0001,64,285,11.246945


Unnamed: 0,Weights,Test MSE,Hidden Layers,Learning Rate,Batch Size,Epochs (Early Stop),Training Time (s)
0,100,0.000247,1,0.1,64,8,0.743001
1,300,0.0001495,1,0.01,64,30,1.108038
2,600,3.911e-06,1,0.01,64,52,1.68154
3,1000,1.512e-06,1,0.01,64,44,1.329485
4,3000,1.827e-07,1,0.01,64,57,1.842082
13,6000,8.677e-08,2,0.001,128,120,2.201541
14,10000,2.395e-07,2,0.001,128,88,1.945955
15,30000,1.394e-07,2,0.001,512,190,2.329997
