In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from dython import nominal
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, KFold
from sklearn import preprocessing
from sklearn.pipeline import make_pipeline
from sklearn.metrics import precision_score, recall_score, f1_score, roc_curve, auc, confusion_matrix
import pickle 

In [2]:
# Directory holding the per-model results CSVs. The trailing slash matters:
# the loading cells build each path by plain string concatenation.
results_dir = './data/results/'

In [3]:
def group_seeded_data(data, groupby_columns, droped_cols=['seed', 'split']):
    """Average the numeric columns of ``data`` within each ``groupby_columns`` group.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to aggregate; the notebook passes the results CSVs it loads.
    groupby_columns : list of str
        Columns whose distinct value combinations define the groups.
    droped_cols : label or list of labels, default ['seed', 'split']
        Columns removed from the averaged frame (the spelling is kept so
        existing keyword callers keep working). Every label must still be
        present after averaging, otherwise ``DataFrame.drop`` raises
        ``KeyError``. The default list is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        One row per group: the grouping keys restored as ordinary columns,
        followed by the group means of the remaining numeric columns, minus
        ``droped_cols``.
    """
    grouped_data = (
        data
        .groupby(groupby_columns)
        # numeric_only=True skips non-key text columns (e.g. 'task', 'model' in
        # the LSTM results). Without it pandas >= 2.0 raises TypeError when it
        # tries to average strings; older versions dropped them silently.
        .mean(numeric_only=True)
        .reset_index()
        .drop(columns=droped_cols)
    )
    return grouped_data

# Classification data

In [4]:
# Raw support-vector classification results; the CSV's first column becomes the index.
svm_cla = pd.read_csv(
    filepath_or_buffer=results_dir + 'support_vector_classification_results_df.csv',
    index_col=[0],
)

In [5]:
# One averaged row per (C, kernel, gamma, shrinking, seed); the 'split' column is
# dropped after averaging, while 'seed' is kept because it is a grouping key.
gsvm_cla = group_seeded_data(
    data=svm_cla,
    groupby_columns=['C', 'kernel', 'gamma', 'shrinking', 'seed'],
    droped_cols=['split'],
)

In [6]:
# Show the grouped SVM classification frame as this cell's output.
gsvm_cla

Unnamed: 0,C,kernel,gamma,shrinking,seed,train_accuracy,valid_accuracy,valid_precision,valid_recall,valid_f1_score,valid_auroc
0,0.1,linear,0.1,False,193768,0.964340,0.963688,0.988112,0.941205,0.964056,0.964552
1,0.1,linear,0.1,False,327407,0.963918,0.961533,0.986595,0.938344,0.961830,0.962303
2,0.1,linear,0.1,False,586930,0.964417,0.964150,0.986008,0.944362,0.964678,0.965006
3,0.1,linear,0.1,False,660108,0.964687,0.962763,0.985399,0.942221,0.963262,0.963632
4,0.1,linear,0.1,False,720389,0.963918,0.962302,0.985294,0.940957,0.962587,0.963002
...,...,...,...,...,...,...,...,...,...,...,...
595,0.9,sigmoid,10.0,True,193768,0.497807,0.497771,0.515383,0.511476,0.513330,0.497292
596,0.9,sigmoid,10.0,True,327407,0.492114,0.489149,0.507159,0.505156,0.505496,0.489086
597,0.9,sigmoid,10.0,True,586930,0.493076,0.494690,0.512879,0.509264,0.510738,0.494490
598,0.9,sigmoid,10.0,True,660108,0.494460,0.496534,0.514046,0.510062,0.511925,0.496039


In [7]:
# Validation metric columns summarised over every row of the grouped SVM frame.
metric_cols = ('valid_accuracy', 'valid_precision', 'valid_recall', 'valid_f1_score', 'valid_auroc')

# Mean of each metric column, in the order listed in metric_cols.
svm_cla_acc, svm_cla_prec, svm_cla_rec, svm_cla_f1, svm_cla_auroc = (
    gsvm_cla[col].mean() for col in metric_cols
)

# Variance of each metric column (pandas' default, i.e. sample variance).
svm_cla_acc_var, svm_cla_prec_var, svm_cla_rec_var, svm_cla_f1_var, svm_cla_auroc_var = (
    gsvm_cla[col].var() for col in metric_cols
)

In [8]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average accuracy score on the valid set is {svm_cla_acc.round(3)}, while the variance is {svm_cla_acc_var.round(3)}\n",
    f"The average precision score on the valid set is {svm_cla_prec.round(3)}, while the variance is {svm_cla_prec_var.round(3)}\n",
    f"The average recall score on the valid set is {svm_cla_rec.round(3)}, while the variance is {svm_cla_rec_var.round(3)}\n",
    f"The average f1 score on the valid set is {svm_cla_f1.round(3)}, while the variance is {svm_cla_f1_var.round(3)}\n",
    f"The average auroc score on the valid set is {svm_cla_auroc.round(3)}, while the variance is {svm_cla_auroc_var.round(3)}\n",
    sep='\n',
)

The average accuracy score on the valid set is 0.855, while the variance is 0.048

The average precision score on the valid set is 0.859, while the variance is 0.045

The average recall score on the valid set is 0.863, while the variance is 0.046

The average f1 score on the valid set is 0.86, while the variance is 0.045

The average auroc score on the valid set is 0.854, while the variance is 0.048



------------

In [9]:
# Raw logistic classification results; the CSV's first column becomes the index.
# NOTE(review): despite the `_reg` suffix this is a classification sweep, and the
# name `lr_reg` is rebound to the linear-regression results later in the notebook.
lr_reg = pd.read_csv(
    filepath_or_buffer=results_dir + 'logistic_classification_results_df.csv',
    index_col=[0],
)

In [10]:
# One averaged row per (penalty, C, solver, seed); 'split' is dropped after averaging.
# NOTE(review): `glr_reg` is reassigned by the linear-regression section further down.
glr_reg = group_seeded_data(
    data=lr_reg,
    groupby_columns=['penalty', 'C', 'solver', 'seed'],
    droped_cols=['split'],
)

In [11]:
# Show the grouped logistic classification frame as this cell's output.
glr_reg

Unnamed: 0,penalty,C,solver,seed,train_accuracy,valid_accuracy,valid_precision,valid_recall,valid_f1_score,valid_auroc
0,elasticnet,0.1,saga,193768,0.952647,0.952146,0.965229,0.941491,0.953175,0.952587
1,elasticnet,0.1,saga,327407,0.951493,0.950917,0.963393,0.940730,0.951924,0.951245
2,elasticnet,0.1,saga,586930,0.951608,0.950300,0.964565,0.938553,0.951334,0.950817
3,elasticnet,0.1,saga,660108,0.950992,0.950453,0.963150,0.940319,0.951586,0.950858
4,elasticnet,0.1,saga,720389,0.953762,0.952762,0.965907,0.941863,0.953684,0.953044
...,...,...,...,...,...,...,...,...,...,...
595,none,1.0,saga,193768,0.964994,0.962763,0.968205,0.959679,0.963852,0.962955
596,none,1.0,saga,327407,0.965649,0.965072,0.970586,0.961604,0.966071,0.965158
597,none,1.0,saga,586930,0.967841,0.966147,0.971518,0.962787,0.967094,0.966273
598,none,1.0,saga,660108,0.966187,0.964763,0.971119,0.960491,0.965755,0.964951


In [12]:
# Validation metric columns summarised over every row of the grouped logistic frame.
metric_cols = ('valid_accuracy', 'valid_precision', 'valid_recall', 'valid_f1_score', 'valid_auroc')

# Mean of each metric column, in the order listed in metric_cols.
lr_reg_acc, lr_reg_prec, lr_reg_rec, lr_reg_f1, lr_reg_auroc = (
    glr_reg[col].mean() for col in metric_cols
)

# Variance of each metric column (pandas' default, i.e. sample variance).
lr_reg_acc_var, lr_reg_prec_var, lr_reg_rec_var, lr_reg_f1_var, lr_reg_auroc_var = (
    glr_reg[col].var() for col in metric_cols
)

In [13]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average accuracy score on the valid set is {lr_reg_acc.round(3)}, while the variance is {lr_reg_acc_var.round(3)}\n",
    f"The average precision score on the valid set is {lr_reg_prec.round(3)}, while the variance is {lr_reg_prec_var.round(3)}\n",
    f"The average recall score on the valid set is {lr_reg_rec.round(3)}, while the variance is {lr_reg_rec_var.round(3)}\n",
    f"The average f1 score on the valid set is {lr_reg_f1.round(3)}, while the variance is {lr_reg_f1_var.round(3)}\n",
    f"The average auroc score on the valid set is {lr_reg_auroc.round(3)}, while the variance is {lr_reg_auroc_var.round(3)}\n",
    sep='\n',
)

The average accuracy score on the valid set is 0.962, while the variance is 0.0

The average precision score on the valid set is 0.97, while the variance is 0.0

The average recall score on the valid set is 0.956, while the variance is 0.0

The average f1 score on the valid set is 0.963, while the variance is 0.0

The average auroc score on the valid set is 0.962, while the variance is 0.0



-------------

In [14]:
# Raw decision-tree classification results; the CSV's first column becomes the index.
dt_cla = pd.read_csv(
    filepath_or_buffer=results_dir + 'decision_tree_classification_results_df.csv',
    index_col=[0],
)

In [15]:
# One averaged row per tree configuration and seed; 'split' is dropped after averaging.
gdt_cla = group_seeded_data(
    data=dt_cla,
    groupby_columns=[
        'criterions',
        'max_depth',
        'min_samples_split',
        'min_samples_leaf',
        'max_features',
        'seed',
    ],
    droped_cols=['split'],
)

In [16]:
# Show the grouped decision-tree classification frame as this cell's output.
gdt_cla

Unnamed: 0,criterions,max_depth,min_samples_split,min_samples_leaf,max_features,seed,train_accuracy,valid_accuracy,valid_precision,valid_recall,valid_f1_score,valid_auroc
0,entropy,5,2,8,auto,193768,0.986652,0.986305,1.000000,0.973573,0.986605,0.986786
1,entropy,5,2,8,auto,327407,0.991383,0.991384,0.987624,0.995849,0.991714,0.991231
2,entropy,5,2,8,auto,586930,0.987537,0.987537,0.997872,0.978119,0.987890,0.987944
3,entropy,5,2,8,auto,660108,0.981882,0.980612,0.987415,0.975061,0.981193,0.980801
4,entropy,5,2,8,auto,720389,0.996038,0.995692,0.993176,0.998538,0.995840,0.995627
...,...,...,...,...,...,...,...,...,...,...,...,...
3235,gini,60,4,12,sqrt,193768,0.995115,0.993999,0.994048,0.994330,0.994185,0.993985
3236,gini,60,4,12,sqrt,327407,0.996384,0.996154,0.994126,0.998519,0.996309,0.996051
3237,gini,60,4,12,sqrt,586930,0.999154,0.998461,1.000000,0.996937,0.998463,0.998468
3238,gini,60,4,12,sqrt,660108,0.993768,0.991536,0.995259,0.988381,0.991795,0.991615


In [17]:
# Validation metric columns summarised over every row of the grouped decision-tree frame.
metric_cols = ('valid_accuracy', 'valid_precision', 'valid_recall', 'valid_f1_score', 'valid_auroc')

# Mean of each metric column, in the order listed in metric_cols.
dt_cla_acc, dt_cla_prec, dt_cla_rec, dt_cla_f1, dt_cla_auroc = (
    gdt_cla[col].mean() for col in metric_cols
)

# Variance of each metric column (pandas' default, i.e. sample variance).
dt_cla_acc_var, dt_cla_prec_var, dt_cla_rec_var, dt_cla_f1_var, dt_cla_auroc_var = (
    gdt_cla[col].var() for col in metric_cols
)

In [18]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average accuracy score on the valid set is {dt_cla_acc.round(3)}, while the variance is {dt_cla_acc_var.round(3)}\n",
    f"The average precision score on the valid set is {dt_cla_prec.round(3)}, while the variance is {dt_cla_prec_var.round(3)}\n",
    f"The average recall score on the valid set is {dt_cla_rec.round(3)}, while the variance is {dt_cla_rec_var.round(3)}\n",
    f"The average f1 score on the valid set is {dt_cla_f1.round(3)}, while the variance is {dt_cla_f1_var.round(3)}\n",
    f"The average auroc score on the valid set is {dt_cla_auroc.round(3)}, while the variance is {dt_cla_auroc_var.round(3)}\n",
    sep='\n',
)

The average accuracy score on the valid set is 0.995, while the variance is 0.0

The average precision score on the valid set is 0.995, while the variance is 0.0

The average recall score on the valid set is 0.995, while the variance is 0.0

The average f1 score on the valid set is 0.995, while the variance is 0.0

The average auroc score on the valid set is 0.995, while the variance is 0.0



-----

In [19]:
# Raw LSTM classification results; the CSV's first column becomes the index.
lstm_cla = pd.read_csv(
    filepath_or_buffer=results_dir + 'long_short_term_memory_classification_results_df.csv',
    index_col=[0],
)

In [20]:
# Averaged per (optimizer, hidden_layers_no, units, seed). No droped_cols is passed,
# so the helper's default applies and both 'seed' and 'split' are removed afterwards.
glstm_cla = group_seeded_data(
    data=lstm_cla,
    groupby_columns=['optimizer', 'hidden_layers_no', 'units', 'seed'],
)

In [21]:
# Show the raw (ungrouped) LSTM classification frame as this cell's output.
# NOTE(review): the other sections display the grouped frame (here that would be
# `glstm_cla`) — confirm that showing the raw frame is intentional.
lstm_cla

Unnamed: 0,task,model,seed,split,optimizer,hidden_layers_no,units,train_accuracy,valid_accuracy,valid_precision,valid_recall,valid_f1_score,valid_auroc
0,classification,LSTM,660108,1,adam,5,32,0.939,0.920,0.968300,0.968300,0.9190,0.916
1,classification,LSTM,660108,1,adam,5,64,0.997,0.995,0.997118,0.997118,0.9955,0.995
2,classification,LSTM,660108,1,adam,5,128,1.000,1.000,1.000000,1.000000,1.0000,1.000
3,classification,LSTM,660108,1,adam,5,256,1.000,1.000,1.000000,1.000000,1.0000,1.000
4,classification,LSTM,660108,1,adam,6,32,0.988,0.980,0.988473,0.988473,0.9795,0.979
...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,classification,LSTM,327407,5,adam,6,256,1.000,1.000,1.000000,1.000000,1.0000,1.000
296,classification,LSTM,327407,5,adam,7,32,0.933,0.941,0.991004,0.991004,0.9405,0.939
297,classification,LSTM,327407,5,adam,7,64,0.994,0.988,0.997001,0.997001,0.9885,0.988
298,classification,LSTM,327407,5,adam,7,128,0.999,0.997,0.994003,0.994003,0.9970,0.997


In [22]:
# Validation metric columns summarised over every row of the grouped LSTM frame.
metric_cols = ('valid_accuracy', 'valid_precision', 'valid_recall', 'valid_f1_score', 'valid_auroc')

# Mean of each metric column, in the order listed in metric_cols.
lstm_cla_acc, lstm_cla_prec, lstm_cla_rec, lstm_cla_f1, lstm_cla_auroc = (
    glstm_cla[col].mean() for col in metric_cols
)

# Variance of each metric column (pandas' default, i.e. sample variance).
lstm_cla_acc_var, lstm_cla_prec_var, lstm_cla_rec_var, lstm_cla_f1_var, lstm_cla_auroc_var = (
    glstm_cla[col].var() for col in metric_cols
)

In [23]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average accuracy score on the valid set is {lstm_cla_acc.round(3)}, while the variance is {lstm_cla_acc_var.round(3)}\n",
    f"The average precision score on the valid set is {lstm_cla_prec.round(3)}, while the variance is {lstm_cla_prec_var.round(3)}\n",
    f"The average recall score on the valid set is {lstm_cla_rec.round(3)}, while the variance is {lstm_cla_rec_var.round(3)}\n",
    f"The average f1 score on the valid set is {lstm_cla_f1.round(3)}, while the variance is {lstm_cla_f1_var.round(3)}\n",
    f"The average auroc score on the valid set is {lstm_cla_auroc.round(3)}, while the variance is {lstm_cla_auroc_var.round(3)}\n",
    sep='\n',
)

The average accuracy score on the valid set is 0.991, while the variance is 0.0

The average precision score on the valid set is 0.992, while the variance is 0.0

The average recall score on the valid set is 0.992, while the variance is 0.0

The average f1 score on the valid set is 0.991, while the variance is 0.0

The average auroc score on the valid set is 0.991, while the variance is 0.0



# Regression data

In [24]:
# Raw linear-regression results; the CSV's first column becomes the index.
# NOTE(review): this rebinds `lr_reg`, which the classification section above used
# for the logistic classification results.
lr_reg = pd.read_csv(
    filepath_or_buffer=results_dir + 'linear_regression_results_df.csv',
    index_col=[0],
)

In [25]:
# One averaged row per number_of_features; 'split' is dropped after averaging.
# NOTE(review): this rebinds `glr_reg`, previously the grouped logistic frame.
glr_reg = group_seeded_data(
    data=lr_reg,
    groupby_columns=['number_of_features'],
    droped_cols=['split'],
)

In [26]:
# Show the grouped linear-regression frame as this cell's output.
glr_reg

Unnamed: 0,number_of_features,valid_mean_value,predicted_mean_value,percentage_difference,valid_rsq,valid_mae,valid_mse,valid_rmse
0,2,72.41761,74.659105,-0.070478,0.074526,6.178487,99.642719,9.876473
1,3,72.41761,74.687269,-0.070115,0.098934,6.014236,96.558137,9.719988
2,4,72.41761,74.702573,-0.069924,0.111109,5.899571,95.127047,9.642918
3,5,72.41761,74.668912,-0.069416,0.110058,5.915964,94.984329,9.639451
4,6,72.41761,74.084372,-0.057117,0.221471,5.620456,83.79071,9.040597
5,7,72.41761,73.97509,-0.055365,0.221076,5.627676,83.995032,9.044876
6,8,72.41761,73.538304,-0.047984,0.211821,5.783762,84.997245,9.098507
7,9,72.41761,73.478467,-0.046527,0.214727,5.859601,84.433934,9.079578
8,10,72.41761,73.463475,-0.046217,0.214615,5.864895,84.448303,9.08036
9,11,72.41761,71.651139,-0.020114,0.185374,6.153501,86.181487,9.191966


In [27]:
# Validation error columns summarised over every row of the grouped linear-regression frame.
metric_cols = ('valid_rsq', 'valid_mae', 'valid_mse', 'valid_rmse')

# Mean of each column, in the order listed in metric_cols.
lr_reg_rsq, lr_reg_mae, lr_reg_mse, lr_reg_rmse = (
    glr_reg[col].mean() for col in metric_cols
)

# Variance of each column (pandas' default, i.e. sample variance).
lr_reg_rsq_var, lr_reg_mae_var, lr_reg_mse_var, lr_reg_rmse_var = (
    glr_reg[col].var() for col in metric_cols
)


In [45]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average Rsq score on the valid set is {lr_reg_rsq.round(3)}, while the variance is {lr_reg_rsq_var.round(3)}\n",
    f"The average MAE score on the valid set is {lr_reg_mae.round(3)}, while the variance is {lr_reg_mae_var.round(3)}\n",
    f"The average MSE score on the valid set is {lr_reg_mse.round(3)}, while the variance is {lr_reg_mse_var.round(3)}\n",
    f"The average RMSE score on the valid set is {lr_reg_rmse.round(3)}, while the variance is {lr_reg_rmse_var.round(3)}\n",
    sep='\n',
)


The average Rsq score on the valid set is -0.287, while the variance is 0.473

The average MAE score on the valid set is 6.902, while the variance is 1.023

The average MSE score on the valid set is 143.63, while the variance is 7143.85

The average RMSE score on the valid set is 10.754, while the variance is 4.231



----------

In [29]:
# Raw support-vector regression results; the CSV's first column becomes the index.
svm_reg = pd.read_csv(
    filepath_or_buffer=results_dir + 'support_vector_regression_results_df.csv',
    index_col=[0],
)

In [30]:
# One averaged row per (C, kernel, gamma, epsilon); 'split' is dropped after averaging.
gsvm_reg = group_seeded_data(
    data=svm_reg,
    groupby_columns=['C', 'kernel', 'gamma', 'epsilon'],
    droped_cols=['split'],
)

In [31]:
# Show the grouped support-vector regression frame as this cell's output.
gsvm_reg

Unnamed: 0,C,kernel,gamma,epsilon,valid_mean_value,predicted_mean_value,percentage_difference,valid_rsq,valid_mae,valid_mse,valid_rmse
0,0.5,linear,auto,0.001,72.41761,73.728943,-0.049677,0.064877,5.592276,102.773805,9.937988
1,0.5,linear,auto,0.010,72.41761,73.747793,-0.049945,0.066615,5.586554,102.578448,9.925052
2,0.5,linear,auto,0.100,72.41761,73.762580,-0.050208,0.066276,5.587580,102.442169,9.922919
3,0.5,linear,auto,1.000,72.41761,73.155055,-0.041292,0.117811,5.619191,96.704448,9.646327
4,0.5,linear,scale,0.001,72.41761,73.728943,-0.049677,0.064877,5.592276,102.773805,9.937988
...,...,...,...,...,...,...,...,...,...,...,...
219,30.0,sigmoid,auto,1.000,72.41761,60.376971,0.147704,-68.857283,58.503712,7011.870937,77.427703
220,30.0,sigmoid,scale,0.001,72.41761,83.369022,-0.208615,-82.495076,68.293636,8634.578038,90.290265
221,30.0,sigmoid,scale,0.010,72.41761,83.366091,-0.208583,-82.496540,68.296017,8635.183121,90.294580
222,30.0,sigmoid,scale,0.100,72.41761,82.997084,-0.204684,-82.527679,68.428990,8643.052359,90.349703


In [32]:
# Validation error columns summarised over every row of the grouped SVR frame.
metric_cols = ('valid_rsq', 'valid_mae', 'valid_mse', 'valid_rmse')

# Mean of each column, in the order listed in metric_cols.
svm_reg_rsq, svm_reg_mae, svm_reg_mse, svm_reg_rmse = (
    gsvm_reg[col].mean() for col in metric_cols
)

# Variance of each column (pandas' default, i.e. sample variance).
svm_reg_rsq_var, svm_reg_mae_var, svm_reg_mse_var, svm_reg_rmse_var = (
    gsvm_reg[col].var() for col in metric_cols
)

In [46]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average Rsq score on the valid set is {svm_reg_rsq.round(3)}, while the variance is {svm_reg_rsq_var.round(3)}\n",
    f"The average MAE score on the valid set is {svm_reg_mae.round(3)}, while the variance is {svm_reg_mae_var.round(3)}\n",
    f"The average MSE score on the valid set is {svm_reg_mse.round(3)}, while the variance is {svm_reg_mse_var.round(3)}\n",
    f"The average RMSE score on the valid set is {svm_reg_rmse.round(3)}, while the variance is {svm_reg_rmse_var.round(3)}\n",
    sep='\n',
)

The average Rsq score on the valid set is -25.413, while the variance is 1443.596

The average MAE score on the valid set is 14.593, while the variance is 238.14

The average MSE score on the valid set is 2995.144, while the variance is 19360240.152

The average RMSE score on the valid set is 28.976, while the variance is 574.617



------------

In [34]:
# Raw decision-tree regression results; the CSV's first column becomes the index.
dt_reg = pd.read_csv(
    filepath_or_buffer=results_dir + 'decision_tree_regression_results_df.csv',
    index_col=[0],
)

In [35]:
# One averaged row per tree configuration. 'seed' is not a grouping key here, so
# the average also runs across seeds; 'split' and 'seed' are dropped afterwards.
gdt_reg = group_seeded_data(
    data=dt_reg,
    groupby_columns=[
        'criterions',
        'max_depth',
        'min_samples_split',
        'min_samples_leaf',
        'max_features',
    ],
    droped_cols=['split', 'seed'],
)

In [36]:
# Show the grouped decision-tree regression frame as this cell's output.
gdt_reg

Unnamed: 0,criterions,max_depth,min_samples_split,min_samples_leaf,max_features,valid_mean_value,predicted_mean_value,percentage_difference,valid_rsq,valid_mae,valid_mse,valid_rmse
0,absolute_error,5,2,8,auto,72.41761,73.713208,-0.048863,0.191702,3.916981,85.803774,9.163586
1,absolute_error,5,2,8,log2,72.41761,74.027044,-0.055047,0.105576,4.464780,92.860692,9.559643
2,absolute_error,5,2,8,sqrt,72.41761,73.981761,-0.054388,0.137131,4.220755,90.795912,9.420829
3,absolute_error,5,2,10,auto,72.41761,73.995597,-0.052514,0.244565,3.781761,79.896541,8.815140
4,absolute_error,5,2,10,log2,72.41761,74.247170,-0.058102,0.131428,4.262264,91.385220,9.443478
...,...,...,...,...,...,...,...,...,...,...,...,...
1291,squared_error,60,4,10,log2,72.41761,72.691228,-0.034159,0.196089,4.950189,83.144611,9.061181
1292,squared_error,60,4,10,sqrt,72.41761,72.418099,-0.030062,0.156806,5.281237,87.165776,9.274272
1293,squared_error,60,4,12,auto,72.41761,72.265554,-0.026288,0.272268,4.760421,74.911416,8.589236
1294,squared_error,60,4,12,log2,72.41761,72.544638,-0.032279,0.177510,5.147131,84.782095,9.150116


In [37]:
# Validation error columns summarised over every row of the grouped decision-tree frame.
metric_cols = ('valid_rsq', 'valid_mae', 'valid_mse', 'valid_rmse')

# Mean of each column, in the order listed in metric_cols.
dt_reg_rsq, dt_reg_mae, dt_reg_mse, dt_reg_rmse = (
    gdt_reg[col].mean() for col in metric_cols
)

# Variance of each column (pandas' default, i.e. sample variance).
dt_reg_rsq_var, dt_reg_mae_var, dt_reg_mse_var, dt_reg_rmse_var = (
    gdt_reg[col].var() for col in metric_cols
)

In [47]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average Rsq score on the valid set is {dt_reg_rsq.round(3)}, while the variance is {dt_reg_rsq_var.round(3)}\n",
    f"The average MAE score on the valid set is {dt_reg_mae.round(3)}, while the variance is {dt_reg_mae_var.round(3)}\n",
    f"The average MSE score on the valid set is {dt_reg_mse.round(3)}, while the variance is {dt_reg_mse_var.round(3)}\n",
    f"The average RMSE score on the valid set is {dt_reg_rmse.round(3)}, while the variance is {dt_reg_rmse_var.round(3)}\n",
    sep='\n',
)

The average Rsq score on the valid set is 0.122, while the variance is 0.015

The average MAE score on the valid set is 5.18, while the variance is 0.771

The average MSE score on the valid set is 91.204, while the variance is 163.039

The average RMSE score on the valid set is 9.45, while the variance is 0.44



------------

In [39]:
# Raw LSTM regression results; the CSV's first column becomes the index.
lstm_reg = pd.read_csv(
    filepath_or_buffer=results_dir + 'long_short_term_memory_regression_results_df.csv',
    index_col=[0],
)

In [40]:
# One averaged row per (optimizer, hidden_layers_no, units); 'split' is dropped
# after averaging.
glstm_reg = group_seeded_data(
    data=lstm_reg,
    groupby_columns=['optimizer', 'hidden_layers_no', 'units'],
    droped_cols=['split'],
)

In [41]:
# Show the grouped LSTM regression frame as this cell's output.
glstm_reg

Unnamed: 0,optimizer,hidden_layers_no,units,valid_mean_value,predicted_mean_value,percentage_difference,valid_rsq,valid_mae,valid_mse,valid_rmse,train_mae,train_mse,train_rmse
0,adam,3,16,72.41761,77.08876,-0.105013,-0.083443,5.856336,117.167086,10.669617,4.067463,76.958173,8.711704
1,adam,3,32,72.41761,76.952004,-0.10336,-0.09016,5.916133,117.673322,10.706694,4.02146,76.211371,8.659324
2,adam,3,64,72.41761,77.095261,-0.105007,-0.072378,5.760079,115.432206,10.614371,4.09905,76.665149,8.670707
3,adam,3,128,72.41761,76.63593,-0.098769,-0.070039,6.087903,115.075855,10.598905,4.04754,75.234899,8.57833
4,adam,3,256,72.41761,75.340939,-0.081541,-0.041583,6.7047,112.67079,10.500757,5.703001,80.673287,8.86887
5,adam,3,384,72.41761,75.606296,-0.085079,-0.097304,7.025907,116.254675,10.722166,6.244446,86.751409,9.175203
6,adam,4,16,72.41761,77.564971,-0.112561,-0.213595,6.260127,128.782,11.159523,4.235508,82.781574,9.006553
7,adam,4,32,72.41761,77.154638,-0.105935,-0.086086,5.801755,116.972779,10.679193,4.104402,77.33565,8.711651
8,adam,4,64,72.41761,76.833273,-0.101366,-0.068468,5.991021,114.69835,10.58274,4.018709,75.691896,8.602193
9,adam,4,128,72.41761,76.948961,-0.102917,-0.053107,5.738439,113.959603,10.53339,4.179773,75.235306,8.595893


In [42]:
# Validation error columns summarised over every row of the grouped LSTM frame.
metric_cols = ('valid_rsq', 'valid_mae', 'valid_mse', 'valid_rmse')

# Mean of each column, in the order listed in metric_cols.
lstm_reg_rsq, lstm_reg_mae, lstm_reg_mse, lstm_reg_rmse = (
    glstm_reg[col].mean() for col in metric_cols
)

# Variance of each column (pandas' default, i.e. sample variance).
lstm_reg_rsq_var, lstm_reg_mae_var, lstm_reg_mse_var, lstm_reg_rmse_var = (
    glstm_reg[col].var() for col in metric_cols
)

In [48]:
# Each message already ends in '\n'; joining with sep='\n' (plus print's default
# end) leaves one blank line after every sentence, as separate print calls would.
print(
    f"The average Rsq score on the valid set is {lstm_reg_rsq.round(3)}, while the variance is {lstm_reg_rsq_var.round(3)}\n",
    f"The average MAE score on the valid set is {lstm_reg_mae.round(3)}, while the variance is {lstm_reg_mae_var.round(3)}\n",
    f"The average MSE score on the valid set is {lstm_reg_mse.round(3)}, while the variance is {lstm_reg_mse_var.round(3)}\n",
    f"The average RMSE score on the valid set is {lstm_reg_rmse.round(3)}, while the variance is {lstm_reg_rmse_var.round(3)}\n",
    sep='\n',
)

The average Rsq score on the valid set is -0.156, while the variance is 0.014

The average MAE score on the valid set is 6.575, while the variance is 0.275

The average MSE score on the valid set is 123.633, while the variance is 138.879

The average RMSE score on the valid set is 10.975, while the variance is 0.253



------------