# Read results datasets

In [1]:
import itertools
import os.path
from typing import List

import numpy as np
import pandas as pd

from src.models.config import param_layers, param_grid_mlp
from src.utils.const import MODEL_RESULTS_CSV

In [2]:
# Folder holding the result CSVs, resolved relative to the parent of the working directory.
RESULTS_FOLDER = os.path.join(os.pardir, MODEL_RESULTS_CSV)

In [3]:
# MLP results: one row per (cfg, fold) pair with test/validation/train metrics.
mlp = pd.read_csv(
    filepath_or_buffer=os.path.join(RESULTS_FOLDER, 'out_mlp_all.csv'),
)

In [4]:
# SVM results table.
svm_res = pd.read_csv(
    filepath_or_buffer=os.path.join(RESULTS_FOLDER, 'out_svm.csv'),
)

In [5]:
# Results table read from the naive Bayes output file (also queried for 'qda' rows below).
naive_res = pd.read_csv(
    filepath_or_buffer=os.path.join(RESULTS_FOLDER, 'best_out_naive_bayes.csv'),
)

In [6]:
# Results table for the tree-based models (random forest, decision tree).
tree_res = pd.read_csv(
    filepath_or_buffer=os.path.join(RESULTS_FOLDER, 'best_out_tree_based.csv'),
)

In [7]:
# Preview the MLP results frame (pandas truncates the middle rows in the rendered output).
mlp

Unnamed: 0.1,Unnamed: 0,cfg,fold,loss_test,acc_test,f1_test,mean_loss,std_loss,mean_acc_val,std_acc_val,mean_acc_train,std_acc_train,mean_f1_train,std_f1_train,mean_f1_val,std_f1_val
0,0,0,1,0.372040,85.285171,0.853134,0.306360,0.017720,79.643519,0.993016,87.469802,0.759901,0.873412,0.007689,0.797929,0.009758
1,1,1,1,0.344320,85.171103,0.852068,0.302731,0.020831,79.783835,0.788934,87.632241,0.883383,0.875075,0.008986,0.799156,0.007691
2,2,2,1,0.361184,85.285171,0.853100,0.304689,0.018806,79.536465,0.954451,87.559346,0.787205,0.874340,0.008016,0.796486,0.009295
3,3,3,1,0.352550,85.817490,0.858861,0.306855,0.018824,79.500103,1.004205,87.455783,0.799693,0.873303,0.008139,0.795906,0.010032
4,4,4,1,2.237197,10.798479,0.097864,0.673969,0.734426,65.168832,28.681150,73.078158,28.773956,0.713885,0.318954,0.647411,0.297186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
955,955,187,5,0.481808,80.486877,0.805378,1.271628,0.869597,44.391021,33.047878,50.101529,33.251459,0.466255,0.365353,0.429138,0.345926
956,956,188,5,2.295036,16.774439,0.102313,1.276690,0.870067,44.191214,33.077768,49.910281,33.267216,0.464154,0.365526,0.427009,0.346253
957,957,189,5,2.289519,16.127805,0.101492,1.281689,0.870492,43.989620,33.109832,49.726107,33.276257,0.462157,0.365599,0.424895,0.346572
958,958,190,5,2.261616,19.475086,0.166811,1.286323,0.870557,43.820917,33.105664,49.545093,33.282774,0.460186,0.365651,0.423049,0.346610


In [8]:
def find_max_f1_cfg(df: pd.DataFrame) -> np.ndarray:
    """Return the configurations that achieve the best test F1 in at least one fold.

    For every distinct value of ``df['fold']`` the row with the highest
    ``f1_test`` is located and its ``cfg`` value collected.

    Args:
        df: results frame with at least the columns ``fold``, ``f1_test`` and ``cfg``.

    Returns:
        Sorted array of the unique winning ``cfg`` values (one candidate per
        fold, duplicates removed by ``np.unique``).
    """
    winners = []
    for fold in df['fold'].unique():
        # idxmax() returns an index *label*, so the row must be fetched with
        # .loc; positional .iloc is only correct on a default RangeIndex.
        best_label = df.loc[df['fold'] == fold, 'f1_test'].idxmax()
        winners.append(df.loc[best_label, 'cfg'])
    return np.unique(np.array(winners))

In [9]:
# Candidate configurations: those that win (highest test F1) in at least one fold.
best_cfg = find_max_f1_cfg(df=mlp)

In [10]:
def mu_confidence_interval(data: np.ndarray, t: float = 1.64, ddof: int = 0) -> dict:
    """Compute the mean of ``data`` and a symmetric confidence interval around it.

    The half-width is ``t * std(data) / sqrt(M)`` where ``M`` is the number of
    observations.

    Args:
        data: 1-D collection of observations (NumPy array, pandas Series or a
            plain sequence of numbers).
        t: critical value multiplying the standard error. The default 1.64 is
            roughly the two-sided 90% critical value of the standard normal;
            NOTE(review): with only a handful of folds a Student-t quantile
            would be larger -- pass it explicitly if that is what is intended.
        ddof: delta degrees of freedom forwarded to ``np.std``. The default 0
            (population standard deviation) keeps the historical results; use
            1 for the sample standard deviation.

    Returns:
        dict with keys ``'mu'`` (mean), ``'t_student'`` (interval half-width),
        ``'first_interval'`` (lower bound) and ``'second_interval'`` (upper bound).
    """
    mu = np.mean(data)
    standard_deviation = np.std(data, ddof=ddof)
    # len() instead of .shape[0] so plain Python sequences are accepted too.
    M = len(data)
    t_student = t * standard_deviation / np.sqrt(M)
    return {
        'mu': mu,
        't_student': t_student,
        'first_interval': mu - t_student,
        'second_interval': mu + t_student,
    }

In [11]:
def find_best_conf(lst_conf, df: pd.DataFrame) -> dict:
    """Pick the configuration with the highest mean test F1.

    For each configuration in ``lst_conf`` the ``f1_test`` values of the
    matching rows of ``df`` are summarised with ``mu_confidence_interval`` and
    the summary is tagged with its ``cfg``. The summary with the largest
    ``'mu'`` is returned; on ties the earliest configuration wins.

    Args:
        lst_conf: iterable of configuration identifiers to compare.
        df: results frame with at least the columns ``cfg`` and ``f1_test``.

    Returns:
        The dict produced by ``mu_confidence_interval`` for the winning
        configuration, extended with a ``'cfg'`` key.

    Raises:
        IndexError: if ``lst_conf`` is empty.
    """
    summaries = []
    for cfg in lst_conf:
        # Single .loc lookup instead of chained df[mask][col] indexing.
        summary = mu_confidence_interval(df.loc[df['cfg'] == cfg, 'f1_test'])
        summary['cfg'] = cfg
        summaries.append(summary)

    if not summaries:
        raise IndexError('lst_conf is empty: no configuration to compare')

    # Builtin max() keeps the first maximal element, matching a strict "<" scan,
    # and avoids shadowing the builtin with a local variable.
    return max(summaries, key=lambda summary: summary['mu'])

### Results for the best MLP configuration

In [12]:
# Among the per-fold winners, keep the configuration with the highest mean test F1.
res_mlp = find_best_conf(lst_conf=best_cfg, df=mlp)
res_mlp

{'mu': 0.8610175579840659,
 't_student': 0.007531035084099725,
 'first_interval': 0.8534865228999662,
 'second_interval': 0.8685485930681656,
 'cfg': 18.0}

In [14]:
# Full hyper-parameter grid as the Cartesian product of every layer and training option.
# itertools.product returns a one-shot iterator; it is materialised into a list so
# that cells indexing into the grid can be re-run without finding it exhausted.
hyper_parameters_model_all = list(
    itertools.product(
        param_layers['input_act'],
        param_layers['hidden_act'],
        param_layers['hidden_size'],
        param_layers['num_hidden_layers'],
        param_layers['dropout'],
        param_layers['batch_norm'],
        param_layers['output_fn'],
        param_grid_mlp['starting_lr'],
        param_grid_mlp['num_epochs'],
        param_grid_mlp['batch_size'],
        param_grid_mlp['optim'],
        param_grid_mlp['momentum'],
        param_grid_mlp['weight_decay'],
    )
)

### Look up the hyper-parameters of the best MLP configuration

In [15]:
# The winning `cfg` is stored as a float (e.g. 18.0), so cast it to int before indexing the grid.
list(hyper_parameters_model_all)[
    int(res_mlp['cfg'])
]

(LeakyReLU(negative_slope=0.01),
 LeakyReLU(negative_slope=0.01),
 512,
 3,
 0.2,
 True,
 None,
 0.001,
 1,
 128,
 torch.optim.adam.Adam,
 0.9,
 1e-05)

## Scikit-learn: best configurations

### Tree based

#### Random forest

In [35]:
# Distinct hyper-parameter strings recorded for the random forest rows.
print(tree_res.loc[tree_res['model'] == 'random_forest_classifier', 'cfg'].unique())

["{'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 700}"]


In [27]:
# Mean test F1 and confidence interval over the random forest rows.
print(f"random forest : {mu_confidence_interval(tree_res.loc[tree_res['model'] == 'random_forest_classifier', 'f1_test'])} ")

random forest : {'mu': 0.609756989767005, 't_student': 0.005027550044208888, 'first_interval': 0.6047294397227961, 'second_interval': 0.6147845398112138} 


#### Decision tree

In [36]:
# Distinct hyper-parameter strings recorded for the decision tree rows.
print(tree_res.loc[tree_res['model'] == 'decision_tree_classifier', 'cfg'].unique())

["{'criterion': 'entropy', 'max_depth': 15}"]


In [29]:
# Mean test F1 and confidence interval over the decision tree rows.
print(f"decision tree classifier : {mu_confidence_interval(tree_res.loc[tree_res['model'] == 'decision_tree_classifier', 'f1_test'])} ")

decision tree classifier : {'mu': 0.6402913493313054, 't_student': 0.003246723679220664, 'first_interval': 0.6370446256520848, 'second_interval': 0.643538073010526} 


## Naive Bayes

#### Gaussian naive Bayes

In [37]:
# Distinct hyper-parameter strings recorded for the Gaussian naive Bayes rows.
print(naive_res.loc[naive_res['model'] == 'gaussian_nb', 'cfg'].unique())

["{'var_smoothing': 8.111308307896872e-07}"]


In [30]:
# Mean test F1 and confidence interval over the Gaussian naive Bayes rows.
print(f"gaussian_nb : {mu_confidence_interval(naive_res.loc[naive_res['model'] == 'gaussian_nb', 'f1_test'])} ")

gaussian_nb : {'mu': 0.4526802764969494, 't_student': 0.008227757785451987, 'first_interval': 0.44445251871149744, 'second_interval': 0.4609080342824014} 


#### QDA

In [38]:
# Distinct hyper-parameter strings recorded for the QDA rows.
print(naive_res.loc[naive_res['model'] == 'qda', 'cfg'].unique())

["{'reg_param': 0.001, 'tol': 0.0001}"]


In [34]:
# Mean test F1 and confidence interval over the QDA rows.
print(f"qda : {mu_confidence_interval(naive_res.loc[naive_res['model'] == 'qda', 'f1_test'])} ")

qda : {'mu': 0.5217235201683785, 't_student': 0.006131748988284651, 'first_interval': 0.5155917711800939, 'second_interval': 0.5278552691566631} 


## SVM

In [40]:
# Distinct hyper-parameter strings recorded in the SVM results.
print(svm_res.loc[:, 'cfg'].unique())

["{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"]


In [33]:
# Mean test F1 and confidence interval over all SVM rows.
print(f"SVM : {mu_confidence_interval(svm_res.loc[:, 'f1_test'])} ")

SVM : {'mu': 0.8286206857647119, 't_student': 0.003255026164399803, 'first_interval': 0.8253656596003122, 'second_interval': 0.8318757119291117} 
