In [1]:
from mlaut.analyze_results import AnalyseResults
from mlaut.data import Data
import pandas as pd
from mlaut.estimators.estimators import instantiate_default_estimators
from mlaut.analyze_results.scores import ScoreAccuracy

import matplotlib.pyplot as plt
# Allow up to 1000 rows to be rendered when a DataFrame is displayed.
pd.set_option('display.max_rows', 1000)

  from ._conv import register_converters as _register_converters


In [2]:
# Open the two HDF5 files used by the analysis: the datasets file is opened
# with mode='r' and the classification results file with mode='a'.
data = Data()
input_io = data.open_hdf5('data/openml.h5', mode='r')
out_io = data.open_hdf5('data/openml-classification.h5', mode='a')

# Hand both file handles to AnalyseResults together with the HDF5 group paths
# for the original datasets and for the stored predictions.
analyze = AnalyseResults(
    hdf5_output_io=out_io,
    hdf5_input_io=input_io,
    input_h5_original_datasets_group='openml/',
    output_h5_predictions_group='experiments/predictions/',
)


### All datasets

In [3]:
# Estimators to compare, looked up by name among mlaut's default estimators.
estimator_names = ['BaselineClassifier', 'NeuralNetworkDeepClassifier', 'K_Neighbours']
estimators = instantiate_default_estimators(estimator_names)
score_accuracy = ScoreAccuracy()

# prediction_errors is unpacked into three results: errors per estimator,
# errors per dataset per estimator, and the latter as a DataFrame.
prediction_results = analyze.prediction_errors(score_accuracy, estimators)
(errors_per_estimator,
 errors_per_dataset_per_estimator,
 errors_per_dataset_per_estimator_df) = prediction_results

#### Simple average and standard error

In [4]:
# Summarise the per-estimator errors with analyze.average_and_std_error and
# display the result (the rendered table has avg_score and std_error columns).
avg_and_std_error = analyze.average_and_std_error(errors_per_estimator)
# Disabled: would label the index column of the table.
# avg_and_std_error.index.name='Estimator Name'
avg_and_std_error

Unnamed: 0,avg_score,std_error
NeuralNetworkDeepClassifier,0.48931,0.069318
BaselineClassifier,0.565476,0.030917
K_Neighbours,0.917092,0.009172


#### Average Rank

In [None]:
# Rank the estimators from their per-estimator errors; ascending=False is
# forwarded to analyze.ranks. The result is displayed and reused further down
# (merged into avg_metrics and exported to LaTeX).
avg_rank = analyze.ranks(errors_per_estimator, ascending=False)
avg_rank

#### Training time

In [None]:
# analyze.average_training_time returns two values for the given estimators;
# only the first (avg_training_time) is displayed here.
# NOTE(review): `trainig_time_per_dataset` is misspelled ("training"); rename it
# only after checking that no other cell refers to the current name.
avg_training_time, trainig_time_per_dataset = analyze.average_training_time(estimators)
avg_training_time

#### Merge average score, rank and training time

In [None]:
# Combine rank, average score / standard error and training time into a single
# table. Both merges join on the index of the left and the right table.
index_join = dict(left_index=True, right_index=True)
rank_and_score = pd.merge(avg_rank, avg_and_std_error, **index_join)
avg_metrics = pd.merge(rank_and_score, avg_training_time, **index_join)
avg_metrics


#### Cohen's d

In [None]:
# Compute Cohen's d from the per-estimator errors via analyze.cohens_d and
# display the result; it is also exported to LaTeX further down.
cohens_d = analyze.cohens_d(errors_per_estimator)
cohens_d

#### t-test

In [None]:
# analyze.t_test returns two values; the second (t_test_df) is the one
# displayed here and exported to LaTeX further down.
t_test, t_test_df = analyze.t_test(errors_per_estimator)
t_test_df

#### sign test

In [None]:
# analyze.sign_test returns two values; the second (sign_test_df) is the one
# displayed here and exported to LaTeX further down.
sign_test, sign_test_df = analyze.sign_test(errors_per_estimator)
sign_test_df

#### t-test with Bonferroni correction

In [None]:
# Run analyze.t_test_with_bonferroni_correction on the per-estimator errors.
# Unlike the plain t-test cell, the call is bound to a single name here.
t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(errors_per_estimator)
t_test_bonferroni_df

#### Wilcoxon test

In [None]:
# analyze.wilcoxon_test returns two values; the first is discarded (bound to
# `_`) and only the second is kept, displayed and later exported to LaTeX.
_, wilcoxon_df_multiindex = analyze.wilcoxon_test(errors_per_estimator)
wilcoxon_df_multiindex

#### Friedman test

In [None]:
# analyze.friedman_test returns two values; the second (friedman_test_df) is
# the one displayed here and exported to LaTeX further down.
friedman_test, friedman_test_df = analyze.friedman_test(errors_per_estimator)
friedman_test_df

In [None]:
# Run analyze.nemenyi on the per-estimator errors and wrap the result in a
# DataFrame so it can be displayed and exported with to_latex().
# NOTE(review): the variables are spelled "nemeniy" while the method is
# "nemenyi"; the names are kept because the LaTeX export cell uses
# nemeniy_test_df.
nemeniy_test = analyze.nemenyi(errors_per_estimator)
nemeniy_test_df = pd.DataFrame(nemeniy_test)
nemeniy_test_df

### Per dataset

In [5]:
# Display the per-dataset, per-estimator table returned by
# analyze.prediction_errors (the rendered output has loss and std columns).
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std
AP_Breast_Colon,BaselineClassifier,0.524038,0.034629
AP_Breast_Colon,K_Neighbours,0.086538,0.019495
AP_Breast_Colon,NeuralNetworkDeepClassifier,0.528846,0.034611
AP_Breast_Kidney,BaselineClassifier,0.435,0.035055
AP_Breast_Kidney,K_Neighbours,0.045,0.014659
AP_Breast_Kidney,NeuralNetworkDeepClassifier,0.41,0.034778
AP_Breast_Lung,BaselineClassifier,0.391026,0.03907
AP_Breast_Lung,K_Neighbours,0.083333,0.022129
AP_Breast_Lung,NeuralNetworkDeepClassifier,0.50641,0.040029
AP_Breast_Omentum,BaselineClassifier,0.330935,0.039912


## Save tables to $\LaTeX$

In [None]:
# Export every results table to a .tex file under the paper's tables directory.
#
# Each entry is (output file name, table); a single loop replaces the former
# copy-pasted `with open(...)` stanzas. The Bonferroni table used to be written
# twice to the same file; it is now written once, which leaves the same file
# on disk.
#
# NOTE(review): the file names 'avg_and_st_error.tex', 'wilxocon_test.tex' and
# 'nemeniy_test.tex' look misspelled but are kept byte-for-byte, because other
# files (presumably the paper's LaTeX sources) may reference these paths --
# confirm before renaming.
tables_dir = '../mlaut_paper/tables/'
latex_tables = [
    ('avg_and_st_error.tex', avg_and_std_error),        # average and standard error
    ('avg_rank.tex', avg_rank),                         # average rank
    ('avg_metrics.tex', avg_metrics),                   # score, rank and training time
    ('cohens_d.tex', cohens_d),                         # Cohen's d
    ('t_test.tex', t_test_df),                          # t-test
    ('sign_test.tex', sign_test_df),                    # sign test
    ('t_test_bonferroni.tex', t_test_bonferroni_df),    # t-test with Bonferroni correction
    ('wilxocon_test.tex', wilcoxon_df_multiindex),      # Wilcoxon test
    ('friedman_test.tex', friedman_test_df),            # Friedman test
    ('nemeniy_test.tex', nemeniy_test_df),              # Nemenyi test
    ('errors_per_dataset_per_estimator.tex',
     errors_per_dataset_per_estimator_df),              # errors per dataset per estimator
]

# Building the list first means a table that was never computed raises a
# NameError before any file is written.
for file_name, table in latex_tables:
    with open(tables_dir + file_name, 'w') as tf:
        tf.write(table.to_latex())