In [1]:
from mlaut.analyze_results import AnalyseResults
from mlaut.data import Data
import pandas as pd
from mlaut.estimators.estimators import instantiate_default_estimators
from mlaut.analyze_results.scores import ScoreAccuracy

import matplotlib.pyplot as plt
# Raise pandas' display limit to 1000 rows so long result tables are shown without truncation.
pd.options.display.max_rows = 1000

  from ._conv import register_converters as _register_converters


In [2]:
# Open both HDF5 files through mlaut's Data helper: the dataset file with
# mode='r' and the classification results file with mode='a'.
data = Data()
input_io = data.open_hdf5('data/openml.h5', mode='r')
out_io = data.open_hdf5('data/openml-classification.h5', mode='a')

# AnalyseResults is given both file handles plus the HDF5 group names for the
# original datasets ('openml/') and the predictions ('experiments/predictions/').
analyze = AnalyseResults(
    hdf5_output_io=out_io,
    hdf5_input_io=input_io,
    input_h5_original_datasets_group='openml/',
    output_h5_predictions_group='experiments/predictions/',
)


### All datasets

In [3]:
# Default estimators for the 'Classification' task and the accuracy metric
# that every prediction is scored with.
estimators = instantiate_default_estimators(['Classification'])
score_accuracy = ScoreAccuracy()

# prediction_errors returns three objects; all later cells work from these.
errors_per_estimator, errors_per_dataset_per_estimator, errors_per_dataset_per_estimator_df = (
    analyze.prediction_errors(score_accuracy, estimators)
)

2018-06-10 14:24:44,830 [MainThread  ] [WARNI]  Predictions for estimator SVC unavailable for dataset: GesturePhaseSegmentationProcessed. Analyse Results will skip GesturePhaseSegmentationProcessed.
2018-06-10 14:24:44,830 [MainThread  ] [WARNI]  Predictions for estimator SVC unavailable for dataset: GesturePhaseSegmentationProcessed. Analyse Results will skip GesturePhaseSegmentationProcessed.
2018-06-10 14:24:44,832 [MainThread  ] [WARNI]  Predictions for estimator GaussianNaiveBayes unavailable for dataset: GesturePhaseSegmentationProcessed. Analyse Results will skip GesturePhaseSegmentationProcessed.
2018-06-10 14:24:44,832 [MainThread  ] [WARNI]  Predictions for estimator GaussianNaiveBayes unavailable for dataset: GesturePhaseSegmentationProcessed. Analyse Results will skip GesturePhaseSegmentationProcessed.
2018-06-10 14:24:44,836 [MainThread  ] [WARNI]  Predictions for estimator BernoulliNaiveBayes unavailable for dataset: GesturePhaseSegmentationProcessed. Analyse Results will

#### Simple average and standard error

In [4]:
# Average score and standard error per estimator, computed from errors_per_estimator.
avg_and_std_error = analyze.average_and_std_error(errors_per_estimator)
# avg_and_std_error.index.name='Estimator Name'
avg_and_std_error

Unnamed: 0,avg_score,std_error
BaselineClassifier,0.560825,0.020387
NeuralNetworkDeepClassifier,0.601127,0.024979
BernoulliNaiveBayes,0.657803,0.017503
SVC,0.675144,0.018987
K_Neighbours,0.838803,0.022794
GaussianNaiveBayes,0.843921,0.026747
PassiveAggressiveClassifier,0.856268,0.025228
GradientBoostingClassifier,0.860161,0.019453
RandomForestClassifier,0.885413,0.018639
BaggingClassifier,0.888241,0.019002


#### Average Rank

In [5]:
# Average rank per estimator. With ascending=False the highest-scoring estimators
# receive the lowest rank numbers (in the output BaggingClassifier, the estimator
# with the highest avg_score, has the lowest avg_rank).
# NOTE(review): presumably ranks are computed per dataset and then averaged -- confirm against mlaut.
avg_rank = analyze.ranks(errors_per_estimator, ascending=False)
avg_rank

Unnamed: 0,avg_rank
BaggingClassifier,2.68
RandomForestClassifier,2.87
PassiveAggressiveClassifier,3.59
GaussianNaiveBayes,4.5
GradientBoostingClassifier,4.73
K_Neighbours,5.07
SVC,6.87
BernoulliNaiveBayes,7.35
NeuralNetworkDeepClassifier,8.15
BaselineClassifier,9.19


#### Training time

In [18]:
# Average training time per estimator (displayed below); the second return value
# is, judging by its name, a per-dataset breakdown.
# NOTE(review): `trainig_time_per_dataset` is misspelled ("training") and is not
# used by any cell visible here; the name is kept so existing references keep working.
avg_training_time, trainig_time_per_dataset = analyze.average_training_time(estimators)
avg_training_time

Unnamed: 0,avg training time (in sec)
BaggingClassifier,31.251519
BaselineClassifier,0.000646
BernoulliNaiveBayes,0.025111
GaussianNaiveBayes,0.013008
GradientBoostingClassifier,48.118384
K_Neighbours,41.03347
NeuralNetworkDeepClassifier,1.781616
PassiveAggressiveClassifier,38.951718
RandomForestClassifier,43.989733
SVC,684.234771


#### Merge average score, rank and training time

In [7]:
# Combine the three per-estimator summaries into one table by joining on their
# shared index (the estimator name): rank, then score/std error, then training time.
avg_metrics = (
    avg_rank
    .merge(avg_and_std_error, left_index=True, right_index=True)
    .merge(avg_training_time, left_index=True, right_index=True)
)
avg_metrics


Unnamed: 0,avg_rank,avg_score,std_error,avg training time (in sec)
BaggingClassifier,2.68,0.888241,0.019002,31.251519
RandomForestClassifier,2.87,0.885413,0.018639,43.989733
PassiveAggressiveClassifier,3.59,0.856268,0.025228,38.951718
GaussianNaiveBayes,4.5,0.843921,0.026747,0.013008
GradientBoostingClassifier,4.73,0.860161,0.019453,48.118384
K_Neighbours,5.07,0.838803,0.022794,41.03347
SVC,6.87,0.675144,0.018987,684.234771
BernoulliNaiveBayes,7.35,0.657803,0.017503,0.025111
NeuralNetworkDeepClassifier,8.15,0.601127,0.024979,1.781616
BaselineClassifier,9.19,0.560825,0.020387,0.000646


#### Cohen's d

In [8]:
# Pairwise Cohen's d between estimators, computed from errors_per_estimator.
# The displayed table is indexed estimator_1 x estimator_2 and only the upper triangle is filled.
cohens_d = analyze.cohens_d(errors_per_estimator)
cohens_d

estimator_2,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,RandomForestClassifier,SVC
estimator_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
BaggingClassifier,-2.34966,-1.78396,-0.270165,-0.206515,-0.333192,-1.82963,-0.202466,-0.0212487,-1.586587
BaselineClassifier,,0.721849,1.68358,2.12456,1.818,0.249991,1.82171,2.3501,0.820695
BernoulliNaiveBayes,,,1.16453,1.54662,1.25964,-0.371642,1.29272,1.78036,0.134305
GaussianNaiveBayes,,,,0.0982105,-0.0291291,-1.32686,0.0671604,0.254545,-1.029104
GradientBoostingClassifier,,,,,-0.142552,-1.63635,-0.0244456,0.187453,-1.361264
K_Neighbours,,,,,,-1.40572,0.102734,0.316595,-1.103349
NeuralNetworkDeepClassifier,,,,,,,1.43733,1.82429,0.471811
PassiveAggressiveClassifier,,,,,,,,0.185834,-1.147268
RandomForestClassifier,,,,,,,,,-1.580547


#### t-test

In [9]:
# Pairwise t-tests between estimators. Two values are returned; the second is the
# DataFrame (t_stat / p_val columns per estimator) that is displayed and later exported.
t_test, t_test_df = analyze.t_test(errors_per_estimator)
t_test_df

Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NeuralNetworkDeepClassifier,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val
BaggingClassifier,0.0,1.0,11.630227,3.682695e-20,8.830166,4.169026e-14,1.337248,0.1842372,1.022195,0.3092054,1.649216,0.1023054,9.056194,1.351684e-14,1.002157,0.3187361,0.105176,0.9164513,7.853203,5.210719e-12
BaselineClassifier,-11.630227,3.682695e-20,0.0,1.0,-3.572969,0.0005494195,-8.333276,4.908401e-13,-10.516055,9.208801000000001e-18,-8.998659,1.800852e-14,-1.23739,0.2188989,-9.017018,1.64333e-14,-11.632411,3.643322e-20,-4.062234,9.802301e-05
BernoulliNaiveBayes,-8.830166,4.169026e-14,3.572969,0.0005494195,0.0,1.0,-5.76415,9.521242e-08,-7.655367,1.368733e-11,-6.234898,1.144367e-08,1.839537,0.06886362,-6.39863,5.393426e-09,-8.812353,4.555556e-14,-0.664776,0.5077554
GaussianNaiveBayes,-1.337248,0.1842372,8.333276,4.908401e-13,5.76415,9.521242e-08,0.0,1.0,-0.486117,0.6279695,0.144182,0.8856531,6.567637,2.462658e-09,-0.332427,0.7402768,-1.259932,0.210686,5.093807,1.703028e-06
GradientBoostingClassifier,-1.022195,0.3092054,10.516055,9.208801000000001e-18,7.655367,1.368733e-11,0.486117,0.6279695,0.0,1.0,0.705595,0.4821141,8.099522,1.555219e-12,0.121,0.903939,-0.927847,0.3557665,6.737912,1.10992e-09
K_Neighbours,-1.649216,0.1023054,8.998659,1.800852e-14,6.234898,1.144367e-08,-0.144182,0.8856531,-0.705595,0.4821141,0.0,1.0,6.957956,3.924536e-10,-0.508505,0.612242,-1.567066,0.1203227,5.461299,3.578757e-07
NeuralNetworkDeepClassifier,-9.056194,1.351684e-14,1.23739,0.2188989,-1.839537,0.06886362,-6.567637,2.462658e-09,-8.099522,1.555219e-12,-6.957956,3.924536e-10,0.0,1.0,-7.114416,1.862461e-10,-9.029757,1.542182e-14,-2.335344,0.02156443
PassiveAggressiveClassifier,-1.002157,0.3187361,9.017018,1.64333e-14,6.39863,5.393426e-09,0.332427,0.7402768,-0.121,0.903939,0.508505,0.612242,7.114416,1.862461e-10,0.0,1.0,-0.919831,0.3599197,5.678688,1.388024e-07
RandomForestClassifier,-0.105176,0.9164513,11.632411,3.643322e-20,8.812353,4.555556e-14,1.259932,0.210686,0.927847,0.3557665,1.567066,0.1203227,9.029757,1.542182e-14,0.919831,0.3599197,0.0,1.0,7.82331,6.031334e-12
SVC,-7.853203,5.210719e-12,4.062234,9.802301e-05,0.664776,0.5077554,-5.093807,1.703028e-06,-6.737912,1.10992e-09,-5.461299,3.578757e-07,2.335344,0.02156443,-5.678688,1.388024e-07,-7.82331,6.031334e-12,0.0,1.0


#### Sign test

In [10]:
# Pairwise sign tests between estimators. Two values are returned; the second is the
# DataFrame that is displayed and later exported.
sign_test, sign_test_df = analyze.sign_test(errors_per_estimator)
sign_test_df

Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NeuralNetworkDeepClassifier,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val
BaggingClassifier,0.0,1.0,7.517711,5.574373e-14,6.690452,2.22482e-11,1.140927,0.2539002,1.733796,0.08295434,1.847544,0.06466838,7.004121,2.485413e-12,0.596315,0.5509645,0.365372,0.7148334,6.449168,1.124655e-10
BaselineClassifier,-7.517711,5.574373e-14,0.0,1.0,-3.15737,0.001591995,-6.42504,1.318347e-10,-7.307449,2.722615e-13,-6.593939,4.283086e-11,-1.499406,0.1337684,-6.593939,4.283086e-11,-7.569414,3.749109e-14,-3.905349,9.408951e-05
BernoulliNaiveBayes,-6.690452,2.22482e-11,3.15737,0.001591995,0.0,1.0,-5.446118,5.148108e-08,-6.218226,5.028085e-10,-5.528844,3.223487e-08,1.375317,0.1690332,-5.773574,7.760743e-09,-6.70424,2.024575e-11,-0.782449,0.433951
GaussianNaiveBayes,-1.140927,0.2539002,6.42504,1.318347e-10,5.446118,5.148108e-08,0.0,1.0,0.592869,0.5532692,0.854834,0.3926432,5.908004,3.462784e-09,-0.437758,0.661562,-0.820365,0.4120083,5.191046,2.091155e-07
GradientBoostingClassifier,-1.733796,0.08295434,7.307449,2.722615e-13,6.218226,5.028085e-10,-0.592869,0.5532692,0.0,1.0,0.465333,0.6416931,6.652536,2.88085e-11,-0.885856,0.3756952,-1.458043,0.1448287,5.880428,4.092061e-09
K_Neighbours,-1.847544,0.06466838,6.593939,4.283086e-11,5.528844,3.223487e-08,-0.854834,0.3926432,-0.465333,0.6416931,0.0,1.0,6.011411,1.839156e-09,-1.237441,0.2159235,-1.602813,0.1089759,5.180706,2.210481e-07
NeuralNetworkDeepClassifier,-7.004121,2.485413e-12,1.499406,0.1337684,-1.375317,0.1690332,-5.908004,3.462784e-09,-6.652536,2.88085e-11,-6.011411,1.839156e-09,0.0,1.0,-6.207885,5.370249e-10,-7.028249,2.091409e-12,-2.033677,0.04198419
PassiveAggressiveClassifier,-0.596315,0.5509645,6.593939,4.283086e-11,5.773574,7.760743e-09,0.437758,0.661562,0.885856,0.3756952,1.237441,0.2159235,6.207885,5.370249e-10,0.0,1.0,-0.351585,0.7251497,5.552972,2.808532e-08
RandomForestClassifier,-0.365372,0.7148334,7.569414,3.749109e-14,6.70424,2.024575e-11,0.820365,0.4120083,1.458043,0.1448287,1.602813,0.1089759,7.028249,2.091409e-12,0.351585,0.7251497,0.0,1.0,6.438828,1.203997e-10
SVC,-6.449168,1.124655e-10,3.905349,9.408951e-05,0.782449,0.433951,-5.191046,2.091155e-07,-5.880428,4.092061e-09,-5.180706,2.210481e-07,2.033677,0.04198419,-5.552972,2.808532e-08,-6.438828,1.203997e-10,0.0,1.0


#### t-test with Bonferroni correction

In [11]:
# Pairwise t-test with Bonferroni correction; the result is a boolean table with
# one True/False entry per estimator pair.
# NOTE(review): presumably True marks a significant difference after correction -- confirm against mlaut.
t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(errors_per_estimator)
t_test_bonferroni_df

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,False,True,True,False,False,False,True,False,False,True
BaselineClassifier,True,False,False,True,True,True,False,True,True,True
BernoulliNaiveBayes,True,False,False,True,True,True,False,True,True,False
GaussianNaiveBayes,False,True,True,False,False,False,True,False,False,True
GradientBoostingClassifier,False,True,True,False,False,False,True,False,False,True
K_Neighbours,False,True,True,False,False,False,True,False,False,True
NeuralNetworkDeepClassifier,True,False,False,True,True,True,False,True,True,False
PassiveAggressiveClassifier,False,True,True,False,False,False,True,False,False,True
RandomForestClassifier,False,True,True,False,False,False,True,False,False,True
SVC,True,True,False,True,True,True,False,True,True,False


#### Wilcoxon test

In [12]:
# Pairwise Wilcoxon tests between estimators. The first return value is discarded;
# only the multi-index DataFrame (statistic / p_val per estimator) is kept and displayed.
_, wilcoxon_df_multiindex = analyze.wilcoxon_test(errors_per_estimator)
wilcoxon_df_multiindex

  z = (T - mn - correction) / se


Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NeuralNetworkDeepClassifier,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val
BaggingClassifier,0.0,,1.0,8.029281e-10,10.0,2.056477e-09,144.0,4.19919e-05,12.0,3.897951e-08,23.0,8.423214e-08,3.0,1.972217e-09,222.5,0.01941369,287.0,0.469852,16.0,4.444366e-09
BaselineClassifier,1.0,8.029281e-10,0.0,,86.0,1.629863e-07,24.0,3.174959e-09,1.0,1.181233e-09,8.0,1.226241e-09,313.0,0.002889708,32.5,5.21064e-09,0.0,7.556929e-10,68.0,3.851212e-08
BernoulliNaiveBayes,10.0,2.056477e-09,86.0,1.629863e-07,0.0,,115.5,4.678707e-07,24.0,4.79938e-09,59.0,9.090374e-08,103.0,0.02277773,82.0,1.312874e-07,3.0,1.337087e-09,24.0,0.1329573
GaussianNaiveBayes,144.0,4.19919e-05,24.0,3.174959e-09,115.5,4.678707e-07,0.0,,574.0,0.8858215,440.0,0.5209637,49.5,1.377512e-08,331.5,0.03577126,221.5,0.0008343457,172.0,7.002871e-06
GradientBoostingClassifier,12.0,3.897951e-08,1.0,1.181233e-09,24.0,4.79938e-09,574.0,0.8858215,0.0,,333.5,0.02372403,13.0,1.654806e-09,326.5,0.1180589,80.0,3.398935e-06,70.0,6.79885e-08
K_Neighbours,23.0,8.423214e-08,8.0,1.226241e-09,59.0,9.090374e-08,440.0,0.5209637,333.5,0.02372403,0.0,,15.0,4.176698e-09,219.0,0.002161914,50.0,2.066703e-07,115.0,2.019906e-06
NeuralNetworkDeepClassifier,3.0,1.972217e-09,313.0,0.002889708,103.0,0.02277773,49.5,1.377512e-08,13.0,1.654806e-09,15.0,4.176698e-09,0.0,,46.0,2.712509e-08,1.0,1.181233e-09,71.0,0.0005232002
PassiveAggressiveClassifier,222.5,0.01941369,32.5,5.21064e-09,82.0,1.312874e-07,331.5,0.03577126,326.5,0.1180589,219.0,0.002161914,46.0,2.712509e-08,0.0,,258.5,0.04171226,129.0,1.51273e-06
RandomForestClassifier,287.0,0.469852,0.0,7.556929e-10,3.0,1.337087e-09,221.5,0.0008343457,80.0,3.398935e-06,50.0,2.066703e-07,1.0,1.181233e-09,258.5,0.04171226,0.0,,12.0,3.467697e-09
SVC,16.0,4.444366e-09,68.0,3.851212e-08,24.0,0.1329573,172.0,7.002871e-06,70.0,6.79885e-08,115.0,2.019906e-06,71.0,0.0005232002,129.0,1.51273e-06,12.0,3.467697e-09,0.0,


#### Friedman test

In [13]:
# Friedman test over all estimators; the DataFrame form holds a single row with
# the test statistic and its p-value.
friedman_test, friedman_test_df = analyze.friedman_test(errors_per_estimator)
friedman_test_df

Unnamed: 0,statistic,p_value
0,261.467136,3.771694e-51


In [14]:
# Nemenyi test between estimators; the raw result of analyze.nemenyi is wrapped
# in a DataFrame for display and export.
# NOTE(review): the entries look like pairwise p-values with -1.0 on the diagonal -- confirm against mlaut.
# The variable names spell the test "nemeniy"; they are left unchanged because later cells use them.
nemeniy_test = analyze.nemenyi(errors_per_estimator)
nemeniy_test_df = pd.DataFrame(nemeniy_test)
nemeniy_test_df

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,-1.0,9.770686e-12,2e-06,0.99841,0.9982158,0.9782782,1.067945e-08,0.9999413,1.0,1.8e-05
BaselineClassifier,9.770686e-12,-1.0,0.953511,2.78169e-08,3.09146e-08,4.985285e-07,0.9994875,2.3841e-09,3.629285e-11,0.821255
BernoulliNaiveBayes,1.503125e-06,0.9535112,-1.0,0.0004245506,0.0004559163,0.002843343,0.9998985,7.837964e-05,3.934614e-06,0.999999
GaussianNaiveBayes,0.99841,2.78169e-08,0.000425,-1.0,1.0,0.9999991,8.684415e-06,0.9999999,0.9995883,0.002767
GradientBoostingClassifier,0.9982158,3.09146e-08,0.000456,1.0,-1.0,0.9999993,9.473927e-06,0.9999998,0.9995259,0.002944
K_Neighbours,0.9782782,4.985285e-07,0.002843,0.9999991,0.9999993,-1.0,9.138695e-05,0.9998588,0.9904547,0.014219
NeuralNetworkDeepClassifier,1.067945e-08,0.9994875,0.999899,8.684415e-06,9.473927e-06,9.138695e-05,-1.0,1.127913e-06,3.280185e-08,0.995981
PassiveAggressiveClassifier,0.9999413,2.3841e-09,7.8e-05,0.9999999,0.9999998,0.9998588,1.127913e-06,-1.0,0.9999937,0.000628
RandomForestClassifier,1.0,3.629285e-11,4e-06,0.9995883,0.9995259,0.9904547,3.280185e-08,0.9999937,-1.0,4.3e-05
SVC,1.81712e-05,0.8212545,0.999999,0.002767068,0.002944233,0.01421924,0.9959812,0.0006276147,4.333338e-05,-1.0


### Per dataset

In [15]:
# Display the per-dataset, per-estimator table (loss and std columns) returned by
# analyze.prediction_errors.
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std
AP_Breast_Colon,BaggingClassifier,0.048077,0.014833
AP_Breast_Colon,BaselineClassifier,0.514423,0.034654
AP_Breast_Colon,BernoulliNaiveBayes,0.471154,0.034611
AP_Breast_Colon,GaussianNaiveBayes,0.072115,0.017936
AP_Breast_Colon,GradientBoostingClassifier,0.057692,0.016167
AP_Breast_Colon,K_Neighbours,0.086538,0.019495
AP_Breast_Colon,NeuralNetworkDeepClassifier,0.528846,0.034611
AP_Breast_Colon,PassiveAggressiveClassifier,0.057692,0.016167
AP_Breast_Colon,RandomForestClassifier,0.057692,0.016167
AP_Breast_Colon,SVC,0.471154,0.034611


## Save tables to $\LaTeX$

In [16]:
# Export every result table to the paper's tables directory as LaTeX.
#
# Each entry pairs an output file name with the DataFrame written to it, so each
# table is written exactly once (the Bonferroni table used to be written twice
# to the same file).
# NOTE(review): the file names 'avg_and_st_error.tex' and 'wilxocon_test.tex'
# look misspelled but are kept byte-for-byte because the paper sources may
# reference them -- confirm before renaming.
tables_dir = '../mlaut_paper/tables/'
latex_tables = [
    ('avg_and_st_error.tex', avg_and_std_error),  # average and standard error
    ('avg_rank.tex', avg_rank),  # average rank
    ('avg_metrics.tex', avg_metrics),  # merged rank, score and training time
    ('cohens_d.tex', cohens_d),  # Cohen's d
    ('t_test.tex', t_test_df),  # t-test
    ('sign_test.tex', sign_test_df),  # sign test
    ('t_test_bonferroni.tex', t_test_bonferroni_df),  # t-test with Bonferroni correction
    ('wilxocon_test.tex', wilcoxon_df_multiindex),  # Wilcoxon test
    ('friedman_test.tex', friedman_test_df),  # Friedman test
    ('nemeniy_test.tex', nemeniy_test_df),  # Nemenyi test
    ('errors_per_dataset_per_estimator.tex', errors_per_dataset_per_estimator_df),  # errors per dataset per estimator
]

for file_name, table in latex_tables:
    # The context manager closes each file even if to_latex() raises.
    with open(tables_dir + file_name, 'w') as tf:
        tf.write(table.to_latex())