In [1]:
from mleap.analyze_results import AnalyseResults
from mleap.data import Data
import pandas as pd
from mleap.data.estimators import instantiate_default_estimators

import matplotlib.pyplot as plt
# Raise the pandas row-display limit so the long result tables below are
# rendered in full instead of being truncated.
pd.options.display.max_rows = 1000

  from ._conv import register_converters as _register_converters
  return f(*args, **kwds)


In [2]:
# Open the two HDF5 stores used by the analysis: the original datasets
# (mode='r') and the experiment results file (mode='a').
# NOTE(review): neither handle is closed in the visible part of the notebook —
# confirm they are released later or by the library.
data = Data()
input_io = data.open_hdf5('data/delgado.hdf5', mode='r')
out_io = data.open_hdf5('data/classification.hdf5', mode='a')

# Analysis helper bound to both stores and to the HDF5 groups holding the
# original datasets and the stored predictions.
analyze = AnalyseResults(
    hdf5_input_io=input_io,
    hdf5_output_io=out_io,
    input_h5_original_datasets_group='delgado_datasets/',
    output_h5_predictions_group='experiments/predictions/',
)


In [3]:
# Compute the errors across all datasets with the 'mean_squared_error' metric;
# the result feeds every summary and statistical test below.
# NOTE(review): most estimators in the results are classifiers — confirm that
# mean squared error is the intended metric here.
error_all_datasets = analyze.calculate_error_all_datasets(metric='mean_squared_error')

### All datasets

In [4]:
# Errors across all datasets ('mean_squared_error'), used by the cells in this
# section.
# NOTE(review): this is identical to the call in the preceding cell, so the
# result is computed twice; one of the two calls can likely be dropped —
# confirm the method has no side effects that are relied upon.
error_all_datasets = analyze.calculate_error_all_datasets(metric='mean_squared_error')

#### Simple average

In [5]:
# Summarise the errors per estimator; the displayed table has one row per
# estimator with `avg` and `std` columns.
res_df = analyze.calculate_average_std(error_all_datasets)
res_df

Unnamed: 0,avg,std
BaggingClassifier,1.046759,2.170619
RandomForestClassifier,1.142671,2.374306
GradientBoostingClassifier,1.468495,3.49137
SVC,2.090772,4.853857
LogisticRegression,2.219204,5.866994
Lasso,2.345978,6.466949
LassoLars,2.418444,6.549009
RidgeRegression,2.423562,6.671876
PassiveAggressiveClassifier,2.781483,6.971238
BernoulliNaiveBayes,3.283329,9.959481


#### Cohen's d

In [6]:
# Cohen's d for pairs of estimators; the displayed table is indexed by
# "<estimator>-<estimator>" pair labels with a single "Cohen's d" column.
cohens_d = analyze.cohens_d(error_all_datasets)
cohens_d

Unnamed: 0,Cohen's d
BaggingClassifier-BaselineClassifier,0.487072
BaselineClassifier-RandomForestClassifier,-0.478721
BaselineClassifier-GradientBoostingClassifier,-0.448151
BaggingClassifier-GaussianNaiveBayes,0.419429
GaussianNaiveBayes-RandomForestClassifier,-0.399409
BaselineClassifier-SVC,-0.392251
BaggingClassifier-NeuralNetworkDeepClassifier,0.379588
BaselineClassifier-LogisticRegression,-0.376227
NeuralNetworkDeepClassifier-RandomForestClassifier,-0.367934
BaselineClassifier-Lasso,-0.362688


#### t-test

In [7]:
# Pairwise t-test between estimators. The call returns two objects; only the
# second is displayed, with `pair`, `t_statistic` and `p_value` columns.
t_test, t_test_df = analyze.t_test(error_all_datasets)
t_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,-2.577343,0.011255
1,BaggingClassifier - BernoulliNaiveBayes,-1.641961,0.103403
2,BaggingClassifier - GaussianNaiveBayes,-2.21941,0.028474
3,BaggingClassifier - GradientBoostingClassifier,-0.767673,0.444297
4,BaggingClassifier - Lasso,-1.425266,0.156861
5,BaggingClassifier - LassoLars,-1.487784,0.139618
6,BaggingClassifier - LogisticRegression,-1.402536,0.163521
7,BaggingClassifier - NeuralNetworkDeepClassifier,-2.008593,0.046986
8,BaggingClassifier - PassiveAggressiveClassifier,-1.777957,0.078125
9,BaggingClassifier - RandomForestClassifier,-0.22311,0.823856


#### Sign test

In [8]:
# Pairwise sign test between estimators. The call returns two objects; only
# the second is displayed.
# NOTE(review): the displayed statistic column is labelled `t_statistic`, the
# same label as in the t-test table — confirm that label is intended here.
sign_test, sign_test_df = analyze.sign_test(error_all_datasets)
sign_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,-5.508497,3.619105e-08
1,BaggingClassifier - BernoulliNaiveBayes,-2.309375,0.02092278
2,BaggingClassifier - GaussianNaiveBayes,-3.094279,0.001972918
3,BaggingClassifier - GradientBoostingClassifier,-0.691396,0.4893169
4,BaggingClassifier - Lasso,-1.385625,0.1658614
5,BaggingClassifier - LassoLars,-0.835909,0.4032062
6,BaggingClassifier - LogisticRegression,-1.303451,0.1924209
7,BaggingClassifier - NeuralNetworkDeepClassifier,-2.655073,0.007929128
8,BaggingClassifier - PassiveAggressiveClassifier,-1.719988,0.08543453
9,BaggingClassifier - RandomForestClassifier,0.093508,0.9254996


#### t-test with Bonferroni correction

In [9]:
# Pairwise t-test with Bonferroni correction. The call returns two objects;
# the displayed one has `pair` and `p_value` columns.
t_test_bonferroni, t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(error_all_datasets)
t_test_bonferroni_df

Unnamed: 0,pair,p_value
0,BaggingClassifier - BaselineClassifier,0.877912
1,BaggingClassifier - BernoulliNaiveBayes,1.0
2,BaggingClassifier - GaussianNaiveBayes,1.0
3,BaggingClassifier - GradientBoostingClassifier,1.0
4,BaggingClassifier - Lasso,1.0
5,BaggingClassifier - LassoLars,1.0
6,BaggingClassifier - LogisticRegression,1.0
7,BaggingClassifier - NeuralNetworkDeepClassifier,1.0
8,BaggingClassifier - PassiveAggressiveClassifier,1.0
9,BaggingClassifier - RandomForestClassifier,1.0


#### Wilcoxon test

In [10]:
# Pairwise Wilcoxon test between estimators. The call returns two objects;
# the displayed one has `pair`, `statistic` and `p_value` columns.
wilcoxon_test, wilcoxon_test_df = analyze.wilcoxon_test(error_all_datasets)
wilcoxon_test_df

Unnamed: 0,pair,statistic,p_value
0,BaggingClassifier - BaselineClassifier,0.0,5.143754e-11
1,BaggingClassifier - BernoulliNaiveBayes,215.0,1.978773e-06
2,BaggingClassifier - GaussianNaiveBayes,100.0,1.981287e-08
3,BaggingClassifier - GradientBoostingClassifier,239.0,0.0001196442
4,BaggingClassifier - Lasso,637.0,0.1321655
5,BaggingClassifier - LassoLars,761.0,0.6027771
6,BaggingClassifier - LogisticRegression,390.0,0.006469831
7,BaggingClassifier - NeuralNetworkDeepClassifier,107.0,1.734954e-08
8,BaggingClassifier - PassiveAggressiveClassifier,151.0,9.607049e-07
9,BaggingClassifier - RandomForestClassifier,666.0,0.834089


#### Friedman test

In [11]:
# Friedman test over all estimators. The call returns two objects; the
# displayed one is a single row with `statistic` and `p_value`.
friedman_test, friedman_test_df = analyze.friedman_test(error_all_datasets)
friedman_test_df

Unnamed: 0,statistic,p_value
0,290.869744,3.869762e-55


In [12]:
# Nemenyi test: the result is wrapped in a DataFrame for display and shows an
# estimator-by-estimator matrix whose diagonal entries are -1.0.
# NOTE(review): off-diagonal values are presumably pairwise p-values — confirm
# against the library. The variable name is spelled `nemeniy_test`; it is left
# unchanged because later cells may reference it.
nemeniy_test = analyze.nemenyi(error_all_datasets)
pd.DataFrame(nemeniy_test)

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,Lasso,LassoLars,LogisticRegression,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RidgeRegression,SVC
BaggingClassifier,-1.0,0.000499,0.957183,0.630756,1.0,0.999847,1.0,0.999895,0.833231,0.997361,1.0,1.0,0.996546
BaselineClassifier,0.000499,-1.0,0.340894,0.8118,0.008225,0.041015,0.008724,0.036415,0.600899,0.111008,0.000383,0.007751,0.122993
BernoulliNaiveBayes,0.957183,0.340894,-1.0,0.999996,0.998778,0.999986,0.998911,0.999978,1.0,1.0,0.947278,0.99863,1.0
GaussianNaiveBayes,0.630756,0.8118,0.999996,-1.0,0.924988,0.989864,0.928861,0.987618,1.0,0.998951,0.597226,0.920969,0.999237
GradientBoostingClassifier,1.0,0.008225,0.998778,0.924988,-1.0,1.0,1.0,1.0,0.984238,0.999995,0.999999,1.0,0.999991
Lasso,0.999847,0.041015,0.999986,0.989864,1.0,-1.0,1.0,1.0,0.999122,1.0,0.999747,1.0,1.0
LassoLars,1.0,0.008724,0.998911,0.928861,1.0,1.0,-1.0,1.0,0.985401,0.999996,0.999999,1.0,0.999992
LogisticRegression,0.999895,0.036415,0.999978,0.987618,1.0,1.0,1.0,-1.0,0.998827,1.0,0.999822,1.0,1.0
NeuralNetworkDeepClassifier,0.833231,0.600899,1.0,1.0,0.984238,0.999122,0.985401,0.998827,-1.0,0.999969,0.809087,0.983002,0.999981
PassiveAggressiveClassifier,0.997361,0.111008,1.0,0.998951,0.999995,1.0,0.999996,1.0,0.999969,-1.0,0.996256,0.999993,1.0


### Per dataset

In [13]:
# Errors broken down per dataset ('mean_squared_error'). The call returns two
# objects; the displayed one has one row per (dataset, estimator) pair with
# `score` and `std` columns.
error_per_dataset, error_per_dataset_df = analyze.calculate_error_per_dataset(metric='mean_squared_error')
error_per_dataset_df

Unnamed: 0,Unnamed: 1,score,std
abalone,BaggingClassifier,0.48876,0.843254
abalone,BaselineClassifier,1.453227,1.55623
abalone,BernoulliNaiveBayes,0.759971,1.194485
abalone,GaussianNaiveBayes,0.612038,0.961718
abalone,GradientBoostingClassifier,0.498912,0.832946
abalone,Lasso,0.413882,0.540242
abalone,LassoLars,0.376236,0.822697
abalone,LogisticRegression,0.51124,0.917391
abalone,NeuralNetworkDeepClassifier,0.556925,0.929838
abalone,PassiveAggressiveClassifier,0.791153,1.244525
