In [1]:
from mlaut.analyze_results import AnalyseResults
from mlaut.data import Data
import pandas as pd
from mlaut.estimators.estimators import instantiate_default_estimators

import matplotlib.pyplot as plt
# Let pandas render up to 1000 rows so the long pairwise-comparison tables
# further down are displayed in full instead of being truncated.
pd.set_option('display.max_rows', 1000)

  from ._conv import register_converters as _register_converters


In [2]:
# mlaut data helper; it is reused further down to load the train/test split.
data = Data()

# Original datasets are opened with mode='r'; the experiment file that holds
# the stored predictions is opened with mode='a'.
# NOTE(review): presumably 'r' is read-only and 'a' is read/write, as in h5py —
# confirm against Data.open_hdf5.
input_io = data.open_hdf5('data/openml.h5', mode='r')
out_io = data.open_hdf5('data/openml-classification.h5', mode='a')

# Analysis entry point: wired to both HDF5 handles and to the groups that
# hold the original datasets and the saved predictions.
analyze = AnalyseResults(
    hdf5_input_io=input_io,
    hdf5_output_io=out_io,
    input_h5_original_datasets_group='openml/',
    output_h5_predictions_group='experiments/predictions/',
)


### All datasets

In [3]:
# Score the stored predictions with the 'accuracy' metric. The call returns
# three objects: per-estimator scores, per-dataset/per-estimator scores, and a
# DataFrame view of the latter (displayed in the "Per dataset" section).
errors_per_estimator, errors_per_dataset_per_estimator, errors_per_dataset_per_estimator_df = (
    analyze.prediction_errors(metric='accuracy')
)

#### Simple average and standard error

In [4]:
# One row per estimator with the columns `avg` and `std_error` (see output).
# NOTE(review): every std_error in the saved output is 0.0 — presumably only a
# single dataset contributed to this run; confirm before interpreting the table.
res_df = analyze.average_and_std_error(errors_per_estimator)
res_df

Unnamed: 0,avg,std_error
BaselineRegressor,0.5,0.0
BaselineClassifier,0.529412,0.0
K_Means,0.676471,0.0
NeuralNetworkDeepClassifier,0.764706,0.0
NeuralNetworkDeepRegressor,0.794118,0.0
BaggingClassifier,1.0,0.0
BaggingRegressor,1.0,0.0
BernoulliNaiveBayes,1.0,0.0
GaussianNaiveBayes,1.0,0.0
GradientBoostingClassifier,1.0,0.0


#### Average Rank

In [5]:
# Average rank of each estimator over the per-estimator scores.
# NOTE(review): ascending=False is presumably chosen because the scores are
# accuracies (higher is better) — confirm against AnalyseResults.ranks.
analyze.ranks(errors_per_estimator, ascending=False)

Unnamed: 0,avg_rank
BaggingClassifier,7.5
RandomForestRegressor,7.5
RandomForestClassifier,7.5
PassiveAggressiveClassifier,7.5
LogisticRegression,7.5
LassoLars,7.5
RidgeRegression,7.5
Lasso,7.5
GradientBoostingClassifier,7.5
GaussianNaiveBayes,7.5


#### Cohen's d

In [6]:
# Pairwise Cohen's d effect size between estimators (one row per pair).
# NOTE(review): the saved output is empty for every pair and is accompanied by
# a warning pointing at the pooled-standard-deviation line; presumably there is
# only one observation per estimator, which makes n1 + n2 - 2 zero — confirm.
cohens_d = analyze.cohens_d(errors_per_estimator)
cohens_d

  SDpooled = np.sqrt(((n1-1)*v1 + (n2-1)*v2)/(n1+n2-2))


Unnamed: 0,Cohen's d
BaggingClassifier-BaggingRegressor,
BaggingClassifier-BaselineClassifier,
BaggingClassifier-BaselineRegressor,
BaggingClassifier-BernoulliNaiveBayes,
BaggingClassifier-GaussianNaiveBayes,
BaggingClassifier-GradientBoostingClassifier,
BaggingClassifier-GradientBoostingRegressor,
BaggingClassifier-K_Means,
BaggingClassifier-Lasso,
BaggingClassifier-LassoLars,


#### t-test

In [7]:
# Pairwise t-test between estimators. The call returns two objects; only the
# DataFrame (columns: pair, t_statistic, p_value) is displayed.
# NOTE(review): t_statistic and p_value are empty in the saved output and
# warnings were emitted — presumably too few observations per estimator; confirm.
t_test, t_test_df = analyze.t_test(errors_per_estimator)
t_test_df

  **kwargs)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaggingRegressor,,
1,BaggingClassifier - BaselineClassifier,,
2,BaggingClassifier - BaselineRegressor,,
3,BaggingClassifier - BernoulliNaiveBayes,,
4,BaggingClassifier - GaussianNaiveBayes,,
5,BaggingClassifier - GradientBoostingClassifier,,
6,BaggingClassifier - GradientBoostingRegressor,,
7,BaggingClassifier - K_Means,,
8,BaggingClassifier - Lasso,,
9,BaggingClassifier - LassoLars,,


#### Sign test

In [8]:
# Pairwise sign test between estimators. The call returns two objects; only
# the DataFrame is displayed.
# NOTE(review): the statistic column in the saved output is labelled
# `t_statistic` even though this is the sign test — the label comes from the
# library, not from this cell.
sign_test, sign_test_df = analyze.sign_test(errors_per_estimator)
sign_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaggingRegressor,0.0,1.0
1,BaggingClassifier - BaselineClassifier,1.0,0.317311
2,BaggingClassifier - BaselineRegressor,1.0,0.317311
3,BaggingClassifier - BernoulliNaiveBayes,0.0,1.0
4,BaggingClassifier - GaussianNaiveBayes,0.0,1.0
5,BaggingClassifier - GradientBoostingClassifier,0.0,1.0
6,BaggingClassifier - GradientBoostingRegressor,0.0,1.0
7,BaggingClassifier - K_Means,1.0,0.317311
8,BaggingClassifier - Lasso,0.0,1.0
9,BaggingClassifier - LassoLars,0.0,1.0


#### t-test with Bonferroni correction

In [9]:
# Pairwise t-test with Bonferroni correction. The call returns two objects;
# only the DataFrame (columns: pair, p_value) is displayed.
# NOTE(review): p_value is empty for every pair in the saved output, matching
# the empty results of the uncorrected t-test cell — confirm the cause there.
t_test_bonferroni, t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(errors_per_estimator)
t_test_bonferroni_df

  **kwargs)
  ret = ret.dtype.type(ret / rcount)
  reject = pvals <= alphacBonf
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,pair,p_value
0,BaggingClassifier - BaggingRegressor,
1,BaggingClassifier - BaselineClassifier,
2,BaggingClassifier - BaselineRegressor,
3,BaggingClassifier - BernoulliNaiveBayes,
4,BaggingClassifier - GaussianNaiveBayes,
5,BaggingClassifier - GradientBoostingClassifier,
6,BaggingClassifier - GradientBoostingRegressor,
7,BaggingClassifier - K_Means,
8,BaggingClassifier - Lasso,
9,BaggingClassifier - LassoLars,


#### Wilcoxon test

In [10]:
# Pairwise Wilcoxon test between estimators. The call returns two objects;
# only the DataFrame (columns: pair, statistic, p_value) is displayed.
# NOTE(review): p_value is empty for the pairs whose scores are identical in
# the averages table, and a warning was emitted on the z-score line —
# presumably a zero standard error; confirm.
wilcoxon_test, wilcoxon_test_df = analyze.wilcoxon_test(errors_per_estimator)
wilcoxon_test_df

  z = (T - mn - correction) / se


Unnamed: 0,pair,statistic,p_value
0,BaggingClassifier - BaggingRegressor,0.0,
1,BaggingClassifier - BaselineClassifier,0.0,0.317311
2,BaggingClassifier - BaselineRegressor,0.0,0.317311
3,BaggingClassifier - BernoulliNaiveBayes,0.0,
4,BaggingClassifier - GaussianNaiveBayes,0.0,
5,BaggingClassifier - GradientBoostingClassifier,0.0,
6,BaggingClassifier - GradientBoostingRegressor,0.0,
7,BaggingClassifier - K_Means,0.0,0.317311
8,BaggingClassifier - Lasso,0.0,
9,BaggingClassifier - LassoLars,0.0,


#### Friedman test

In [11]:
# Friedman test across all estimators at once. The call returns two objects;
# only the one-row DataFrame (columns: statistic, p_value) is displayed.
friedman_test, friedman_test_df = analyze.friedman_test(errors_per_estimator)
friedman_test_df

Unnamed: 0,statistic,p_value
0,18.0,0.455653


In [12]:
# Nemenyi test, run directly after the Friedman test. The result is wrapped in
# a DataFrame for display as an estimator-by-estimator matrix; the diagonal
# shows -1.0 in the saved output.
# NOTE(review): the variable is spelled `nemeniy_test` while the method is
# `nemenyi`; left unchanged here because other cells may reference the name.
nemeniy_test = analyze.nemenyi(errors_per_estimator)
pd.DataFrame(nemeniy_test)

Unnamed: 0,BaggingClassifier,BaggingRegressor,BaselineClassifier,BaselineRegressor,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingRegressor,K_Means,Lasso,LassoLars,LogisticRegression,NeuralNetworkDeepClassifier,NeuralNetworkDeepRegressor,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestRegressor,RidgeRegression,SVC
BaggingClassifier,-1.0,1.0,0.999979,0.999916,1.0,1.0,1.0,1.0,0.999996,1.0,1.0,1.0,0.999999,1.0,1.0,1.0,1.0,1.0,1.0
BaggingRegressor,1.0,-1.0,0.999979,0.999916,1.0,1.0,1.0,1.0,0.999996,1.0,1.0,1.0,0.999999,1.0,1.0,1.0,1.0,1.0,1.0
BaselineClassifier,0.999979,0.999979,-1.0,1.0,0.999979,0.999979,0.999979,0.999979,1.0,0.999979,0.999979,0.999979,1.0,1.0,0.999979,0.999979,0.999979,0.999979,0.999979
BaselineRegressor,0.999916,0.999916,1.0,-1.0,0.999916,0.999916,0.999916,0.999916,1.0,0.999916,0.999916,0.999916,1.0,1.0,0.999916,0.999916,0.999916,0.999916,0.999916
BernoulliNaiveBayes,1.0,1.0,0.999979,0.999916,-1.0,1.0,1.0,1.0,0.999996,1.0,1.0,1.0,0.999999,1.0,1.0,1.0,1.0,1.0,1.0
GaussianNaiveBayes,1.0,1.0,0.999979,0.999916,1.0,-1.0,1.0,1.0,0.999996,1.0,1.0,1.0,0.999999,1.0,1.0,1.0,1.0,1.0,1.0
GradientBoostingClassifier,1.0,1.0,0.999979,0.999916,1.0,1.0,-1.0,1.0,0.999996,1.0,1.0,1.0,0.999999,1.0,1.0,1.0,1.0,1.0,1.0
GradientBoostingRegressor,1.0,1.0,0.999979,0.999916,1.0,1.0,1.0,-1.0,0.999996,1.0,1.0,1.0,0.999999,1.0,1.0,1.0,1.0,1.0,1.0
K_Means,0.999996,0.999996,1.0,1.0,0.999996,0.999996,0.999996,0.999996,-1.0,0.999996,0.999996,0.999996,1.0,1.0,0.999996,0.999996,0.999996,0.999996,0.999996
Lasso,1.0,1.0,0.999979,0.999916,1.0,1.0,1.0,1.0,0.999996,-1.0,1.0,1.0,0.999999,1.0,1.0,1.0,1.0,1.0,1.0


### Per dataset

In [13]:
# Per-dataset, per-estimator results returned by analyze.prediction_errors.
# The saved output has the columns `loss` and `std`, indexed by dataset and
# estimator.
# NOTE(review): the `loss` values look like 1 - accuracy relative to the
# averages table (e.g. BaselineClassifier 0.470588 vs 0.529412) — confirm.
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std
zoo,BaggingClassifier,0.0,0.0
zoo,BaggingRegressor,0.0,0.0
zoo,BaselineClassifier,0.470588,0.085601
zoo,BaselineRegressor,0.5,0.085749
zoo,BernoulliNaiveBayes,0.0,0.0
zoo,GaussianNaiveBayes,0.0,0.0
zoo,GradientBoostingClassifier,0.0,0.0
zoo,GradientBoostingRegressor,0.0,0.0
zoo,K_Means,0.323529,0.080231
zoo,Lasso,0.0,0.0


In [14]:
# Load the train/test split of the 'zoo' dataset from the '/openml' group,
# using the same two HDF5 handles that were opened for the analysis.
X_train, X_test, y_train, y_test = data.load_test_train_dts(
    dts_name='zoo',
    dts_grp_path='/openml',
    hdf5_in=input_io,
    hdf5_out=out_io,
)