In [1]:
import pandas as pd
import numpy as np

In [2]:
# Make the parent folder importable, since that is where the lib folder lives
import sys

if ".." not in sys.path:
    # Prepend so the parent folder is searched before the existing entries
    sys.path.insert(0, "..")

In [3]:
# nonparametric tests for one performance measure (e.g., AUC)
from P_HAKN import run_friedman, ph_pvals, ap2h0

In [4]:
# Significance level for the hypothesis tests; edit here to change the default.
# NOTE(review): `alpha` is not passed to any call visible in this notebook --
# confirm how the P_HAKN helpers are meant to receive it.
alpha = 0.05   # default significance level

#### Read in our data

In [5]:
# Load the performance scores. In the csv, the columns are classifiers and the
# rows are datasets; the first csv column becomes the row index (index_col=0).
full_df = pd.read_csv("../datasets/gmeans.csv", index_col=0)

In [6]:
# All_vs_One: list of "control" classifiers.
# The selection cell that follows keeps only the columns whose names start with
# one of these entries; an empty list keeps every classifier.
# Alternative settings are left commented out for quick switching:
#avo = []
avo = ['RF']
#avo = ['RF','XGB']

In [7]:
# Choose the classifier columns to analyse and build a label for the report.
if len(avo) > 0:
    # Keep only the columns whose names begin with one of the control prefixes
    # (a tuple lets startswith test all prefixes in a single pass).
    baseclf = tuple(avo)
    # Label is every control name followed by "_", then "Models" (e.g. "RF_Models")
    oname = "_".join(avo) + "_Models"
    df = full_df.loc[:, full_df.columns.str.startswith(baseclf)]
else:
    # No control classifiers given: analyse every column
    oname = "All_Models"
    df = full_df

#### Friedman test 
Checks if there is a significant difference in performance for any classifier<br>
If we reject H0 (no difference), we use the post-hoc test to find out which differences are significant.

In [8]:
# Run the Friedman test on the selected classifier columns. The call is unpacked
# into a reject flag, a report that is printed below, and the rankings that the
# post-hoc cells pass on to ph_pvals.
reject, rptstr, rankings = run_friedman(df)
print(oname,":",rptstr)

# continue only if H0 was rejected: raising here stops a "Run All" before the
# post-hoc cells execute
if not reject:
    raise Exception("Accepted H0 for Freidman Test")

RF_Models : Freidman Test
H0: there is no difference in the means at the 95.0% confidence level 
Reject: Ready to continue with post-hoc tests


#### All vs. All Tests
Compare every classifier to every other one, using the rankings ('pivotal values') returned from the Friedman test.<br>
The general case shows p_values adjusted for multiple tests using a range of methods; Nemenyi and Shaffer show p_values adjusted using similar methods. Note that, for technical reasons, the Shaffer method should not be used for more than 18 classifiers.<br>
The dataframe of adjusted p_values can be quickly converted to show if the Null Hypothesis (H0: No significant difference) should be accepted (True) or rejected (False).

In [9]:
# All-vs-all post-hoc test: unadjusted and adjusted p-values for each pair of
# classifiers, computed from the Friedman rankings (general case, no control).
gen_pvals_df = ph_pvals(rankings)
# Bare expression so the notebook renders the resulting table
gen_pvals_df

Unnamed: 0,p_noadj,ap_BDun,ap_Sdak,ap_Holm,ap_Finr,ap_Hoch,ap_Li
RF // RF+RUS,0.000181,0.006523,0.006502,0.006523,0.006502,0.006523,0.001761
RF // RF+Fse+RUS,0.001249,0.044958,0.043989,0.043709,0.022242,0.04246,0.012011
RF+FSe // RF+RUS,0.001249,0.044958,0.043989,0.043709,0.022242,0.04246,0.012011
RF+FS // RF+RUS,0.001946,0.070048,0.067714,0.064211,0.022242,0.064211,0.01859
RF // RF+Fse+SMOTE,0.004509,0.162313,0.150137,0.144278,0.032012,0.144278,0.042047
RF+FSe // RF+Fse+RUS,0.006706,0.241426,0.215132,0.207895,0.039569,0.201188,0.061285
RF+ROS // RF+RUS,0.006706,0.241426,0.215132,0.207895,0.039569,0.201188,0.061285
RF+FS // RF+Fse+RUS,0.009823,0.353638,0.299097,0.284875,0.043451,0.284875,0.087284
RF // RF+SMOTE,0.020137,0.724923,0.519208,0.563829,0.078147,0.543692,0.163903
RF+FSe // RF+Fse+SMOTE,0.020137,0.724923,0.519208,0.563829,0.078147,0.543692,0.163903


In [10]:
# Convert the adjusted p-values into H0 decisions: True means H0 (no significant
# difference) is accepted, False means it is rejected.
# NOTE(review): no significance level is passed here, so ap2h0 presumably uses
# its own default -- confirm it agrees with `alpha`.
gen_ho_df = ap2h0(gen_pvals_df)
# Bare expression so the notebook renders the resulting table
gen_ho_df

Unnamed: 0,p_noadj,H0: BDun,H0: Sdak,H0: Holm,H0: Finr,H0: Hoch,H0: Li
RF // RF+RUS,False,False,False,False,False,False,False
RF // RF+Fse+RUS,False,False,False,False,False,False,False
RF+FSe // RF+RUS,False,False,False,False,False,False,False
RF+FS // RF+RUS,False,True,True,True,False,True,False
RF // RF+Fse+SMOTE,False,True,True,True,False,True,False
RF+FSe // RF+Fse+RUS,False,True,True,True,False,True,True
RF+ROS // RF+RUS,False,True,True,True,False,True,True
RF+FS // RF+Fse+RUS,False,True,True,True,False,True,True
RF // RF+SMOTE,False,True,True,True,True,True,True
RF+FSe // RF+Fse+SMOTE,False,True,True,True,True,True,True


In [11]:
# All-vs-all p-values with nmyi=True; the rendered table gains an ap_Nymi
# column (presumably the Nemenyi adjustment -- confirm against P_HAKN).
nym_ap_df=ph_pvals(rankings,nmyi=True)
# Bare expression so the notebook renders the resulting table
nym_ap_df

Unnamed: 0,p_noadj,ap_Nymi,ap_BDun,ap_Sdak
RF // RF+RUS,0.000181,0.005671,0.006523,0.006502
RF // RF+Fse+RUS,0.001249,0.034073,0.044958,0.043989
RF+FSe // RF+RUS,0.001249,0.034073,0.044958,0.043989
RF+FS // RF+RUS,0.001946,0.050507,0.070048,0.067714
RF // RF+Fse+SMOTE,0.004509,0.103887,0.162313,0.150137
RF+FSe // RF+Fse+RUS,0.006706,0.143478,0.241426,0.215132
RF+ROS // RF+RUS,0.006706,0.143478,0.241426,0.215132
RF+FS // RF+Fse+RUS,0.009823,0.19389,0.353638,0.299097
RF // RF+SMOTE,0.020137,0.327437,0.724923,0.519208
RF+FSe // RF+Fse+SMOTE,0.020137,0.327437,0.724923,0.519208


#### One vs All (Control vs Treatment) 
In some cases, we do not care about all pairwise comparisons as we only propose a single method, or just need to compare to a baseline method. In this case we designate a control method, and compare all others to it.<br>
For statistical reasons the Nemenyi test should not be used for this.

In [12]:
# One-vs-all post-hoc test: every other classifier is compared against the
# control 'RF'.
# NOTE(review): the `xgb_` prefix does not match the 'RF' control used here; the
# name is kept because a later cell reads `xgb_ap_df`. The control is also
# hard-coded rather than taken from `avo` -- confirm that is intended.
xgb_ap_df = ph_pvals(rankings,control='RF')
# Bare expression so the notebook renders the resulting table
xgb_ap_df

Unnamed: 0,p_noadj,ap_BDun,ap_Sdak,ap_Holm,ap_Finr,ap_Hoch,ap_Li
RF // RF+RUS,0.000181,0.00145,0.001449,0.00145,0.001449,0.00145,0.000459
RF // RF+Fse+RUS,0.001249,0.009991,0.009947,0.008742,0.004986,0.008742,0.003156
RF // RF+Fse+SMOTE,0.004509,0.03607,0.035505,0.027052,0.011978,0.027052,0.011302
RF // RF+SMOTE,0.020137,0.161094,0.150186,0.100684,0.039868,0.100684,0.048574
RF // RF+Fse+ROS,0.052808,0.42246,0.352103,0.21123,0.083144,0.21123,0.118077
RF // RF+ROS,0.3017,1.0,0.943462,0.905099,0.380478,0.605577,0.4334
RF // RF+FS,0.518605,1.0,0.997116,1.0,0.566345,0.605577,0.568005
RF // RF+FSe,0.605577,1.0,0.999414,1.0,0.605577,0.605577,0.605577


In [13]:
# Convert the control-vs-treatment adjusted p-values into H0 decisions
# (True = H0 accepted, False = H0 rejected).
# NOTE(review): no significance level is passed here, so ap2h0 presumably uses
# its own default -- confirm it agrees with `alpha`.
xgb_ho_df = ap2h0(xgb_ap_df)
# Bare expression so the notebook renders the resulting table
xgb_ho_df

Unnamed: 0,p_noadj,H0: BDun,H0: Sdak,H0: Holm,H0: Finr,H0: Hoch,H0: Li
RF // RF+RUS,False,False,False,False,False,False,False
RF // RF+Fse+RUS,False,False,False,False,False,False,False
RF // RF+Fse+SMOTE,False,False,False,False,False,False,False
RF // RF+SMOTE,False,True,True,True,False,True,False
RF // RF+Fse+ROS,True,True,True,True,True,True,True
RF // RF+ROS,True,True,True,True,True,True,True
RF // RF+FS,True,True,True,True,True,True,True
RF // RF+FSe,True,True,True,True,True,True,True


In [14]:
# Control-vs-treatment p-values against 'RF' with shaf=True; the rendered table
# gains an ap_Shaf column (presumably the Shaffer adjustment -- confirm against
# P_HAKN, and confirm it is meant to be combined with a control).
shaf_ap_xgb_df = ph_pvals(rankings,shaf=True,control='RF')
# Bare expression so the notebook renders the resulting table
shaf_ap_xgb_df

Unnamed: 0,p_noadj,ap_Shaf,ap_Holm,ap_Finr
RF // RF+RUS,0.000181,0.001087,0.00145,0.001449
RF // RF+Fse+RUS,0.001249,0.007493,0.008742,0.004986
RF // RF+Fse+SMOTE,0.004509,0.027052,0.027052,0.011978
RF // RF+SMOTE,0.020137,0.06041,0.100684,0.039868
RF // RF+Fse+ROS,0.052808,0.158423,0.21123,0.083144
RF // RF+ROS,0.3017,0.905099,0.905099,0.380478
RF // RF+FS,0.518605,1.0,1.0,0.566345
RF // RF+FSe,0.605577,1.0,1.0,0.605577
