In [1]:
import pandas as pd

___

# Loading Data

In [2]:
root = "../../"

bin_dep = "pickles/binary/cross_validation/label_dependent/"
mul_dep = "pickles/multiclass_categorical/cross_validation/label_dependent/"
ovr_dep = "pickles/multiclass_one_vs_rest/cross_validation/label_dependent/"
ovo_dep = "pickles/multiclass_one_vs_one/cross_validation/label_dependent/"

### Binary Classifiers

In [3]:
bin_anova = pd.read_pickle(root + bin_dep + "anova.pkl")
bin_mutual_info = pd.read_pickle(root + bin_dep + "mutual_info.pkl")
bin_relieff = pd.read_pickle(root + bin_dep + "relieff.pkl")
bin_xgb_gain = pd.read_pickle(root + bin_dep + "xgb_gain.pkl")
bin_rfe = pd.read_pickle(root + bin_dep + "rfe.pkl")
bin_wrfs = pd.read_pickle(root + bin_dep + "wrfs.pkl")

### Multiclass Classifiers (Categorical Cross Entropy)

In [4]:
mul_anova = pd.read_pickle(root + mul_dep + "anova.pkl")
mul_mutual_info = pd.read_pickle(root + mul_dep + "mutual_info.pkl")
mul_relieff = pd.read_pickle(root + mul_dep + "relieff.pkl")
mul_xgb_gain = pd.read_pickle(root + mul_dep + "xgb_gain.pkl")
mul_rfe = pd.read_pickle(root + mul_dep + "rfe.pkl")
mul_wrfs = pd.read_pickle(root + mul_dep + "wrfs.pkl")

### Multiclass Classifiers (One vs Rest)


In [5]:
ovr_anova = pd.read_pickle(root + ovr_dep + "anova.pkl")
ovr_mutual_info = pd.read_pickle(root + ovr_dep + "mutual_info.pkl")
ovr_relieff = pd.read_pickle(root + ovr_dep + "relieff.pkl")
ovr_xgb_gain = pd.read_pickle(root + ovr_dep + "xgb_gain.pkl")
ovr_rfe = pd.read_pickle(root + ovr_dep + "rfe.pkl")
ovr_wrfs = pd.read_pickle(root + ovr_dep + "wrfs.pkl")

### Multiclass Classifiers (One vs One)

In [6]:
ovo_anova = pd.read_pickle(root + ovo_dep + "anova.pkl")
ovo_mutual_info = pd.read_pickle(root + ovo_dep + "mutual_info.pkl")
ovo_relieff = pd.read_pickle(root + ovo_dep + "relieff.pkl")
ovo_xgb_gain = pd.read_pickle(root + ovo_dep + "xgb_gain.pkl")
ovo_rfe = pd.read_pickle(root + ovo_dep + "rfe.pkl")
ovo_wrfs = pd.read_pickle(root + ovo_dep + "wrfs.pkl")

In [7]:
bin_anova[39]

Unnamed: 0,Accuracy,Precision,Recall,F1_Score,Fit_Time,Predict_Time,Score_Time,Test_Size
0,0.994318,1.0,0.988636,0.994286,0.398999,0.003,0.013997,528.0
1,1.0,1.0,1.0,1.0,0.433002,0.002999,0.013002,528.0
2,0.998106,1.0,0.996212,0.998102,0.439001,0.004002,0.013,528.0
3,1.0,1.0,1.0,1.0,0.433,0.003002,0.011999,528.0
4,0.998106,1.0,0.996212,0.998102,0.419,0.004002,0.012,528.0
5,0.994318,0.988764,1.0,0.99435,0.421999,0.003003,0.013,528.0
6,1.0,1.0,1.0,1.0,0.434996,0.002999,0.011999,528.0
7,0.998106,1.0,0.996212,0.998102,0.433003,0.003,0.011998,528.0
8,1.0,1.0,1.0,1.0,0.423998,0.003002,0.012,528.0
9,0.996212,1.0,0.992424,0.996198,0.425998,0.003005,0.011995,528.0


___

# Performance

### Helper Function

In [8]:
def table_performance(pfm, method, features, decimals=2):
    means = (pfm[features-1].mean()*100).round(decimals)
    std = (pfm[features-1].std()*100).round(decimals)

    columns = ["Accuracy", "Precision", "Recall", "F1_Score", "Fit_Time"]
    #columns = ["Accuracy", "F1_Score"]

    means = means[columns]
    std = std[columns]

    df = pd.DataFrame()
    df["Mean"] = means.astype('string')
    df["Std"] = std.astype('string')
    df[method] = df["Mean"] + ' ± ' + df["Std"]
    df = pd.DataFrame(df[method])
    df = df.T

    return df

### Tables

In [9]:
number_features = 15

In [10]:
binary = pd.concat([table_performance(bin_anova, "ANOVA", number_features, 2),
                    table_performance(bin_mutual_info, "Mutual Information", number_features, 2),
                    table_performance(bin_relieff, "ReliefF", number_features, 2),
                    table_performance(bin_xgb_gain, "XGBoost Gain", number_features, 2),
                    table_performance(bin_rfe, "RFE", number_features, 2),
                    table_performance(bin_wrfs, "Ensemble", number_features, 2)])
binary

Unnamed: 0,Accuracy,Precision,Recall,F1_Score,Fit_Time
ANOVA,99.73 ± 0.22,99.77 ± 0.36,99.7 ± 0.39,99.73 ± 0.22,27.9 ± 0.76
Mutual Information,97.44 ± 0.69,96.28 ± 0.96,98.71 ± 0.6,97.48 ± 0.68,188.44 ± 1.1
ReliefF,99.43 ± 0.28,99.03 ± 0.56,99.85 ± 0.26,99.43 ± 0.28,158.02 ± 14.82
XGBoost Gain,99.77 ± 0.23,99.85 ± 0.36,99.7 ± 0.39,99.77 ± 0.23,68.58 ± 1.8
RFE,99.77 ± 0.23,99.85 ± 0.36,99.7 ± 0.39,99.77 ± 0.23,1082.57 ± 31.35
Ensemble,99.75 ± 0.24,99.81 ± 0.36,99.7 ± 0.39,99.75 ± 0.24,345.27 ± 22.2


In [11]:
multiclass = pd.concat([table_performance(mul_anova, "ANOVA", number_features, 1),
                        table_performance(mul_mutual_info, "Mutual Information", number_features, 1),
                        table_performance(mul_relieff, "ReliefF", number_features, 1),
                        table_performance(mul_xgb_gain, "XGBoost Gain", number_features, 1),
                        table_performance(mul_rfe, "RFE", number_features, 1),
                        table_performance(mul_wrfs, "Ensemble", number_features, 1)])
multiclass

Unnamed: 0,Accuracy,Precision,Recall,F1_Score,Fit_Time
ANOVA,73.0 ± 1.7,73.7 ± 2.3,73.0 ± 1.7,71.6 ± 1.8,260.9 ± 2.7
Mutual Information,73.7 ± 1.8,73.9 ± 2.0,73.7 ± 1.8,72.7 ± 1.5,465.6 ± 2.9
ReliefF,72.3 ± 1.8,73.5 ± 2.4,72.3 ± 1.8,70.6 ± 1.9,339.6 ± 2.9
XGBoost Gain,73.1 ± 1.7,73.8 ± 2.4,73.1 ± 1.7,71.7 ± 1.8,792.9 ± 13.4
RFE,72.5 ± 2.0,73.8 ± 2.2,72.5 ± 2.0,70.9 ± 2.4,9861.3 ± 108.3
Ensemble,73.0 ± 1.7,73.7 ± 2.3,73.0 ± 1.7,71.5 ± 1.8,1015.4 ± 6.8


In [12]:
ovr = pd.concat([table_performance(ovr_anova, "ANOVA", number_features, 1),
                table_performance(ovr_mutual_info, "Mutual Information", number_features, 1),
                table_performance(ovr_relieff, "ReliefF", number_features, 1),
                table_performance(ovr_xgb_gain, "XGBoost Gain", number_features, 1),
                table_performance(ovr_rfe, "RFE", number_features, 1),
                table_performance(ovr_wrfs, "Ensemble", number_features, 1)])
ovr

Unnamed: 0,Accuracy,Precision,Recall,F1_Score,Fit_Time
ANOVA,73.3 ± 1.7,74.2 ± 2.2,73.3 ± 1.7,71.9 ± 1.8,311.0 ± 3.1
Mutual Information,73.9 ± 1.8,74.2 ± 2.2,73.9 ± 1.8,72.9 ± 1.6,527.4 ± 7.2
ReliefF,72.3 ± 1.6,73.3 ± 2.2,72.3 ± 1.6,70.5 ± 1.6,416.4 ± 12.0
XGBoost Gain,73.2 ± 1.5,74.1 ± 2.1,73.2 ± 1.5,71.7 ± 1.6,870.0 ± 12.8
RFE,73.0 ± 1.8,74.4 ± 2.0,73.0 ± 1.8,71.3 ± 2.2,9874.4 ± 106.8
Ensemble,73.3 ± 2.0,74.2 ± 2.8,73.3 ± 2.0,71.8 ± 2.1,1118.3 ± 8.3


In [13]:
ovo = pd.concat([table_performance(ovo_anova, "ANOVA", number_features, 1),
                table_performance(ovo_mutual_info, "Mutual Information", number_features, 1),
                table_performance(ovo_relieff, "ReliefF", number_features, 1),
                table_performance(ovo_xgb_gain, "XGBoost Gain", number_features, 1),
                table_performance(ovo_rfe, "RFE", number_features, 1),
                table_performance(ovo_wrfs, "Ensemble", number_features, 1)])
ovo

Unnamed: 0,Accuracy,Precision,Recall,F1_Score,Fit_Time
ANOVA,72.8 ± 1.8,73.3 ± 2.5,72.8 ± 1.8,71.3 ± 1.9,655.0 ± 10.5
Mutual Information,73.9 ± 1.9,74.1 ± 2.4,73.9 ± 1.9,72.9 ± 1.8,829.0 ± 11.2
ReliefF,72.1 ± 1.5,73.1 ± 2.1,72.1 ± 1.5,70.3 ± 1.6,732.2 ± 4.4
XGBoost Gain,73.0 ± 1.7,73.7 ± 2.3,73.0 ± 1.7,71.5 ± 1.8,1202.7 ± 21.2
RFE,72.6 ± 1.9,73.9 ± 2.4,72.6 ± 1.9,70.9 ± 2.3,10233.4 ± 185.3
Ensemble,72.8 ± 1.9,73.3 ± 2.5,72.8 ± 1.9,71.3 ± 2.0,1389.6 ± 11.2


___

# Runtimes

### Helper Function

In [14]:
def table_times(pfm, method, features, decimals=2):
    means = pfm[features-1].mean().round(decimals)
    std = pfm[features-1].std().round(decimals)

    columns = ["Fit_Time", "Predict_Time", "Score_Time"]

    means = means[columns]
    std = std[columns]

    df = pd.DataFrame()
    df["Mean"] = means.astype('string')
    df["Std"] = std.astype('string')
    df[method] = df["Mean"] + ' ± ' + df["Std"]
    df = pd.DataFrame(df[method])
    df = df.T

    return df

### Tables

In [15]:
binary = pd.concat([table_times(bin_anova, "ANOVA", number_features, 3),
                    table_times(bin_mutual_info, "Mutual Information", number_features, 2),
                    table_times(bin_relieff, "ReliefF", number_features, 2),
                    table_times(bin_xgb_gain, "XGBoost Gain", number_features, 3),
                    table_times(bin_rfe, "RFE", number_features, 2),
                    table_times(bin_wrfs, "Ensemble", number_features, 2)])
binary

Unnamed: 0,Fit_Time,Predict_Time,Score_Time
ANOVA,0.279 ± 0.008,0.003 ± 0.0,0.012 ± 0.001
Mutual Information,1.88 ± 0.01,0.0 ± 0.0,0.01 ± 0.0
ReliefF,1.58 ± 0.15,0.0 ± 0.0,0.01 ± 0.0
XGBoost Gain,0.686 ± 0.018,0.003 ± 0.001,0.012 ± 0.0
RFE,10.83 ± 0.31,0.0 ± 0.0,0.01 ± 0.0
Ensemble,3.45 ± 0.22,0.0 ± 0.0,0.01 ± 0.0


In [16]:
multiclass = pd.concat([table_times(mul_anova, "ANOVA", number_features, 3),
                        table_times(mul_mutual_info, "Mutual Information", number_features, 3),
                        table_times(mul_relieff, "ReliefF", number_features, 3),
                        table_times(mul_xgb_gain, "XGBoost Gain", number_features, 3),
                        table_times(mul_rfe, "RFE", number_features, 2),
                        table_times(mul_wrfs, "Ensemble", number_features, 3)])
multiclass

Unnamed: 0,Fit_Time,Predict_Time,Score_Time
ANOVA,2.609 ± 0.027,0.009 ± 0.0,0.027 ± 0.001
Mutual Information,4.656 ± 0.029,0.009 ± 0.0,0.028 ± 0.0
ReliefF,3.396 ± 0.029,0.009 ± 0.001,0.027 ± 0.0
XGBoost Gain,7.929 ± 0.134,0.009 ± 0.0,0.027 ± 0.001
RFE,98.61 ± 1.08,0.01 ± 0.0,0.03 ± 0.0
Ensemble,10.154 ± 0.068,0.01 ± 0.001,0.027 ± 0.001


In [17]:
ovo = pd.concat([table_times(ovo_anova, "ANOVA", number_features, 3),
                table_times(ovo_mutual_info, "Mutual Information", number_features, 3),
                table_times(ovo_relieff, "ReliefF", number_features, 3),
                table_times(ovo_xgb_gain, "XGBoost Gain", number_features, 3),
                table_times(ovo_rfe, "RFE", number_features, 1),
                table_times(ovo_wrfs, "Ensemble", number_features, 3)])
ovo

Unnamed: 0,Fit_Time,Predict_Time,Score_Time
ANOVA,6.55 ± 0.105,0.243 ± 0.006,0.032 ± 0.001
Mutual Information,8.29 ± 0.112,0.229 ± 0.005,0.029 ± 0.001
ReliefF,7.322 ± 0.044,0.226 ± 0.004,0.03 ± 0.001
XGBoost Gain,12.027 ± 0.212,0.235 ± 0.008,0.032 ± 0.001
RFE,102.3 ± 1.9,0.2 ± 0.0,0.0 ± 0.0
Ensemble,13.896 ± 0.112,0.223 ± 0.004,0.029 ± 0.001


In [18]:
ovr = pd.concat([table_times(ovr_anova, "ANOVA", number_features, 2),
                table_times(ovr_mutual_info, "Mutual Information", number_features, 3),
                table_times(ovr_relieff, "ReliefF", number_features, 3),
                table_times(ovr_xgb_gain, "XGBoost Gain", number_features, 2),
                table_times(ovr_rfe, "RFE", number_features, 1),
                table_times(ovr_wrfs, "Ensemble", number_features, 2)])
ovr

Unnamed: 0,Fit_Time,Predict_Time,Score_Time
ANOVA,3.11 ± 0.03,0.03 ± 0.0,0.03 ± 0.0
Mutual Information,5.274 ± 0.072,0.027 ± 0.001,0.027 ± 0.001
ReliefF,4.164 ± 0.12,0.027 ± 0.001,0.027 ± 0.001
XGBoost Gain,8.7 ± 0.13,0.03 ± 0.0,0.03 ± 0.0
RFE,98.7 ± 1.1,0.0 ± 0.0,0.0 ± 0.0
Ensemble,11.18 ± 0.08,0.03 ± 0.0,0.03 ± 0.0


___
# ANOVA Focus

In [19]:
columns = ["Binary (K=30)", "Binary (K=20)","Binary (K=10)",
            "Multiclass (K=30)", "Multiclass (K=30)", "Multiclass (K=30)",
            "OvR (K=30)", "OvR (K=20)", "OvR (K=10)",
            "OvO (K=30)", "OvO (K=20)", "OvO (K=10)"]

In [20]:
anova_pfm = pd.concat([table_performance(bin_anova,"ANOVA", 30, 2),
                        table_performance(bin_anova,"ANOVA", 20, 2),
                        table_performance(bin_anova,"ANOVA", 10, 2),
                        table_performance(mul_anova,"ANOVA", 30, 1),
                        table_performance(mul_anova,"ANOVA", 20, 1),
                        table_performance(mul_anova,"ANOVA", 10, 1),
                        table_performance(ovr_anova,"ANOVA", 30, 1),
                        table_performance(ovr_anova,"ANOVA", 20, 1),
                        table_performance(ovr_anova,"ANOVA", 10, 1),
                        table_performance(ovo_anova,"ANOVA", 30, 1),
                        table_performance(ovo_anova,"ANOVA", 20, 1),
                        table_performance(ovo_anova,"ANOVA", 10, 1)])


anova_pfm["experiment"] = columns
anova_pfm.drop(columns=["Accuracy", "Precision", "Recall", "Fit_Time"], inplace=True)

anova_pfm

Unnamed: 0,F1_Score,experiment
ANOVA,99.7 ± 0.27,Binary (K=30)
ANOVA,99.73 ± 0.22,Binary (K=20)
ANOVA,98.73 ± 0.27,Binary (K=10)
ANOVA,71.6 ± 1.8,Multiclass (K=30)
ANOVA,71.5 ± 1.8,Multiclass (K=30)
ANOVA,71.5 ± 1.7,Multiclass (K=30)
ANOVA,71.4 ± 1.8,OvR (K=30)
ANOVA,71.5 ± 1.6,OvR (K=20)
ANOVA,71.7 ± 1.7,OvR (K=10)
ANOVA,71.2 ± 1.8,OvO (K=30)


In [21]:
anova_times = pd.concat([table_times(bin_anova,"ANOVA", 30, 4),
                        table_times(bin_anova,"ANOVA", 20, 4),
                        table_times(bin_anova,"ANOVA", 10, 3),
                        table_times(mul_anova,"ANOVA", 30, 3),
                        table_times(mul_anova,"ANOVA", 20, 3),
                        table_times(mul_anova,"ANOVA", 10, 3),
                        table_times(ovr_anova,"ANOVA", 30, 3),
                        table_times(ovr_anova,"ANOVA", 20, 3),
                        table_times(ovr_anova,"ANOVA", 10, 3),
                        table_times(ovo_anova,"ANOVA", 30, 3),
                        table_times(ovo_anova,"ANOVA", 20, 3),
                        table_times(ovo_anova,"ANOVA", 10, 3)])


anova_times["experiment"] = columns
anova_times.drop(columns=["Predict_Time", "Score_Time"], inplace=True)

anova_times

Unnamed: 0,Fit_Time,experiment
ANOVA,0.3764 ± 0.0076,Binary (K=30)
ANOVA,0.3069 ± 0.0103,Binary (K=20)
ANOVA,0.332 ± 0.004,Binary (K=10)
ANOVA,3.682 ± 0.041,Multiclass (K=30)
ANOVA,2.991 ± 0.042,Multiclass (K=30)
ANOVA,2.532 ± 0.033,Multiclass (K=30)
ANOVA,4.481 ± 0.124,OvR (K=30)
ANOVA,3.6 ± 0.036,OvR (K=20)
ANOVA,2.821 ± 0.038,OvR (K=10)
ANOVA,7.437 ± 0.1,OvO (K=30)
