In [1]:
import pandas as pd
from os import path

In [2]:
# Relative path to the data directory; the per-split evaluation results
# are read from beneath it.
DATA_DIR = "../../data"

In [3]:
# Seeds identifying the training splits; each one selects a
# "training_split_<seed>" results directory when the evaluations are loaded.
# NOTE(review): 6 breaks the otherwise ascending order — confirm it is intended.
seeds = [2, 13, 19, 27, 38, 42, 56, 63, 6, 78]

In [74]:
# Per-split evaluation tables, keyed by the seed of the training split.
column_names = [
    "model_name", "data", "preprocessing",
    "F1", "MCC", "Acc", "Precision", "Recall", "AUC",
    "filename",
]
split_results = {}
for seed in seeds:
    csv_path = path.join(
        DATA_DIR,
        "evaluations/10-fold-cross-val",
        f"training_split_{seed}",
        "all.csv",
    )
    # The files are tab-separated and are read without a header row.
    results = pd.read_csv(csv_path, sep="\t", header=None)
    results.columns = column_names
    # The filename column is not kept in the stored table.
    split_results[seed] = results.drop(columns="filename")

In [34]:
# Mean of every metric per model type, for the split with seed 2 only.
metric_columns = ["F1", "MCC", "Acc", "Precision", "Recall", "AUC"]
split_results[2].groupby("model_name")[metric_columns].mean()

Unnamed: 0_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SVM,0.408981,0.190795,0.620131,0.323243,0.59011,0.609392
gradient_boosting,0.329459,0.179447,0.710633,0.381363,0.359429,0.584999
kNN,0.320502,0.081799,0.60703,0.269098,0.434694,0.545381
logistic_regression,0.424416,0.215025,0.646329,0.334884,0.587429,0.625259
multilayer_perceptron,0.355921,0.184626,0.665696,0.359573,0.462286,0.592931
random_forest,0.37162,0.169642,0.654177,0.334364,0.475429,0.590235


In [77]:
# Stack the result tables of all data splits and index the rows by their
# (model_name, data, preprocessing) combination. drop=False keeps the three
# key columns as regular columns in addition to the index levels.
index_columns = ['model_name', 'data', 'preprocessing']
df_list = [split_results[seed] for seed in seeds]
df_concat = pd.concat(df_list).set_index(index_columns, drop=False)

In [78]:
# Show the concatenated results of all data splits.
df_concat

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,model_name,data,preprocessing,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
logistic_regression,integer_encoded,no_prepro,logistic_regression,integer_encoded,no_prepro,0.371585,0.142592,0.636076,0.300885,0.485714,0.582288
random_forest,integer_encoded,no_prepro,random_forest,integer_encoded,no_prepro,0.313131,0.041068,0.569620,0.242188,0.442857,0.524274
multilayer_perceptron,integer_encoded,no_prepro,multilayer_perceptron,integer_encoded,no_prepro,0.292308,0.110924,0.708861,0.316667,0.271429,0.552381
SVM,integer_encoded,no_prepro,SVM,integer_encoded,no_prepro,0.328358,0.148334,0.715190,0.343750,0.314286,0.571777
gradient_boosting,integer_encoded,no_prepro,gradient_boosting,integer_encoded,no_prepro,0.181818,0.067994,0.743671,0.310345,0.128571,0.523635
...,...,...,...,...,...,...,...,...,...,...,...
logistic_regression,seqvec,undersampling,logistic_regression,seqvec,undersampling,0.474227,0.323939,0.699115,0.359375,0.696970,0.698302
random_forest,seqvec,undersampling,random_forest,seqvec,undersampling,0.395604,0.210662,0.675516,0.310345,0.545455,0.626207
multilayer_perceptron,seqvec,undersampling,multilayer_perceptron,seqvec,undersampling,0.484536,0.339307,0.705015,0.367188,0.712121,0.707709
SVM,seqvec,undersampling,SVM,seqvec,undersampling,0.422414,0.248601,0.604720,0.295181,0.742424,0.656926


In [92]:
# Average every metric over the data splits for each
# (model_name, data, preprocessing) combination (index levels 0, 1, 2).
grouped = df_concat.groupby(level=[0, 1, 2])
# df_concat still carries the string key columns next to the metrics.
# numeric_only=True excludes them explicitly; without it pandas >= 2.0
# raises a TypeError instead of silently dropping the non-numeric columns.
df_means = grouped.mean(numeric_only=True)

In [93]:
# Means across the different data splits, one row per
# (model_name, data, preprocessing) combination
df_means

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SVM,bert,no_prepro,0.511476,0.369663,0.734937,0.408895,0.689506,0.716687
SVM,bert,oversampling,0.506430,0.365911,0.703273,0.384740,0.753730,0.722173
SVM,bert,scaling,0.503104,0.361050,0.724788,0.395397,0.700337,0.715076
SVM,bert,smote_os,0.492383,0.343877,0.705472,0.384618,0.704102,0.703441
SVM,bert,undersampling,0.482973,0.337028,0.675871,0.354706,0.763936,0.708108
...,...,...,...,...,...,...,...,...
random_forest,seqvec,no_prepro,0.450656,0.292323,0.698715,0.370356,0.618662,0.667989
random_forest,seqvec,oversampling,0.444103,0.273374,0.655308,0.332349,0.687136,0.666839
random_forest,seqvec,scaling,0.438111,0.298024,0.746519,0.423089,0.506501,0.656152
random_forest,seqvec,smote_os,0.432441,0.305928,0.785854,0.450704,0.427376,0.651185


# Model Statistics

In [94]:
# Average F1 and MCC per model type, taken over all data representations
# and preprocessing variants.
df_means.groupby(level="model_name")[["F1", "MCC"]].mean()

Unnamed: 0_level_0,F1,MCC
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1
SVM,0.413756,0.229378
gradient_boosting,0.360277,0.240763
kNN,0.341701,0.140107
logistic_regression,0.448201,0.277278
multilayer_perceptron,0.391141,0.240629
random_forest,0.416439,0.262458


In [97]:
# Best combination for each model type: keep the rows whose mean F1 equals
# the highest mean F1 within their model_name group (ties are all kept).
# The string "max" selects pandas' own group-wise maximum; passing the Python
# builtin `max` triggers a FutureWarning in recent pandas versions.
idx = df_means.groupby(['model_name'])['F1'].transform("max") == df_means['F1']
df_means[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SVM,bert,no_prepro,0.511476,0.369663,0.734937,0.408895,0.689506,0.716687
gradient_boosting,bert,oversampling,0.471559,0.31207,0.692493,0.365285,0.679068,0.686642
kNN,bert,smote_os,0.405207,0.217155,0.562808,0.277006,0.75821,0.636233
logistic_regression,pybiomed,oversampling,0.53782,0.407955,0.762608,0.439484,0.695866,0.737158
multilayer_perceptron,bert,oversampling,0.538064,0.408544,0.763449,0.443098,0.691329,0.734988
random_forest,bert,undersampling,0.468407,0.313998,0.680839,0.348546,0.716409,0.693756


# Data Statistics

In [98]:
# Average F1 and MCC per data representation, taken over all model types
# and preprocessing variants.
df_means.groupby(level="data")[["F1", "MCC"]].mean()

Unnamed: 0_level_0,F1,MCC
data,Unnamed: 1_level_1,Unnamed: 2_level_1
bert,0.455356,0.311129
integer_encoded,0.323309,0.141516
protparam,0.320883,0.110511
pybiomed,0.431368,0.293128
seqvec,0.445348,0.30256


In [99]:
# Best combination for each data representation: keep the rows whose mean F1
# equals the highest mean F1 within their data group (ties are all kept).
# The string "max" selects pandas' own group-wise maximum; passing the Python
# builtin `max` triggers a FutureWarning in recent pandas versions.
idx = df_means.groupby(['data'])['F1'].transform("max") == df_means['F1']
df_means[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic_regression,integer_encoded,scaling,0.387019,0.195232,0.670487,0.309381,0.52354,0.615693
logistic_regression,pybiomed,oversampling,0.53782,0.407955,0.762608,0.439484,0.695866,0.737158
logistic_regression,seqvec,scaling,0.535148,0.404543,0.751692,0.433899,0.711233,0.735648
multilayer_perceptron,bert,oversampling,0.538064,0.408544,0.763449,0.443098,0.691329,0.734988
random_forest,protparam,oversampling,0.454604,0.295349,0.728896,0.391257,0.558587,0.665778


# Preprocessing Statistics

In [100]:
# Average F1 and MCC per preprocessing variant, taken over all model types
# and data representations.
df_means.groupby(level="preprocessing")[["F1", "MCC"]].mean()

Unnamed: 0_level_0,F1,MCC
preprocessing,Unnamed: 1_level_1,Unnamed: 2_level_1
no_prepro,0.37419,0.225714
oversampling,0.414321,0.237136
scaling,0.375901,0.228701
smote_os,0.3955,0.2352
undersampling,0.416351,0.232093


In [101]:
# Best combination for each preprocessing: keep the rows whose mean F1 equals
# the highest mean F1 within their preprocessing group (ties are all kept).
# The string "max" selects pandas' own group-wise maximum; passing the Python
# builtin `max` triggers a FutureWarning in recent pandas versions.
idx = df_means.groupby(['preprocessing'])['F1'].transform("max") == df_means['F1']
df_means[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic_regression,bert,undersampling,0.512099,0.379121,0.716505,0.387368,0.758477,0.73201
logistic_regression,pybiomed,no_prepro,0.532865,0.40068,0.757207,0.432126,0.696689,0.734044
logistic_regression,seqvec,scaling,0.535148,0.404543,0.751692,0.433899,0.711233,0.735648
logistic_regression,seqvec,smote_os,0.522061,0.390107,0.772814,0.456023,0.62366,0.715215
multilayer_perceptron,bert,oversampling,0.538064,0.408544,0.763449,0.443098,0.691329,0.734988


# Best overall results

In [107]:
# Ten combinations with the highest mean F1 across the data splits.
df_means.nlargest(n=10, columns='F1')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
multilayer_perceptron,bert,oversampling,0.538064,0.408544,0.763449,0.443098,0.691329,0.734988
logistic_regression,pybiomed,oversampling,0.53782,0.407955,0.762608,0.439484,0.695866,0.737158
logistic_regression,seqvec,scaling,0.535148,0.404543,0.751692,0.433899,0.711233,0.735648
logistic_regression,pybiomed,no_prepro,0.532865,0.40068,0.757207,0.432126,0.696689,0.734044
logistic_regression,pybiomed,scaling,0.52945,0.396086,0.757636,0.432682,0.685582,0.730407
logistic_regression,seqvec,no_prepro,0.529265,0.396825,0.7487,0.429028,0.70431,0.732104
logistic_regression,seqvec,oversampling,0.523944,0.387562,0.731346,0.409644,0.732769,0.731258
logistic_regression,seqvec,smote_os,0.522061,0.390107,0.772814,0.456023,0.62366,0.715215
logistic_regression,bert,oversampling,0.516433,0.374458,0.7293,0.407742,0.710858,0.721355
logistic_regression,bert,undersampling,0.512099,0.379121,0.716505,0.387368,0.758477,0.73201


In [108]:
# Ten combinations with the highest mean MCC across the data splits.
df_means.nlargest(n=10, columns='MCC')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
multilayer_perceptron,bert,oversampling,0.538064,0.408544,0.763449,0.443098,0.691329,0.734988
logistic_regression,pybiomed,oversampling,0.53782,0.407955,0.762608,0.439484,0.695866,0.737158
logistic_regression,seqvec,scaling,0.535148,0.404543,0.751692,0.433899,0.711233,0.735648
logistic_regression,pybiomed,no_prepro,0.532865,0.40068,0.757207,0.432126,0.696689,0.734044
logistic_regression,seqvec,no_prepro,0.529265,0.396825,0.7487,0.429028,0.70431,0.732104
logistic_regression,pybiomed,scaling,0.52945,0.396086,0.757636,0.432682,0.685582,0.730407
logistic_regression,seqvec,smote_os,0.522061,0.390107,0.772814,0.456023,0.62366,0.715215
logistic_regression,seqvec,oversampling,0.523944,0.387562,0.731346,0.409644,0.732769,0.731258
logistic_regression,bert,undersampling,0.512099,0.379121,0.716505,0.387368,0.758477,0.73201
multilayer_perceptron,bert,undersampling,0.510034,0.376927,0.732419,0.404445,0.710068,0.723627
