In [1]:
import pandas as pd
from os import path

In [2]:
# Root of the data directory, given as a path relative to the working directory.
# The evaluation results loaded below are looked up underneath it.
DATA_DIR = "../../data"

In [3]:
# Seeds identifying the training splits; each one selects a
# `training_split_<seed>` results directory when the evaluations are loaded.
# NOTE(review): 6 breaks the otherwise ascending order - confirm it is intended.
seeds = [
    2, 13, 19, 27, 38,
    42, 56, 63, 6, 78,
]

In [4]:
# Read the tab-separated evaluation table of every training split and store
# it in `split_results`, keyed by the split's seed.
split_results = {}
for seed in seeds:
    csv_path = path.join(
        DATA_DIR, "evaluations/10-fold-cross-val", f"training_split_{seed}", "all.csv"
    )
    # The file is read with header=None, so the column names are assigned here.
    results = pd.read_csv(csv_path, sep="\t", header=None)
    results.columns = [
        "model_name", "data", "preprocessing",
        "F1", "MCC", "Acc", "Precision", "Recall", "AUC",
        "filename",
    ]
    # Only the key and metric columns are kept; `filename` is dropped.
    split_results[seed] = results.drop(columns="filename")

In [5]:
# Per-model mean of every metric for the split loaded with seed 2.
(
    split_results[2]
    .groupby("model_name")[["F1", "MCC", "Acc", "Precision", "Recall", "AUC"]]
    .mean()
)

Unnamed: 0_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SVM,0.371143,0.146661,0.605016,0.296979,0.542168,0.581581
gradient_boosting,0.303315,0.147968,0.702474,0.345715,0.340153,0.569799
kNN,0.315355,0.080399,0.605809,0.260567,0.439464,0.544628
logistic_regression,0.387935,0.174601,0.635576,0.306302,0.543291,0.600654
multilayer_perceptron,0.337268,0.160613,0.668304,0.333908,0.429923,0.580599
random_forest,0.340624,0.139347,0.652799,0.301725,0.443418,0.57502


In [6]:
# Collect the per-split tables in the order given by `seeds`.
df_list = [split_results[seed] for seed in seeds]
# Concatenate results for all data splits and index the rows by
# (model_name, data, preprocessing). drop=False keeps the three key columns
# as regular columns in addition to the index levels.
df_concat = pd.concat(df_list).set_index(
    ['model_name', 'data', 'preprocessing'], drop=False
)

In [7]:
# Show the concatenated per-split results; the key columns appear both as
# index levels and as regular columns.
df_concat

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,model_name,data,preprocessing,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
kNN,sapiens,no_prepro,kNN,sapiens,no_prepro,0.222222,0.049237,0.712821,0.200000,0.250000,0.526840
logistic_regression,sapiens,no_prepro,logistic_regression,sapiens,no_prepro,0.155340,-0.105068,0.553846,0.112676,0.250000,0.431748
random_forest,sapiens,no_prepro,random_forest,sapiens,no_prepro,0.000000,-0.071877,0.810256,0.000000,0.000000,0.484663
multilayer_perceptron,sapiens,no_prepro,multilayer_perceptron,sapiens,no_prepro,0.135135,-0.063736,0.671795,0.119048,0.156250,0.464628
SVM,sapiens,no_prepro,SVM,sapiens,no_prepro,0.126126,-0.168216,0.502564,0.088608,0.218750,0.388516
...,...,...,...,...,...,...,...,...,...,...,...
logistic_regression,onehot,undersampling,logistic_regression,onehot,undersampling,0.434343,0.263509,0.668639,0.325758,0.651515,0.662155
random_forest,onehot,undersampling,random_forest,onehot,undersampling,0.427273,0.253701,0.627219,0.305195,0.712121,0.659369
multilayer_perceptron,onehot,undersampling,multilayer_perceptron,onehot,undersampling,0.433498,0.262213,0.659763,0.321168,0.666667,0.662377
SVM,onehot,undersampling,SVM,onehot,undersampling,0.403846,0.215814,0.633136,0.295775,0.636364,0.634358


In [8]:
# Average every metric over the data splits for each
# (model_name, data, preprocessing) combination.
grouped = df_concat.groupby(level=[0,1,2])
# df_concat still carries the three key columns as string columns. Select the
# metric columns explicitly: pandas >= 2.0 raises a TypeError when mean() meets
# non-numeric columns, whereas older versions silently dropped them.
df_means = grouped[["F1", "MCC", "Acc", "Precision", "Recall", "AUC"]].mean()

In [9]:
# Means across the different data splits: one row per
# (model_name, data, preprocessing) combination.
df_means

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SVM,bert,no_prepro,0.507240,0.364032,0.730250,0.402953,0.690977,0.714284
SVM,bert,oversampling,0.506687,0.367155,0.708219,0.387133,0.746117,0.722401
SVM,bert,scaling,0.505689,0.364406,0.729699,0.400351,0.694907,0.716397
SVM,bert,smote_os,0.501018,0.354187,0.709613,0.390839,0.713847,0.709784
SVM,bert,undersampling,0.488984,0.342339,0.680031,0.361206,0.763185,0.710218
...,...,...,...,...,...,...,...,...
random_forest,seqvec,no_prepro,0.451953,0.290178,0.681680,0.355046,0.656784,0.671579
random_forest,seqvec,oversampling,0.447549,0.283385,0.667503,0.341227,0.677699,0.671945
random_forest,seqvec,scaling,0.414400,0.265833,0.738895,0.411235,0.463628,0.634716
random_forest,seqvec,smote_os,0.447118,0.315350,0.776828,0.445276,0.468485,0.661370


# Model Statistics

In [10]:
# Mean F1 and MCC per model type, averaged over all of its rows in df_means.
df_means.groupby(level="model_name")[["F1", "MCC"]].mean()

Unnamed: 0_level_0,F1,MCC
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1
SVM,0.394295,0.205165
gradient_boosting,0.334377,0.204689
kNN,0.330485,0.128041
logistic_regression,0.422709,0.245924
multilayer_perceptron,0.36497,0.208613
random_forest,0.385203,0.224734


In [11]:
# Best combination for each model type: keep the row(s) whose mean F1 equals
# the highest mean F1 within the same model_name group (ties are all kept).
# The string alias "max" is used because passing the builtin `max` to
# transform() is deprecated since pandas 2.1 and emits a FutureWarning.
idx = df_means.groupby(['model_name'])['F1'].transform("max") == df_means['F1']
df_means[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SVM,seqvec,scaling,0.509131,0.36982,0.728634,0.407589,0.700679,0.71813
gradient_boosting,bert,oversampling,0.475494,0.319293,0.702276,0.371692,0.672134,0.690392
kNN,bert,smote_os,0.408461,0.218477,0.558076,0.278995,0.767088,0.636332
logistic_regression,pybiomed,oversampling,0.536689,0.406583,0.76079,0.437494,0.697789,0.736774
multilayer_perceptron,bert,oversampling,0.53195,0.401169,0.762377,0.438532,0.681812,0.730759
random_forest,bert,undersampling,0.474726,0.319482,0.680608,0.353643,0.724145,0.696209


# Data Statistics

In [12]:
# Mean F1 and MCC per data representation, averaged over all of its rows in df_means.
df_means.groupby(level="data")[["F1", "MCC"]].mean()

Unnamed: 0_level_0,F1,MCC
data,Unnamed: 1_level_1,Unnamed: 2_level_1
bert,0.456629,0.313078
integer_encoded,0.324629,0.14314
onehot,0.405819,0.249978
protparam,0.323212,0.109874
pybiomed,0.430884,0.291806
sapiens,0.214769,0.008257
seqvec,0.448105,0.303894


In [13]:
# Best combination for each data representation: keep the row(s) whose mean
# F1 equals the highest mean F1 within the same data group (ties are all kept).
# The string alias "max" is used because passing the builtin `max` to
# transform() is deprecated since pandas 2.1 and emits a FutureWarning.
idx = df_means.groupby(['data'])['F1'].transform("max") == df_means['F1']
df_means[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic_regression,integer_encoded,oversampling,0.386339,0.19398,0.671059,0.307248,0.523023,0.615672
logistic_regression,onehot,scaling,0.473227,0.321671,0.711729,0.378534,0.653494,0.689867
logistic_regression,pybiomed,oversampling,0.536689,0.406583,0.76079,0.437494,0.697789,0.736774
logistic_regression,seqvec,scaling,0.532628,0.401101,0.746881,0.429014,0.71655,0.734494
multilayer_perceptron,bert,oversampling,0.53195,0.401169,0.762377,0.438532,0.681812,0.730759
multilayer_perceptron,sapiens,undersampling,0.286875,0.04549,0.507907,0.193383,0.564011,0.529952
random_forest,protparam,no_prepro,0.45484,0.320597,0.770327,0.447753,0.487191,0.664357


# Preprocessing Statistics

In [14]:
# Mean F1 and MCC per preprocessing method, averaged over all of its rows in df_means.
df_means.groupby(level="preprocessing")[["F1", "MCC"]].mean()

Unnamed: 0_level_0,F1,MCC
preprocessing,Unnamed: 1_level_1,Unnamed: 2_level_1
no_prepro,0.348428,0.19508
oversampling,0.393549,0.210792
scaling,0.350416,0.198809
smote_os,0.36614,0.203015
undersampling,0.4015,0.206609


In [15]:
# Best combination for each preprocessing: keep the row(s) whose mean F1 equals
# the highest mean F1 within the same preprocessing group (ties are all kept).
# The string alias "max" is used because passing the builtin `max` to
# transform() is deprecated since pandas 2.1 and emits a FutureWarning.
idx = df_means.groupby(['preprocessing'])['F1'].transform("max") == df_means['F1']
df_means[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic_regression,pybiomed,no_prepro,0.534737,0.403607,0.756843,0.432036,0.703476,0.736316
logistic_regression,pybiomed,oversampling,0.536689,0.406583,0.76079,0.437494,0.697789,0.736774
logistic_regression,seqvec,scaling,0.532628,0.401101,0.746881,0.429014,0.71655,0.734494
logistic_regression,seqvec,smote_os,0.524962,0.392618,0.771227,0.457876,0.62981,0.717434
multilayer_perceptron,bert,undersampling,0.516422,0.382529,0.731311,0.407415,0.721561,0.726955


# Best total results

In [16]:
# The ten combinations with the highest mean F1 across the data splits.
df_means.nlargest(n=10, columns='F1')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic_regression,pybiomed,oversampling,0.536689,0.406583,0.76079,0.437494,0.697789,0.736774
logistic_regression,pybiomed,no_prepro,0.534737,0.403607,0.756843,0.432036,0.703476,0.736316
logistic_regression,seqvec,scaling,0.532628,0.401101,0.746881,0.429014,0.71655,0.734494
multilayer_perceptron,bert,oversampling,0.53195,0.401169,0.762377,0.438532,0.681812,0.730759
logistic_regression,seqvec,no_prepro,0.531949,0.399743,0.743909,0.422766,0.724898,0.73612
logistic_regression,pybiomed,scaling,0.529316,0.396199,0.756182,0.431315,0.689428,0.730985
logistic_regression,seqvec,smote_os,0.524962,0.392618,0.771227,0.457876,0.62981,0.717434
logistic_regression,seqvec,oversampling,0.522227,0.386392,0.733405,0.40914,0.727253,0.730577
multilayer_perceptron,bert,undersampling,0.516422,0.382529,0.731311,0.407415,0.721561,0.726955
multilayer_perceptron,bert,smote_os,0.514998,0.379917,0.76296,0.439925,0.634271,0.714059


In [17]:
# The ten combinations with the highest mean MCC across the data splits.
df_means.nlargest(n=10, columns='MCC')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic_regression,pybiomed,oversampling,0.536689,0.406583,0.76079,0.437494,0.697789,0.736774
logistic_regression,pybiomed,no_prepro,0.534737,0.403607,0.756843,0.432036,0.703476,0.736316
multilayer_perceptron,bert,oversampling,0.53195,0.401169,0.762377,0.438532,0.681812,0.730759
logistic_regression,seqvec,scaling,0.532628,0.401101,0.746881,0.429014,0.71655,0.734494
logistic_regression,seqvec,no_prepro,0.531949,0.399743,0.743909,0.422766,0.724898,0.73612
logistic_regression,pybiomed,scaling,0.529316,0.396199,0.756182,0.431315,0.689428,0.730985
logistic_regression,seqvec,smote_os,0.524962,0.392618,0.771227,0.457876,0.62981,0.717434
logistic_regression,seqvec,oversampling,0.522227,0.386392,0.733405,0.40914,0.727253,0.730577
multilayer_perceptron,bert,undersampling,0.516422,0.382529,0.731311,0.407415,0.721561,0.726955
multilayer_perceptron,bert,smote_os,0.514998,0.379917,0.76296,0.439925,0.634271,0.714059


# Look at maxima and minima across training splits

### Maximum for models

In [18]:
# Per-combination maximum of every column over the data splits.
grouped = df_concat.groupby(level=[0, 1, 2])
# The key columns are also aggregated by max(); they duplicate the index
# levels, so they are removed from the result.
df_max = grouped.max().drop(
    columns=["model_name", "data", "preprocessing"]
)

In [None]:
# For each model type, keep the row(s) of df_max (per-combination maximum over
# the splits) whose F1 equals the highest F1 in that model_name group.
# The string alias "max" is used because passing the builtin `max` to
# transform() is deprecated since pandas 2.1 and emits a FutureWarning.
idx = df_max.groupby(['model_name'])['F1'].transform("max") == df_max['F1']
df_max[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SVM,bert,smote_os,0.661017,0.568411,0.832636,0.565217,0.795918,0.819012
gradient_boosting,pybiomed,oversampling,0.564706,0.411283,0.77305,0.470588,0.730769,0.731865
kNN,bert,smote_os,0.493976,0.352712,0.648536,0.350427,0.884615,0.718367
logistic_regression,bert,smote_os,0.636364,0.534699,0.832636,0.57377,0.779412,0.788722
multilayer_perceptron,pybiomed,oversampling,0.621359,0.518664,0.83682,0.606061,1.0,0.768636
random_forest,protparam,no_prepro,0.590909,0.504951,0.849372,0.666667,0.653846,0.731096


### Minimum for models

In [21]:
# Per-combination minimum of every column over the data splits.
grouped = df_concat.groupby(level=[0, 1, 2])
# The key columns are also aggregated by min(); they duplicate the index
# levels, so they are removed from the result.
df_min = grouped.min().drop(
    columns=["model_name", "data", "preprocessing"]
)

In [22]:
# For each model type, keep the row(s) of df_min (per-combination minimum over
# the splits) whose F1 is the HIGHEST in that model_name group, i.e. the
# combination with the best worst-case F1. Ties are all kept.
# The string alias "max" is used because passing the builtin `max` to
# transform() is deprecated since pandas 2.1 and emits a FutureWarning.
idx = df_min.groupby(['model_name'])['F1'].transform("max") == df_min['F1']
df_min[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SVM,seqvec,scaling,0.450216,0.249013,0.598101,0.322981,0.55102,0.649884
gradient_boosting,bert,smote_os,0.431373,0.273958,0.686047,0.319588,0.5,0.651099
kNN,onehot,undersampling,0.364583,0.102041,0.52381,0.25,0.628571,0.561224
logistic_regression,seqvec,no_prepro,0.480874,0.313904,0.664557,0.369565,0.666667,0.687456
logistic_regression,seqvec,scaling,0.480874,0.324969,0.674051,0.376068,0.591837,0.693554
multilayer_perceptron,seqvec,oversampling,0.469565,0.327062,0.696203,0.393443,0.478261,0.686164
random_forest,pybiomed,undersampling,0.404908,0.193483,0.565111,0.308219,0.5,0.616144


### Maximum for data

In [23]:
# For each data representation, keep the row(s) of df_max (per-combination
# maximum over the splits) whose F1 equals the highest F1 in that data group.
# The string alias "max" is used because passing the builtin `max` to
# transform() is deprecated since pandas 2.1 and emits a FutureWarning.
idx = df_max.groupby(['data'])['F1'].transform("max") == df_max['F1']
df_max[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SVM,bert,smote_os,0.661017,0.568411,0.832636,0.565217,0.795918,0.819012
SVM,onehot,oversampling,0.589147,0.473594,0.777311,0.475,0.871429,0.776644
SVM,sapiens,scaling,0.390244,0.236061,0.6875,0.290909,0.592593,0.64968
gradient_boosting,integer_encoded,oversampling,0.504202,0.356373,0.769504,0.428571,0.619565,0.700859
logistic_regression,pybiomed,no_prepro,0.632768,0.52267,0.8159,0.536232,0.823529,0.79334
logistic_regression,seqvec,oversampling,0.615385,0.506697,0.811715,0.529412,0.804348,0.784249
random_forest,protparam,no_prepro,0.590909,0.504951,0.849372,0.666667,0.653846,0.731096


### Minimum for data

In [24]:
# For each data representation, keep the row(s) of df_min (per-combination
# minimum over the splits) whose F1 is the HIGHEST in that data group, i.e.
# the combination with the best worst-case F1. Ties are all kept.
# The string alias "max" is used because passing the builtin `max` to
# transform() is deprecated since pandas 2.1 and emits a FutureWarning.
idx = df_min.groupby(['data'])['F1'].transform("max") == df_min['F1']
df_min[idx]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,MCC,Acc,Precision,Recall,AUC
model_name,data,preprocessing,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic_regression,onehot,scaling,0.414815,0.245571,0.648352,0.318182,0.55102,0.650089
logistic_regression,pybiomed,oversampling,0.466258,0.2913,0.708861,0.38961,0.575758,0.664808
logistic_regression,seqvec,no_prepro,0.480874,0.313904,0.664557,0.369565,0.666667,0.687456
logistic_regression,seqvec,scaling,0.480874,0.324969,0.674051,0.376068,0.591837,0.693554
multilayer_perceptron,bert,oversampling,0.467066,0.314971,0.690418,0.386139,0.574468,0.681901
multilayer_perceptron,sapiens,undersampling,0.206897,-0.072888,0.442424,0.133333,0.454545,0.450194
random_forest,integer_encoded,undersampling,0.341232,0.07306,0.53317,0.253086,0.514286,0.543728
random_forest,protparam,undersampling,0.40201,0.177123,0.61512,0.300885,0.571429,0.60482
