In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Result tables of the single-classifier experiments, one CSV per model family.
files = [
    "results_lr.csv",
    "results_cpl.csv",
    "results_rf.csv",
    "results_svm.csv"
]

# Stack all result tables into one frame. ignore_index=True gives every row a
# unique label instead of repeating each file's own 0..n-1 index.
df_results = pd.concat(
    [pd.read_csv(f"results/single_classifier/{f}") for f in files],
    ignore_index=True,
)

# Split `classifier` at the first " (" into the model name and the remainder.
# NOTE(review): presumably `classifier` looks like "<model> (<params>)" - confirm
# against the code that writes the CSV files.
# str.partition keeps the whole string as the model name when " (" is absent,
# whereas slicing with str.find (which returns -1) would drop the last character.
classifier_parts = df_results['classifier'].str.partition(' (')
df_results['model'] = classifier_parts[0]
# Keep the leading " (" in `params`; empty string when there is no " (".
df_results['params'] = classifier_parts[1] + classifier_parts[2]
# Model family = model name up to its first "_". Taken from the model name, not
# from the full classifier string, so an underscore inside the parameter text
# cannot leak into the family name.
df_results['model_type'] = classifier_parts[0].str.partition('_')[0]
df_results.head()

In [None]:
# Plot, for every model type, how four metrics depend on the value of p:
#   A - Lustgarten features selection stability
#   B - Nogueira features selection stability
#   C - Average number of selected features
#   D - Test accuracy
# One figure per model type, one row of four panels (A-D) per dataset,
# one line per model.

# (result column, panel title) for panels A-D, left to right.
PANELS = [
    ('Lustgarten', "Lustgarten features selection stability\n"),
    ('Nogueira', "Nogueira features selection stability\n"),
    ('#features_mean', "Average number of selected features\n"),
    ('acc_test', "Test accuracy\n"),
]

# Fixed y-range of the "#features_mean" panel per model type; model types not
# listed here keep matplotlib's automatic scaling.
FEATURES_YLIM = {
    "LogisticRegression": [0, 500],
    "SVM": [0, 200],
    "RandomForest": [0, 200],
    "CPL": [0, 50],
}

# Group by a scalar column name, not a one-element list: with a list, pandas >= 2.0
# yields 1-tuple keys such as ('SVM',), which never compare equal to "SVM" and
# show up as tuples in axis labels and legends.
for model_type, df_results_mt in df_results.groupby('model_type'):
    # nunique() ignores NaN, matching the groups that groupby('dataset') yields.
    datasets_num = df_results_mt['dataset'].nunique()
    # squeeze=False keeps `axes` two-dimensional even for a single dataset, so
    # axes[i] is always a row of four Axes.
    fig, axes = plt.subplots(datasets_num, 4, figsize=(16, 4*datasets_num), squeeze=False)
    for i, (dataset, df_dataset) in enumerate(df_results_mt.groupby('dataset')):
        ax = axes[i]

        # One line per model in each of the four panels.
        for model, df in df_dataset.groupby('model'):
            label = f"{model}"
            for panel_ax, (column, _) in zip(ax, PANELS):
                panel_ax.plot(df.p, df[column], label=label)

        # Row formatting does not depend on the model, so do it once per row.
        ax[0].set_ylabel(f"{dataset}\n", fontsize=16)
        if i == 0:
            # Column titles only on the top row.
            for panel_ax, (_, title) in zip(ax, PANELS):
                panel_ax.set_title(title, fontsize=12)

        ax[0].set_ylim([0, 1.0])
        ax[1].set_ylim([0, 1.0])
        ax[3].set_ylim([0.6, 1.0])
        features_ylim = FEATURES_YLIM.get(model_type)
        if features_ylim is not None:
            ax[2].set_ylim(features_ylim)

    # Shared legend for the whole figure, built from the first panel of the
    # last dataset row.
    handles, labels = ax[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=5)
