In [8]:
import pandas as pd
import os

# Folder containing the experiment result files. os.path.join keeps the path
# portable; the previous literal depended on a Windows-only backslash and on
# the invalid "\m" escape sequence being passed through unchanged.
directory = os.path.join('experiments_results', 'models')

# File-name markers that decide which combined frame a results file feeds.
CLASSIFICATION_PREFIX = 'results_classification_'
REGRESSION_PREFIX = 'results_regression_'


def _combine_results(frames):
    """Stack per-file result frames into a single frame with a fresh 0..n-1 index.

    Parameters
    ----------
    frames : list of pandas.DataFrame
        Frames read from the individual result files (may be empty).

    Returns
    -------
    pandas.DataFrame
        The concatenation of ``frames`` without the 'Unnamed: 0' column, or an
        empty DataFrame when ``frames`` is empty.
    """
    if not frames:
        return pd.DataFrame()
    combined = pd.concat(frames, ignore_index=True)
    # 'Unnamed: 0' is the name pandas gives a column whose header is empty
    # (presumably the index saved with the results - confirm). errors='ignore'
    # avoids a KeyError for files that were written without it.
    return combined.drop(columns='Unnamed: 0', errors='ignore')


# Only CSV files are candidates; sorting makes the row order of the combined
# frames independent of the order in which the OS lists the directory.
csv_files = sorted(
    file for file in os.listdir(directory) if file.lower().endswith('.csv')
)

# Gather the per-file frames in lists and concatenate once at the end instead
# of re-copying an ever-growing DataFrame on every iteration.
classification_frames = []
regression_frames = []

for filename in csv_files:
    if CLASSIFICATION_PREFIX in filename:
        prefix, frames = CLASSIFICATION_PREFIX, classification_frames
    elif REGRESSION_PREFIX in filename:
        prefix, frames = REGRESSION_PREFIX, regression_frames
    else:
        continue  # not a results file
    df = pd.read_csv(os.path.join(directory, filename))
    # Tag every row with the file it came from, minus the task marker.
    df['source_file'] = filename.replace(prefix, "")
    frames.append(df)

classification_df = _combine_results(classification_frames)
regression_df = _combine_results(regression_frames)

In [17]:
# Preview the first five combined classification rows.
classification_df.head(n=5)

Unnamed: 0,Classifier,Accuracy,source_file
0,RandomForestClassifier-50-None-2-1,0.8111,descriptors_sc.csv
1,RandomForestClassifier-50-None-2-2,0.817,descriptors_sc.csv
2,RandomForestClassifier-50-None-2-4,0.8111,descriptors_sc.csv
3,RandomForestClassifier-50-None-5-1,0.8124,descriptors_sc.csv
4,RandomForestClassifier-50-None-5-2,0.815,descriptors_sc.csv


In [25]:
# 'Classifier' packs the model name and its hyper-parameters into one
# dash-separated label; split it once and reuse the pieces for both columns.
classifier_tokens = classification_df['Classifier'].str.split('-')
# First token is the model name.
classification_df['Classifier_name'] = classifier_tokens.str[0]
# Remaining tokens are re-joined with '-' to form the parameter string.
classification_df['Parameters'] = classifier_tokens.str[1:].str.join('-')

In [26]:
# Check that 'Classifier_name' and 'Parameters' were added as expected.
classification_df.head(n=5)

Unnamed: 0,Classifier,Accuracy,source_file,Classifier_name,Parameters
0,RandomForestClassifier-50-None-2-1,0.8111,descriptors_sc.csv,RandomForestClassifier,50-None-2-1
1,RandomForestClassifier-50-None-2-2,0.817,descriptors_sc.csv,RandomForestClassifier,50-None-2-2
2,RandomForestClassifier-50-None-2-4,0.8111,descriptors_sc.csv,RandomForestClassifier,50-None-2-4
3,RandomForestClassifier-50-None-5-1,0.8124,descriptors_sc.csv,RandomForestClassifier,50-None-5-1
4,RandomForestClassifier-50-None-5-2,0.815,descriptors_sc.csv,RandomForestClassifier,50-None-5-2


In [28]:
def _accuracy_descending(frame):
    """Return ``frame`` ordered by its 'Accuracy' column, highest value first."""
    return frame.sort_values(by='Accuracy', ascending=False)


# Order the runs by accuracy, then group by model name and order each group
# again; the result is indexed by (Classifier_name, original row label).
df_sorted = (
    _accuracy_descending(classification_df)
    .groupby('Classifier_name')
    .apply(_accuracy_descending)
)

In [43]:
# Remove the columns that are redundant in the grouped view: 'Classifier_name'
# is still available as an index level, and 'Classifier' has already been
# split into 'Classifier_name' and 'Parameters'.
# Reassigning with errors='ignore' keeps the cell safe to re-run; the previous
# in-place drops raised KeyError on a second execution because the columns
# were already gone.
df_sorted = df_sorted.drop(columns=['Classifier_name', 'Classifier'], errors='ignore')

In [44]:
# Print the first ten rows of every model group. The grouping key is resolved
# from the 'Classifier_name' index level of df_sorted.
classifier_groups = df_sorted.groupby('Classifier_name')
for group_name, group_df in classifier_groups:
    top_ten = group_df.head(10)
    print(f"Group: {group_name}")
    print(top_ten)
    print("\n")

Group: GradientBoostingClassifier
                                Accuracy            source_file Parameters
Classifier_name                                                           
GradientBoostingClassifier 505    0.8216    fingerprints_sc.csv    100-0.1
                           509    0.8170    fingerprints_sc.csv    200-0.1
                           163    0.8150     descriptors_sc.csv    100-0.1
                           168    0.8117     descriptors_sc.csv    200-0.5
                           502    0.8104    fingerprints_sc.csv     10-0.5
                           167    0.8104     descriptors_sc.csv    200-0.1
                           164    0.8078     descriptors_sc.csv    100-0.5
                           339    0.8038  descriptors_scpca.csv    200-0.5
                           335    0.8025  descriptors_scpca.csv    100-0.5
                           169    0.8018     descriptors_sc.csv    200-1.0


Group: LogisticRegression
                        Accuracy      

In [45]:
# Preview the first five combined regression rows.
regression_df.head(n=5)

Unnamed: 0,Classifier,Accuracy,source_file
0,LinearRegression-0.001-l1-liblinear,-2.3804644783006964e+22,descriptors_sc.csv
1,"MLPRegressor-(50,)-relu-0.0001-200",-1.5518,descriptors_sc.csv
2,"MLPRegressor-(50,)-relu-0.0001-500",-1.2417,descriptors_sc.csv
3,"MLPRegressor-(50,)-relu-0.0001-1000",-1.1356,descriptors_sc.csv
4,"MLPRegressor-(50,)-relu-0.001-200",-1.1921,descriptors_sc.csv


In [46]:
# Split the dash-separated 'Classifier' label once and derive both columns
# from the resulting token lists.
regressor_tokens = regression_df['Classifier'].str.split('-')
# First token is the model name.
regression_df['Classifier_name'] = regressor_tokens.str[0]
# Remaining tokens are re-joined with '-' to form the parameter string.
regression_df['Parameters'] = regressor_tokens.str[1:].str.join('-')

In [47]:
# Check that 'Classifier_name' and 'Parameters' were added as expected.
regression_df.head(n=5)

Unnamed: 0,Classifier,Accuracy,source_file,Classifier_name,Parameters
0,LinearRegression-0.001-l1-liblinear,-2.3804644783006964e+22,descriptors_sc.csv,LinearRegression,0.001-l1-liblinear
1,"MLPRegressor-(50,)-relu-0.0001-200",-1.5518,descriptors_sc.csv,MLPRegressor,"(50,)-relu-0.0001-200"
2,"MLPRegressor-(50,)-relu-0.0001-500",-1.2417,descriptors_sc.csv,MLPRegressor,"(50,)-relu-0.0001-500"
3,"MLPRegressor-(50,)-relu-0.0001-1000",-1.1356,descriptors_sc.csv,MLPRegressor,"(50,)-relu-0.0001-1000"
4,"MLPRegressor-(50,)-relu-0.001-200",-1.1921,descriptors_sc.csv,MLPRegressor,"(50,)-relu-0.001-200"


In [61]:
# Show the dtype of every column of the regression results.
regression_df.dtypes

Classifier          object
Accuracy           float64
source_file         object
Classifier_name     object
Parameters          object
dtype: object

In [63]:
# Replace every regression score by its magnitude; the sign is discarded.
# NOTE(review): the rows shown by head() are negative. Whether dropping the
# sign is valid depends on the metric stored in 'Accuracy' - confirm. For a
# score where higher is better, a large negative value would afterwards look
# like a large positive one.
regression_df['Accuracy'] = abs(regression_df['Accuracy'])

In [65]:
# Display the regression results ordered by 'Accuracy', largest value first.
regression_df.sort_values('Accuracy', ascending=False)

Unnamed: 0,Classifier,Accuracy,source_file,Classifier_name,Parameters
377,LinearRegression-0.001-l1-liblinear,2.399973e+24,fingerprints_sc.csv,LinearRegression,0.001-l1-liblinear
0,LinearRegression-0.001-l1-liblinear,2.380464e+22,descriptors_sc.csv,LinearRegression,0.001-l1-liblinear
525,LinearRegression-0.001-l1-liblinear,8.014320e+01,fingerprints_scpca.csv,LinearRegression,0.001-l1-liblinear
591,GradientBoostingRegressor-200-2.0,6.791030e+01,fingerprints_scpca.csv,GradientBoostingRegressor,200-2.0
391,"MLPRegressor-(50,)-tanh-0.001-500",6.751140e+01,fingerprints_sc.csv,MLPRegressor,"(50,)-tanh-0.001-500"
...,...,...,...,...,...
369,RandomForestRegressor-200-20-2-2,6.258000e-01,fingerprints_sc.csv,RandomForestRegressor,200-20-2-2
368,RandomForestRegressor-200-20-2-1,6.240000e-01,fingerprints_sc.csv,RandomForestRegressor,200-20-2-1
140,GradientBoostingRegressor-100-0.1,6.182000e-01,descriptors_sc.csv,GradientBoostingRegressor,100-0.1
350,RandomForestRegressor-200-None-2-1,6.165000e-01,fingerprints_sc.csv,RandomForestRegressor,200-None-2-1


In [66]:
# Order the regression runs by 'Accuracy' (largest first), group them by model
# name, and order each group the same way. The result is indexed by
# (Classifier_name, original row label).
regression_ranked = regression_df.sort_values(by='Accuracy', ascending=False)
regression_by_model = regression_ranked.groupby('Classifier_name')
df_sorted = regression_by_model.apply(
    lambda group: group.sort_values(by='Accuracy', ascending=False)
)

In [67]:
# Remove the columns that are redundant in the grouped view: 'Classifier_name'
# is still available as an index level, and 'Classifier' has already been
# split into 'Classifier_name' and 'Parameters'.
# Reassigning with errors='ignore' keeps the cell safe to re-run; the previous
# in-place drops raised KeyError on a second execution because the columns
# were already gone.
df_sorted = df_sorted.drop(columns=['Classifier_name', 'Classifier'], errors='ignore')

In [68]:
# Display the grouped, sorted regression results.
df_sorted

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy,source_file,Parameters
Classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GradientBoostingRegressor,591,67.9103,fingerprints_scpca.csv,200-2.0
GradientBoostingRegressor,439,3.9598,fingerprints_sc.csv,100-2.0
GradientBoostingRegressor,443,3.7254,fingerprints_sc.csv,200-2.0
GradientBoostingRegressor,587,3.4377,fingerprints_scpca.csv,100-2.0
GradientBoostingRegressor,583,2.8763,fingerprints_scpca.csv,10-2.0
...,...,...,...,...
RandomForestRegressor,353,0.6267,fingerprints_sc.csv,200-None-5-1
RandomForestRegressor,323,0.6267,fingerprints_sc.csv,100-None-2-1
RandomForestRegressor,369,0.6258,fingerprints_sc.csv,200-20-2-2
RandomForestRegressor,368,0.6240,fingerprints_sc.csv,200-20-2-1


In [69]:
# Print the first ten rows of every model group. The grouping key is resolved
# from the 'Classifier_name' index level of df_sorted.
regressor_groups = df_sorted.groupby('Classifier_name')
for group_name, group_df in regressor_groups:
    header = f"Group: {group_name}"
    preview = group_df.head(10)
    print(header)
    print(preview)
    print("\n")

Group: GradientBoostingRegressor
                               Accuracy             source_file Parameters
Classifier_name                                                           
GradientBoostingRegressor 591   67.9103  fingerprints_scpca.csv    200-2.0
                          439    3.9598     fingerprints_sc.csv    100-2.0
                          443    3.7254     fingerprints_sc.csv    200-2.0
                          587    3.4377  fingerprints_scpca.csv    100-2.0
                          583    2.8763  fingerprints_scpca.csv     10-2.0
                          147    2.4976      descriptors_sc.csv    200-2.0
                          143    2.4803      descriptors_sc.csv    100-2.0
                          139    2.4298      descriptors_sc.csv     10-2.0
                          291    2.3050   descriptors_scpca.csv    100-2.0
                          295    2.3050   descriptors_scpca.csv    200-2.0


Group: LinearRegression
                          Accuracy       