In [77]:
import pandas as pd
import os

base_dir = "/home/cerebriu/data/datasets/MScThesis/KDD/evaluations/"
# Evaluation folders to summarise and the metric files looked up in each of them
folders = ['VALDO-post_robust_valid', 'VALDO_nopostprocess', 'SWI_robust_processed', 'T2S_robust_processed']
file_names = ['detection_metrics.csv', 'segmentation_metrics.csv', 'classification_metrics.csv']


dataframes = []
cols_existing = []

for folder in folders:
    # Row label: the folder name up to its first "_"; labels that do not
    # contain "VALDO" get a "CRB-" prefix.
    run_name = folder.split("_")[0]
    src_label = run_name if "VALDO" in run_name else "CRB-" + run_name

    for file_name in file_names:
        file_path = os.path.join(base_dir, folder, file_name)
        if not os.path.exists(file_path):
            # Metric files that are absent for a folder are skipped
            continue

        df = pd.read_csv(file_path)

        # Example file types: 'detection', 'segmentation', 'classification'
        file_type = file_name.split('_')[0]
        if file_type == "classification":
            # Mark classification metrics with '*' so they do not collide with
            # equally named columns coming from the other metric files
            df.columns = [f'{col}*' for col in df.columns]
        cols_existing.extend(list(df.columns))
        df['src'] = src_label

        dataframes.append(df)

if not dataframes:
    # pd.concat would raise an opaque "No objects to concatenate" here;
    # fail with a message that points at the actual cause instead.
    raise ValueError(f"No metric files found under {base_dir!r} for folders {folders}")

# Stack the per-file frames; columns missing from a given file become NaN
combined_df = pd.concat(dataframes, axis=0)
# Collapse to a single row per src: first() keeps the first non-null value of
# each column within a group, which merges the rows of the separate metric files
final_df = combined_df.groupby('src').first().reset_index()


In [78]:
# Display the combined metrics table (one row per src)
final_df

Unnamed: 0,src,TPR,PPV,F1,TPavg,FPavg,FPmedian,FP/cmb,FNavg,Dice,TPR*,PPV*,F1*,TNR*,ACC*
0,CRB-SWI,0.62,0.48,0.54,7.25,8.0,5.0,0.68,4.5,0.61,1.0,1.0,1.0,0.0,1.0
1,CRB-T2S,0.6,0.45,0.52,7.25,8.75,7.5,0.73,4.75,0.7,1.0,1.0,1.0,0.0,1.0
2,VALDO,0.71,0.44,0.54,1.23,1.59,1.0,0.92,0.5,0.64,0.87,0.87,0.87,0.71,0.82
3,VALDO-post,0.71,0.46,0.56,1.23,1.45,1.0,0.84,0.5,0.65,0.87,0.87,0.87,0.71,0.82


In [79]:
# Select the individual CRB rows. An already existing 'CRB-all' row is left out
# so that re-running this cell neither averages the aggregate into itself nor
# appends a duplicate row.
is_aggregate = final_df['src'] == 'CRB-all'
crb_rows = final_df[final_df['src'].str.startswith("CRB-") & ~is_aggregate]

# Aggregate with the column-wise mean of the numeric columns
# (swap in another reduction here if needed)
aggregated_crb_row = crb_rows.mean(numeric_only=True)
aggregated_crb_row['src'] = 'CRB-all'  # Assign a new src name for the aggregated row

# DataFrame.append is deprecated (removed in pandas 2.0), so the Series is turned
# into a one-row frame and concatenated instead. infer_objects() restores numeric
# dtypes, which are lost because the Series also holds the 'src' string.
final_df = pd.concat(
    [final_df[~is_aggregate], aggregated_crb_row.to_frame().T.infer_objects()],
    ignore_index=True,
)
final_df

  final_df = final_df.append(aggregated_crb_row, ignore_index=True)


Unnamed: 0,src,TPR,PPV,F1,TPavg,FPavg,FPmedian,FP/cmb,FNavg,Dice,TPR*,PPV*,F1*,TNR*,ACC*
0,CRB-SWI,0.62,0.48,0.54,7.25,8.0,5.0,0.68,4.5,0.61,1.0,1.0,1.0,0.0,1.0
1,CRB-T2S,0.6,0.45,0.52,7.25,8.75,7.5,0.73,4.75,0.7,1.0,1.0,1.0,0.0,1.0
2,VALDO,0.71,0.44,0.54,1.23,1.59,1.0,0.92,0.5,0.64,0.87,0.87,0.87,0.71,0.82
3,VALDO-post,0.71,0.46,0.56,1.23,1.45,1.0,0.84,0.5,0.65,0.87,0.87,0.87,0.71,0.82
4,CRB-all,0.61,0.465,0.53,7.25,8.375,6.25,0.705,4.625,0.655,1.0,1.0,1.0,0.0,1.0


In [81]:
# Identify classification columns (marked with '*')
classification_cols = [col for col in final_df.columns if col.endswith('*')]

# Cast those columns to object explicitly before writing the '-' placeholder:
# assigning a string into a float column relies on implicit upcasting, which
# newer pandas versions deprecate.
final_df = final_df.astype({col: object for col in classification_cols})

# Replace the classification metrics with '-' for every row whose 'src' starts with "CRB-"
final_df.loc[final_df['src'].str.startswith("CRB-"), classification_cols] = '-'


# Custom sort order: maps a 'src' prefix to its rank in the final table.
# The aggregated CRB row is labelled 'CRB-all', so that is the key used here.
sort_order = {
    'VALDO': 1,
    'VALDO-post': 2,
    'CRB': 3,
    'CRB-all': 4
}

def get_sort_key(x):
    """Return the rank of the label ``x`` according to ``sort_order``.

    The longest key of ``sort_order`` that ``x`` starts with decides the rank,
    so 'VALDO-post' is ranked by its own entry rather than by the shorter
    'VALDO' prefix, and 'CRB-all' is not swallowed by 'CRB'.

    Args:
        x: Label string (a value of the 'src' column).

    Returns:
        The rank from ``sort_order``, or 5 when no key is a prefix of ``x``.
    """
    matching_keys = [key for key in sort_order if x.startswith(key)]
    if not matching_keys:
        return 5  # For any other cases
    return sort_order[max(matching_keys, key=len)]

# Sort the rows by the rank that get_sort_key assigns to each 'src' label.
# key= ranks the labels on the fly, so no temporary column is added to the frame;
# mergesort is stable, so rows with equal rank keep their current relative order
# (the default sorting algorithm gives no such guarantee).
final_df = final_df.sort_values(
    by='src',
    key=lambda labels: labels.map(get_sort_key),
    kind='mergesort',
)
final_df

Unnamed: 0,src,TPR,PPV,F1,TPavg,FPavg,FPmedian,FP/cmb,FNavg,Dice,TPR*,PPV*,F1*,TNR*,ACC*
2,VALDO,0.71,0.44,0.54,1.23,1.59,1.0,0.92,0.5,0.64,0.87,0.87,0.87,0.71,0.82
3,VALDO-post,0.71,0.46,0.56,1.23,1.45,1.0,0.84,0.5,0.65,0.87,0.87,0.87,0.71,0.82
0,CRB-SWI,0.62,0.48,0.54,7.25,8.0,5.0,0.68,4.5,0.61,-,-,-,-,-
1,CRB-T2S,0.6,0.45,0.52,7.25,8.75,7.5,0.73,4.75,0.7,-,-,-,-,-
4,CRB-all,0.61,0.465,0.53,7.25,8.375,6.25,0.705,4.625,0.655,-,-,-,-,-
