In [8]:
import pandas as pd
import numpy as np
import glob

In [13]:
def extract_metrics(file_path, has_auc=False):
    df = pd.read_csv(file_path)
    metrics = {
        'accuracy': df.loc[df.index[-3], 'precision'],
        'f1_macro': df.loc[df.index[-2], 'f1-score']
    }
    if has_auc:
        metrics['auc'] = df.loc[df.index[-1], 'precision']
    return metrics

In [17]:
model_names = ['axial_full_pretrained', 'coronal_full_pretrained', 
               'sagittal_full_pretrained', 'combined_pretrained', 'weighted_pretrained']
csv_files = {
    model: glob.glob(f"{model}*.csv")
    for model in model_names
}

In [18]:
results = {}
for model, files in csv_files.items():
    model_metrics = []
    for file in files:
        metrics = extract_metrics(file, has_auc=(model in ['combined_pretrained', 
                                                           'weighted_pretrained']))
        model_metrics.append(metrics)
    results[model] = pd.DataFrame(model_metrics)

summary = {}
for model, metrics_df in results.items():
    summary[model] = {
        'mean': metrics_df.mean(),
        'std': metrics_df.std()
    }

comparison = {}
for metric in ['accuracy', 'f1_macro']:
    comparison[metric] = {
        'before_combined': [summary[model]['mean'][metric] for model in model_names[:-2]],
        'combined': summary['combined_pretrained']['mean'][metric],
        'weighted': summary['weighted_pretrained']['mean'][metric]
    }

if 'auc' in summary['combined_pretrained']['mean'] and 'auc' in summary['weighted_pretrained']['mean']:
    comparison['auc'] = {
        'before_combined': None,
        'combined': summary['combined_pretrained']['mean']['auc'],
        'weighted': summary['weighted_pretrained']['mean']['auc']
    }

In [19]:
for model, stats in summary.items():
    print(f"Model: {model}")
    print(f"Mean:\n{stats['mean']}")
    print(f"Std:\n{stats['std']}")
    print("-" * 40)

print("Comparison of metrics:")
for metric, values in comparison.items():
    print(f"Metric: {metric}")
    print(f"Before combined: {values['before_combined']}")
    print(f"Combined: {values['combined']}")
    print(f"Weighted: {values['weighted']}")
    print("=" * 40)

Model: axial_full_pretrained
Mean:
accuracy    0.871475
f1_macro    0.888182
dtype: float64
Std:
accuracy    0.016981
f1_macro    0.016026
dtype: float64
----------------------------------------
Model: coronal_full_pretrained
Mean:
accuracy    0.781311
f1_macro    0.805876
dtype: float64
Std:
accuracy    0.014083
f1_macro    0.017650
dtype: float64
----------------------------------------
Model: sagittal_full_pretrained
Mean:
accuracy    0.760328
f1_macro    0.784956
dtype: float64
Std:
accuracy    0.025381
f1_macro    0.023288
dtype: float64
----------------------------------------
Model: combined_pretrained
Mean:
accuracy    0.847360
f1_macro    0.821272
auc         0.956430
dtype: float64
Std:
accuracy    0.020793
f1_macro    0.011342
auc         0.071599
dtype: float64
----------------------------------------
Model: weighted_pretrained
Mean:
accuracy    0.858830
f1_macro    0.821501
auc         0.985380
dtype: float64
Std:
accuracy    0.017338
f1_macro    0.016866
auc         0.003

In [20]:
model_names = ['axial_full_unpretrained', 'coronal_full_unpretrained', 
               'sagittal_full_unpretrained', 'combined_unpretrained', 'weighted_unpretrained']
csv_files = {
    model: glob.glob(f"{model}*.csv")
    for model in model_names
}

In [23]:
results = {}
for model, files in csv_files.items():
    model_metrics = []
    for file in files:
        metrics = extract_metrics(file, has_auc=(model in ['combined_unpretrained', 
                                                           'weighted_unpretrained']))
        model_metrics.append(metrics)
    results[model] = pd.DataFrame(model_metrics)

summary = {}
for model, metrics_df in results.items():
    summary[model] = {
        'mean': metrics_df.mean(),
        'std': metrics_df.std()
    }

comparison = {}
for metric in ['accuracy', 'f1_macro']:
    comparison[metric] = {
        'before_combined': [summary[model]['mean'][metric] for model in model_names[:-2]],
        'combined': summary['combined_unpretrained']['mean'][metric],
        'weighted': summary['weighted_unpretrained']['mean'][metric]
    }

if 'auc' in summary['combined_unpretrained']['mean'] and 'auc' in summary['weighted_unpretrained']['mean']:
    comparison['auc'] = {
        'before_combined': None,
        'combined': summary['combined_unpretrained']['mean']['auc'],
        'weighted': summary['weighted_unpretrained']['mean']['auc']
    }

In [24]:
for model, stats in summary.items():
    print(f"Model: {model}")
    print(f"Mean:\n{stats['mean']}")
    print(f"Std:\n{stats['std']}")
    print("-" * 40)

print("Comparison of metrics:")
for metric, values in comparison.items():
    print(f"Metric: {metric}")
    print(f"Before combined: {values['before_combined']}")
    print(f"Combined: {values['combined']}")
    print(f"Weighted: {values['weighted']}")
    print("=" * 40)

Model: axial_full_unpretrained
Mean:
accuracy    0.845574
f1_macro    0.861371
dtype: float64
Std:
accuracy    0.043364
f1_macro    0.041646
dtype: float64
----------------------------------------
Model: coronal_full_unpretrained
Mean:
accuracy    0.247541
f1_macro    0.200427
dtype: float64
Std:
accuracy    0.027554
f1_macro    0.042027
dtype: float64
----------------------------------------
Model: sagittal_full_unpretrained
Mean:
accuracy    0.548197
f1_macro    0.515055
dtype: float64
Std:
accuracy    0.042585
f1_macro    0.057108
dtype: float64
----------------------------------------
Model: combined_unpretrained
Mean:
accuracy    0.889884
f1_macro    0.874525
auc         0.972017
dtype: float64
Std:
accuracy    0.019434
f1_macro    0.022977
auc         0.047795
dtype: float64
----------------------------------------
Model: weighted_unpretrained
Mean:
accuracy    0.885446
f1_macro    0.863117
auc         0.990220
dtype: float64
Std:
accuracy    0.014425
f1_macro    0.020344
auc    