In [1]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [2]:
def extract_best_epochs(filepath):
    """Extract the best-epoch row of one epoch-results .csv file.

    The file must provide the columns ``epoch``, ``best_epoch`` and
    ``mean_dice``; metric columns are recognised by the substrings
    ``precision``, ``recall`` and ``dice`` in their names.

    Parameters
    ----------
    filepath : str
        Path to the epoch-results .csv of a single fold/run.

    Returns
    -------
    pandas.DataFrame
        The row(s) whose ``epoch`` equals ``max(best_epoch) - 1``, without
        the ``best_epoch`` column and with ``mean_precision``,
        ``mean_recall`` and ``mean_dice`` recomputed over every column whose
        name does not contain ``background``. An empty DataFrame is returned
        (and a message printed) when the file cannot be processed.
    """
    metrics = ['precision', 'recall', 'dice']

    try:
        df = pd.read_csv(filepath)

        # best_epoch counting starts from 1 (the `epoch` column is presumably
        # 0-based -- confirm against the training code).
        best_epoch = df["best_epoch"].max() - 1

        # `mean_dice` is dropped here because it is recalculated below.
        best_row = df.loc[df["epoch"] == best_epoch].drop(columns=['best_epoch', 'mean_dice'])

        for metric in metrics:
            # Calculate mean per metric, ignoring background
            foreground = best_row.loc[:, ~best_row.columns.str.contains('background')]
            best_row[f"mean_{metric}"] = foreground.filter(like=metric).mean(axis=1)

        return best_row
    except Exception as exc:
        # Best effort: a broken or empty file must not abort the whole analysis.
        # `Exception` (instead of a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, and the real cause is reported.
        print(
            f"Something went wrong at {filepath} probably due to an empty .csv "
            f"({type(exc).__name__}: {exc})"
        )
        return pd.DataFrame([])

In [3]:
def summarize_cv(basepath):
    """Average the best-epoch results over all folds/runs of one experiment.

    Every file matching ``<basepath>/*/*/epoch_results/*.csv`` is reduced to
    its best-epoch row by ``extract_best_epochs``; the collected rows are then
    averaged column-wise.

    Parameters
    ----------
    basepath : str
        Directory of a single experiment.

    Returns
    -------
    pandas.Series
        Column-wise mean of the best-epoch rows, without the ``epoch`` and
        ``train_loss`` columns. An empty Series (dtype ``object``) is returned
        when no file produced a usable row.
    """
    pattern = os.path.join(basepath, '*', '*', 'epoch_results', '*.csv')

    best_rows = []
    for filepath in glob.glob(pattern):
        best_row = extract_best_epochs(filepath)
        # Files that could not be processed come back as empty frames; skip
        # them so they neither count as a result nor break the drop below.
        if not best_row.empty:
            best_rows.append(best_row)

    # A single usable fold/run is still a valid result (mean over one row).
    if not best_rows:
        return pd.Series([], dtype='object')

    df_result = pd.concat(best_rows).drop(columns=['epoch', 'train_loss'], errors='ignore')
    return df_result.mean()


In [4]:
def summarize_test(basepath):
    """Average the test results of one experiment per organ and overall.

    Every file matching ``<basepath>/*/*/test_postprocess/*.csv`` is read.
    For each row the overall ``mean_precision``, ``mean_recall`` and
    ``mean_dice`` are recomputed from the columns containing the metric name,
    ignoring ``background`` columns and any ``mean_*`` aggregate already
    stored in the file. Each file is then averaged over its rows, and the
    per-file averages are averaged over all files.

    Parameters
    ----------
    basepath : str
        Directory of a single experiment.

    Returns
    -------
    pandas.Series
        Mean value per numeric column. An empty Series (dtype ``object``) is
        returned when no usable test file is found.
    """
    metrics = ['precision', 'recall', 'dice']

    per_file_means = []
    for filepath in glob.glob(basepath + '/*/*/test_postprocess/*.csv'):
        try:
            df = pd.read_csv(filepath)
        except pd.errors.EmptyDataError:
            # Same best-effort policy as for the epoch results: skip empty files.
            print(f"Skipping {filepath}: empty .csv")
            continue

        # Source columns for the recomputed means: no background, and no
        # aggregate that already exists in the file (e.g. `mean_dice`), which
        # would otherwise be averaged into its own replacement.
        is_background = df.columns.str.contains('background')
        is_aggregate = df.columns.str.startswith('mean_')
        foreground = df.loc[:, ~(is_background | is_aggregate)]

        for metric in metrics:
            # Calculate mean per metric, ignoring background
            df[f"mean_{metric}"] = foreground.filter(like=metric).mean(axis=1)

        per_file_means.append(df.mean(numeric_only=True).to_frame().T)

    if not per_file_means:
        return pd.Series([], dtype='object')
    if len(per_file_means) == 1:
        # Single file: return its only row as a Series (named 0).
        return per_file_means[0].iloc[0]
    return pd.concat(per_file_means).mean()

In [5]:
def analyze_results(basepath='/export/scratch3/grewal/OAR_segmentation/runs/final_experiments'):
    """Summarize every experiment found directly under ``basepath``.

    Each entry of ``basepath`` is treated as one experiment. Its
    cross-validation summary (``summarize_cv``) is stored under the key
    ``(experiment, 'train')`` and its test summary (``summarize_test``) under
    ``(experiment, 'test')``. Summaries that come back empty are left out.

    Returns
    -------
    dict
        Maps ``(experiment, 'train' | 'test')`` to the corresponding summary.
    """
    summaries = {}
    for experiment in os.listdir(basepath):
        experiment_path = f"{basepath}/{experiment}"

        # Order matters for the key order of the returned dict: train, then test.
        for split, summarize in (('train', summarize_cv), ('test', summarize_test)):
            summary = summarize(experiment_path)
            if summary.empty:
                continue
            summaries[(experiment, split)] = summary

    return summaries

In [9]:
# Summarize all experiments under the default basepath of analyze_results.
result = analyze_results()

Something went wrong at /export/scratch3/grewal/OAR_segmentation/runs/final_experiments/basic-teacher-full-32/fold0/run0/epoch_results/epoch_results.csv probably due to an empty .csv
Something went wrong at /export/scratch3/grewal/OAR_segmentation/runs/final_experiments/good-baseline-full-32/fold0/run0/epoch_results/epoch_results.csv probably due to an empty .csv
Something went wrong at /export/scratch3/grewal/OAR_segmentation/runs/final_experiments/basic-teacher-basic-student-full/fold0/run0/epoch_results/epoch_results.csv probably due to an empty .csv


In [10]:
# Show which (experiment, 'train'/'test') combinations produced a non-empty summary.
list(result.keys())

[('good-baseline-full-100epochs_18112022_153147', 'test'),
 ('good-baseline-folds-32', 'train'),
 ('good-baseline-folds-32', 'test'),
 ('basic-teacher-basic-student-folds', 'train'),
 ('basic-teacher-basic-student-folds', 'test'),
 ('basic-teacher-full-32', 'test'),
 ('good-baseline-full-32', 'test'),
 ('basic-teacher-basic-student-full', 'test'),
 ('good-baseline-full-noisy-data-100epochs_18112022_153816', 'test'),
 ('good-baseline-full-noisy-data_18112022_121453', 'test'),
 ('basic-teacher-folds-32', 'train'),
 ('basic-teacher-folds-32', 'test')]

In [11]:
# Inspect the averaged test metrics of a single experiment.
result[('basic-teacher-full-32', 'test')]

recall_background       0.999714
precision_background    0.997810
dice_background         0.999524
recall_bowel_bag        0.776952
precision_bowel_bag     0.900190
dice_bowel_bag          0.826952
recall_bladder          0.902095
precision_bladder       0.905524
dice_bladder            0.897524
recall_hip              0.945619
precision_hip           0.899714
dice_hip                0.921048
recall_rectum           0.776000
precision_rectum        0.744857
dice_rectum             0.739619
mean_dice               0.846286
mean_precision          0.862571
mean_recall             0.850167
Name: 0, dtype: float64