In [1]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [2]:
def extract_best_epochs(filepath):
	"""Extract the best epoch results (for each fold) for a given experiment."""
	metrics  = ['precision', 'recall', 'dice']

	try:
		df = pd.read_csv(filepath)

		best_epoch = df["best_epoch"].max() - 1  #best_epoch counting starts from 1
		best_row = df.loc[df["epoch"]==best_epoch].drop('best_epoch', axis=1)
		best_row.drop('mean_dice', inplace=True, axis=1) # Drop here because we recalculate

		for metric in metrics:
			# Calculate mean per metric, ignoring background
			best_row[f"mean_{metric}"] = best_row.loc[:, ~best_row.columns.str.contains('background')].filter(like=metric).mean(axis=1)

		return best_row
	except:
		print(f"Something went wrong at {filepath} probably due to an empty .csv")
		return pd.DataFrame([])

In [3]:
def summarize_cv(basepath):

    result = []
    for filepath in glob.glob(basepath + '/' + '/*/*/epoch_results/*.csv'):
        best_epoch = extract_best_epochs(filepath)

        result.append(best_epoch)

    if len(result) > 1:
        df_result = pd.concat(result)
        df_result.drop(['epoch', 'train_loss'], inplace=True, axis=1)

        # STD
        df_std = df_result.copy()
        df_std.columns = df_std.columns + "_std"
        df_std = df_std.std()

        # mean
        df_mean = df_result.mean()

        result = pd.concat([df_mean, df_std])

        return result
    else:
        return pd.Series([], dtype='object')


In [4]:
def summarize_test(basepath):
    """This function retrieves the test results located in test_postprocess and 
    returns the averages performance per organ and overall.
    """
    result = []
    for filepath in glob.glob(basepath + '/*/*/test_postprocess/*.csv'):
        df = pd.read_csv(filepath)
        metrics  = ['precision', 'recall', 'dice']

        for metric in metrics:
            # Calculate mean per metric, ignoring background
            df[f"mean_{metric}"] = df.loc[:, ~df.columns.str.contains('background')].filter(like=metric).mean(axis=1)

        result.append(df.mean(numeric_only=True).to_frame().T)
        
    if len(result) > 1:
        result = pd.concat(result)

        df_std = result.copy()
        df_std.columns = df_std.columns + "_std"
        df_std = df_std.std()

        df_mean = result.mean()

        return pd.concat([df_mean, df_std])
    elif len(result) == 1:
        return result[0].squeeze()
    else:
        return pd.Series([], dtype='object')

In [5]:
import re 

def is_std(x):
    if 'std' in x:
        return 'std'
    else:
        return 'value'

In [8]:
def analyze_results(basepath='/export/scratch3/grewal/OAR_segmentation/runs/final_experiments'):
	"""Analyze all the runs contained in the basepath.
	
	The output of this function is a dict with keys (experiment, test/train) and value
	the mean over all the folds.
	"""
	with pd.ExcelWriter('./output/experiment_results.xlsx') as writer: 
		for experiment in os.listdir(basepath):

			experiment_path = basepath + '/' + experiment
			result_cv = summarize_cv(experiment_path)

			if not result_cv.empty:
				result_cv = result_cv.to_frame().reset_index().rename(columns={"index": "key", 0:"value"})
				result_cv['train/test'] = 'train'

			result_test = summarize_test(experiment_path)

			if not result_test.empty:
				result_test = result_test.to_frame().reset_index().rename(columns={"index": "key", 0:"value"})
				result_test['train/test'] = 'test'
			
			if (not result_cv.empty) and (not result_test.empty):
				result = pd.concat([result_cv, result_test])
				
				result['organ'] = result['key'].apply(lambda x: re.findall(r'(background|bladder|bowel_bag|hip|rectum|mean)', x)[0])
				result['metric'] = result['key'].apply(lambda x: re.findall(r'(recall|dice|precision)', x)[0])
				result['std'] = result['key'].apply(lambda x: is_std(x))

				# fix ordering
				result['organ'] = pd.Categorical(result['organ'], ['background', 'bladder', 'bowel_bag','hip','rectum','mean'])
				result['train/test'] = pd.Categorical(result['train/test'], ['train', 'test'])
				result['std'] = pd.Categorical(result['std'], ['value', 'std'])
			
				# Only one value so sum does not do anything
				result = result.groupby(['train/test', 'organ', 'metric', 'std']).sum(numeric_only=True)

				result.to_excel(writer, sheet_name=experiment)
		else:
			print("missing train or test")
    

In [9]:
analyze_results()



Something went wrong at /export/scratch3/grewal/OAR_segmentation/runs/final_experiments/basic-teacher-full-32/fold0/run0/epoch_results/epoch_results.csv probably due to an empty .csv
Something went wrong at /export/scratch3/grewal/OAR_segmentation/runs/final_experiments/good-baseline-full-32/fold0/run0/epoch_results/epoch_results.csv probably due to an empty .csv
Something went wrong at /export/scratch3/grewal/OAR_segmentation/runs/final_experiments/basic-teacher-basic-student-full/fold0/run0/epoch_results/epoch_results.csv probably due to an empty .csv
missing train or test
