In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from   pathlib import Path, PurePath
from   scipy.ndimage import gaussian_filter1d as g1d
import seaborn as sns
from   dl_rad_age.evaluation import create_results_folder_name, evaluate_run, filter_runs, get_model_predictions, get_run_metrics, get_run_result, get_runs

In [None]:
# --- Inputs -------------------------------------------------------------------
# Folder handed to the run helpers (get_runs, get_run_result, get_run_metrics).
LOGS_DIR    = '../lightning_logs'
# Annotation CSV handed to evaluate_run.
# NOTE(review): the constant is named *_TEST but points to 'annotations_valid.csv'
# -- confirm this is the intended split.
ANNOTS_TEST = '../metadata/annotations_valid.csv'

# --- Outputs ------------------------------------------------------------------
# Root folder for per-run artefacts (one sub-folder per run).
RESULTS_DIR = '../results/dgx_runs_on_test_set'
# Destination of the aggregated ensemble table.
RESULTS_TEST_SET_CSV  = '../results/test_set/ensemble_results_test_set.csv'
# Not referenced in the cells shown here; presumably used further down -- TODO confirm.
RESULTS_KH_MALE_CSV   = '../results/kh_ae_male.csv'
RESULTS_KH_FEMALE_CSV = '../results/kh_ae_female.csv'

### Runs

In [None]:
# Collect the runs that `get_runs` reports for LOGS_DIR (unfiltered).
runs = get_runs(LOGS_DIR)
# Show the full list so the filter applied in the next cell can be sanity-checked.
print(runs)

In [None]:
# Narrow the run list down to the ensemble members of interest.
# NOTE(review): the matching rules of `include` / `exclude` are defined by
# `filter_runs`; presumably substring matching on the run name -- TODO confirm.
runs = filter_runs(runs, include=['agenet18_3d_aug_sex_ageloss2_lrs_pretrained_ensemble'], exclude=['autoencoder'])
print(runs)

# Bare last expression so the notebook renders the number of selected runs.
# (The former `print(runs), len(runs)` was a tuple expression and displayed `(None, n)`.)
len(runs)

### Results

In [None]:
# One result value per selected run, as returned by `get_run_result`.
results = [get_run_result(x, LOGS_DIR) for x in runs]

# Build the table column-wise so every column keeps its own dtype.
# Packing run names and results into one NumPy array coerces both rows to a
# common (string) dtype, which turns the sort below into a lexicographic one
# (e.g. '10.2' would be ordered before '9.8').
df_results = (
    pd.DataFrame({'run': runs, 'result': results})
    .sort_values(by='result')
    .reset_index(drop=True)
)
df_results

### Training Curves

In [None]:
print('Create training curves for...')

for run in runs:
    print('Run: {:s}'.format(run))

    # Epoch axes and loss values for the training and validation curves
    loss_epoch, loss, val_loss_epoch, val_loss = get_run_metrics(run, LOGS_DIR)

    # Output file lives in the run's own results folder; create the folder on demand
    training_curve_filepath = PurePath(RESULTS_DIR, create_results_folder_name(run), 'training_curve.png')
    Path(training_curve_filepath).parent.mkdir(parents=True, exist_ok=True)

    # Faint raw curve plus a Gaussian-smoothed (sigma=10) curve for each loss
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.plot(loss_epoch, loss, lw=1, c='cornflowerblue', alpha=0.33)
    ax.plot(loss_epoch, g1d(loss, 10), lw=2, c='blue', label='loss')
    ax.plot(val_loss_epoch, val_loss, lw=1, c='orange', alpha=0.33)
    ax.plot(val_loss_epoch, g1d(val_loss, 10), lw=2, c='orangered', label='val_loss')

    # Axis cosmetics
    ax.tick_params(labelsize=14, size=4)
    ax.set_xlabel('epoch', fontsize=14)
    ax.set_ylabel('loss', fontsize=14)
    ax.legend(fontsize=14)

    # Write the figure to disk and release it so figures do not pile up across runs
    print('\tSave <{:s}>'.format(str(training_curve_filepath)))
    fig.savefig(training_curve_filepath, facecolor='white', bbox_inches='tight')
    plt.close(fig)

### Evaluate runs

This may take a while: every selected run is evaluated in turn.

In [None]:
# Evaluate each selected run against the annotations in ANNOTS_TEST.
# Output goes to RESULTS_DIR via the `results_dir` argument; the aggregation cell
# later reads a `results.csv` from each sub-folder there, presumably written by
# `evaluate_run` -- TODO confirm.
print('Create violin plots for...')
for run in runs:
    print('run: {:s}'.format(run))
    # track_image_files=True: presumably adds the `image_file` column consumed
    # during aggregation -- verify against `evaluate_run`.
    evaluate_run(run, annots=ANNOTS_TEST, logs_dir=LOGS_DIR, results_dir=RESULTS_DIR, track_image_files=True)

### Aggregate results

In [None]:
# Every non-hidden sub-folder of RESULTS_DIR is treated as one evaluated run.
# Sorted because `Path.iterdir()` yields entries in arbitrary order, and the
# position in this list determines which `model_<i>` column a run ends up in.
results_dirs      = sorted(str(x) for x in Path(RESULTS_DIR).iterdir() if x.is_dir() and not x.name.startswith('.'))
results_filepaths = [str(PurePath(x, 'results.csv')) for x in results_dirs]
results_runs      = [Path(x).name for x in results_dirs]

# Fail with a readable message instead of an IndexError further down.
if not results_filepaths:
    raise FileNotFoundError('No result folders found in <{:s}>'.format(RESULTS_DIR))

# Parse each results file exactly once.
results_frames = [pd.read_csv(x) for x in results_filepaths]

# Ground truth and metadata are taken from the first run only.
# NOTE(review): assumes every run was evaluated on the same samples in the
# same row order -- TODO confirm.
y_true      = results_frames[0]['y_true'].to_list()
sex         = results_frames[0]['sex'].to_list()
image_files = results_frames[0]['image_file'].to_list()
y_preds     = [x['y_pred'].to_list() for x in results_frames]

#### Average result (of individual models)

In [None]:
# Absolute error of every model (rows) on every sample (columns)
y_err = np.abs(np.vstack(y_preds) - np.asarray(y_true))

# MAE of each individual model, then the mean over all models
mae_models  = y_err.mean(axis=1)
mae_average = mae_models.mean()

# Spread reported here is the SD of the per-model MAEs
print('Average MAE = {:.1f} +/- {:.1f}'.format(mae_average, mae_models.std()))

#### Ensemble result

In [None]:
# Ensemble prediction: per-sample mean over the individual model predictions
y_pred_ensemble  = np.mean(y_preds, axis=0)
# Backward-compatible alias: other cells still refer to the original, misspelled name
y_pred_enbsemble = y_pred_ensemble
# Per-sample SD across the models (np.std default, i.e. population SD with ddof=0)
sd_pred_ensemble = np.std(y_preds, axis=0)
# Per-sample absolute error of the ensemble prediction
y_err_ensemble   = np.abs(np.subtract(y_pred_ensemble, y_true))

In [None]:
# Column values and column names, kept in lock-step:
# the fixed columns first, then one prediction column per individual model.
results = [y_true, sex, list(y_pred_enbsemble), list(y_err_ensemble), list(sd_pred_ensemble), image_files]
cols    = ['y_true', 'sex', 'ensemble', 'error','uncertainty', 'image_file']
for i, y_pred in enumerate(y_preds):
    results.append(y_pred)
    cols.append('model_{:d}'.format(i+1))

# Build the frame column-wise so numeric columns stay numeric. Converting the
# mixed list to a single NumPy array coerces every value (numbers included) to
# a string, leaving the displayed table without usable numeric dtypes.
df_results = pd.DataFrame(dict(zip(cols, results)))

# Make sure the destination folder exists before writing the CSV.
Path(RESULTS_TEST_SET_CSV).parent.mkdir(parents=True, exist_ok=True)
df_results.to_csv(RESULTS_TEST_SET_CSV, index=False)
df_results

In [None]:
# Headline numbers: mean absolute error of the ensemble and the mean per-sample SD
mae_ensemble = y_err_ensemble.mean()
sd_ensemble  = sd_pred_ensemble.mean()

# Worst case: largest error, the ensemble SD of that same sample, and the 90th percentile
idx_max_error   = y_err_ensemble.argmax()
max_error       = y_err_ensemble[idx_max_error]
sd_at_max_error = sd_pred_ensemble[idx_max_error]
p90_error       = np.percentile(y_err_ensemble, 90.0)

# Note: the "+/-" values are ensemble SDs (model disagreement), not SDs of the error itself
print('Ensemble MAE = {:.2f} +/- {:.2f}'.format(mae_ensemble, sd_ensemble))
print('Max. error   = {:.2f} +/- {:.2f}'.format(max_error, sd_at_max_error))
print('p90 err      = {:.2f}'.format(p90_error))