In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Input tables read with pd.read_csv in the "Load results" section (paths are
# relative to the notebook's working directory).
# Ensemble results on the test set.
RESULTS_TEST_SET_CSV  = '../results/test_set/ensemble_results_test_set.csv'
# Kellinghaus ("kh") absolute-error tables, one file per sex.
RESULTS_KH_MALE_CSV   = '../results/kh_ae_male.csv'
RESULTS_KH_FEMALE_CSV = '../results/kh_ae_female.csv'

### Load results

In [None]:
# Read the three result tables (ensemble, Kellinghaus male, Kellinghaus female).
df_results_dl_ensemble, df_results_kh_male, df_results_kh_female = (
    pd.read_csv(csv_path)
    for csv_path in (RESULTS_TEST_SET_CSV, RESULTS_KH_MALE_CSV, RESULTS_KH_FEMALE_CSV)
)

In [None]:
# Preview the first ten rows of the ensemble results.
df_results_dl_ensemble.head(10)

In [None]:
# Preview the first ten rows of the male Kellinghaus table.
df_results_kh_male.head(10)

In [None]:
# Preview the first ten rows of the female Kellinghaus table.
df_results_kh_female.head(10)

In [None]:
# Pull the per-sample columns of the ensemble results out as NumPy arrays.
y_true, abs_error_dl, sex, uncertainty = (
    df_results_dl_ensemble[column].to_numpy()
    for column in ('y_true', 'error', 'sex', 'uncertainty')
)

In [None]:
# Boolean masks per sex code (0.0 -> "_m" arrays, 1.0 -> "_f" arrays).
is_male   = sex == 0.0
is_female = sex == 1.0

# Split every per-sample array with the same masks so the rows stay aligned.
y_true_m,       y_true_f       = y_true[is_male],       y_true[is_female]
abs_error_dl_m, abs_error_dl_f = abs_error_dl[is_male], abs_error_dl[is_female]
uncertainty_m,  uncertainty_f  = uncertainty[is_male],  uncertainty[is_female]

In [None]:
def _kh_abs_error(kh_ages, kh_errors, age, atol=1e-6):
    """Look up the Kellinghaus-table absolute error for one true age.

    Parameters
    ----------
    kh_ages : np.ndarray
        The 'age' column of a Kellinghaus table.
    kh_errors : np.ndarray
        The 'ae' column of the same table (row-aligned with ``kh_ages``).
    age : float
        True age of the sample; rounded to two decimals to match the table.
    atol : float
        Absolute tolerance of the age comparison. A tolerance is used instead
        of exact ``==`` so a last-bit floating-point difference between the
        rounded age and the value parsed from the CSV does not make the
        lookup fail.

    Returns
    -------
    The 'ae' value of the first matching table row.

    Raises
    ------
    ValueError
        If the table contains no row for the (rounded) age.
    """
    # Round true age to match Kellinghaus table
    age = np.round(age, decimals=2)
    hits = np.flatnonzero(np.isclose(kh_ages, age, rtol=0.0, atol=atol))
    if hits.size == 0:
        raise ValueError('No Kellinghaus table entry for age <{}>.'.format(age))
    # Positional access on the raw arrays: independent of the DataFrame index labels.
    return kh_errors[hits[0]]


# Extract the table columns once instead of once per sample.
kh_age_m, kh_ae_m = df_results_kh_male['age'].to_numpy(), df_results_kh_male['ae'].to_numpy()
kh_age_f, kh_ae_f = df_results_kh_female['age'].to_numpy(), df_results_kh_female['ae'].to_numpy()

abs_error_kh_m = []
abs_error_kh_f = []

# Iterate in the original sample order so the results stay row-aligned with
# y_true_m / y_true_f (which are boolean-mask selections of the same arrays).
for age_, sex_ in zip(y_true, sex):
    if sex_==0.0:
        abs_error_kh_m.append(_kh_abs_error(kh_age_m, kh_ae_m, age_))
    elif sex_==1.0:
        abs_error_kh_f.append(_kh_abs_error(kh_age_f, kh_ae_f, age_))
    else:
        raise ValueError('Bad sex value <{}>.'.format(sex_))

abs_error_kh_m = np.asarray(abs_error_kh_m)
abs_error_kh_f = np.asarray(abs_error_kh_f)

# Later cells index these arrays with masks derived from y_true_m / y_true_f.
assert abs_error_kh_m.shape == y_true_m.shape, 'male KH errors not aligned with y_true_m'
assert abs_error_kh_f.shape == y_true_f.shape, 'female KH errors not aligned with y_true_f'

abs_error_kh_m.shape, abs_error_kh_f.shape

In [None]:
# Sanity check: do the looked-up test-set errors lie on the Kellinghaus curves?
fig, ax = plt.subplots(1, 2, figsize=(12,6))

# (axis, Kellinghaus table, true ages, looked-up errors) - female left, male right.
sanity_panels = (
    (ax[0], df_results_kh_female, y_true_f, abs_error_kh_f),
    (ax[1], df_results_kh_male,   y_true_m, abs_error_kh_m),
)
for axis, kh_table, ages, kh_errors in sanity_panels:
    axis.plot(kh_table['age'], kh_table['ae'])
    axis.scatter(ages, kh_errors)
    axis.set_ylim(0,4)

plt.show()

### Error plot

In [None]:
# Age bin edges (six equal-width bins from 15 to 30) and the centre of each bin.
bins      = np.linspace(15, 30, num=7)
positions = [(lower+upper)/2.0 for lower, upper in zip(bins[:-1], bins[1:])]
bins, positions

In [None]:
# Assign every sample to an age bin. With right=False, np.digitize returns i
# for bins[i-1] <= x < bins[i]; 0 means below bins[0] and len(bins) means
# at or above bins[-1].
male_bin_inds   = np.digitize(y_true_m, bins, right=False)
female_bin_inds = np.digitize(y_true_f, bins, right=False)

# Iterate over the fixed set of in-range bin indices rather than over the
# indices that happen to occur in the data. This keeps every binned list at
# exactly len(bins)-1 entries, aligned with `positions` and the tick labels,
# even if a bin is empty. Samples outside [bins[0], bins[-1]) are left out
# instead of creating extra bins.
bin_ids = range(1, len(bins))

# Bin age (sanity check)
male_age_binned = [y_true_m[male_bin_inds==i] for i in bin_ids]
female_age_binned = [y_true_f[female_bin_inds==i] for i in bin_ids]

# Bin ensemble results
ens_male_ae_binned   = [abs_error_dl_m[male_bin_inds==i] for i in bin_ids]
ens_female_ae_binned = [abs_error_dl_f[female_bin_inds==i] for i in bin_ids]

# Bin Kellinghaus results
kh_male_ae_binned   = [abs_error_kh_m[male_bin_inds==i] for i in bin_ids]
kh_female_ae_binned = [abs_error_kh_f[female_bin_inds==i] for i in bin_ids]

In [None]:
# Mean absolute error of the ensemble: all samples, then male and female separately.
mae_ensemble, mae_ensemble_male, mae_ensemble_female = (
    np.mean(errors) for errors in (abs_error_dl, abs_error_dl_m, abs_error_dl_f)
)

for template, value in (
    ('MAE test set        = {:.2f} years', mae_ensemble),
    ('MAE test set male   = {:.2f} years', mae_ensemble_male),
    ('MAE test set female = {:.2f} years', mae_ensemble_female),
):
    print(template.format(value))

In [None]:
# Summary statistics of the Kellinghaus (KH) absolute errors on the test set.
# Mean over both sexes, then mean / maximum / 90th percentile per sex.
mae_kh        = np.mean(np.concatenate([abs_error_kh_m, abs_error_kh_f]))
mae_kh_male   = np.mean(abs_error_kh_m)
mae_kh_female = np.mean(abs_error_kh_f)

max_kh_male   = np.max(abs_error_kh_m)
max_kh_female = np.max(abs_error_kh_f)

p90_kh_male   = np.percentile(abs_error_kh_m, q=90)
p90_kh_female = np.percentile(abs_error_kh_f, q=90)

# Labels corrected: the originals read "teset male male" / "teset male female".
print('MAE KH test set        = {:.2f} years'.format(mae_kh))
print('MAE KH test set male   = {:.2f} years'.format(mae_kh_male))
print('MAE KH test set female = {:.2f} years\n'.format(mae_kh_female))

print('Max KH test set male   = {:.2f} years'.format(max_kh_male))
print('Max KH test set female = {:.2f} years\n'.format(max_kh_female))

print('P90 KH test set male   = {:.2f} years'.format(p90_kh_male))
print('P90 KH test set female = {:.2f} years'.format(p90_kh_female))

In [None]:
# Shift the two box series left/right of each bin centre so they sit side by side.
positions_ens = [center-0.42 for center in positions]
positions_kh  = [center+0.42 for center in positions]
xtick_labels  = ['15.0-17.5','17.5-20.0','20.0-22.5','22.5-25.0','25.0-27.5','27.5-30.0']

width = 0.7
lw = 1.5

# colors
# - '#00ccb0' and '#f88f6f'

boxprops_ens = dict(linewidth=lw, facecolor='#8fb39f')
boxprops_kh  = dict(linewidth=lw, facecolor='#f9f1cb')
medianprops  = dict(linewidth=lw, color='black')
whiskerprops = dict(linewidth=lw)
capprops     = dict(linewidth=lw)
flierprops   = dict(marker='D', markersize=5, markerfacecolor='lightgray')

# Styling that is identical for all four boxplot calls.
shared_box_style = dict(
    widths=width,
    patch_artist=True,
    medianprops=medianprops,
    whiskerprops=whiskerprops,
    capprops=capprops,
    flierprops=flierprops,
)

fig, ax = plt.subplots(1,2, figsize=(20,10))

# female results (left panel): ensemble first, then Kellinghaus
bp1 = ax[0].boxplot(ens_female_ae_binned, positions=positions_ens, boxprops=boxprops_ens, **shared_box_style)
bp2 = ax[0].boxplot(kh_female_ae_binned,  positions=positions_kh,  boxprops=boxprops_kh,  **shared_box_style)

# male results (right panel): ensemble first, then Kellinghaus
bp3 = ax[1].boxplot(ens_male_ae_binned, positions=positions_ens, boxprops=boxprops_ens, **shared_box_style)
bp4 = ax[1].boxplot(kh_male_ae_binned,  positions=positions_kh,  boxprops=boxprops_kh,  **shared_box_style)

# Common axis formatting; the ticks are placed at the bin centres.
for axis in ax:
    axis.set_xticks(ticks=positions, labels=xtick_labels, rotation=45, ha='right')
    axis.set_xlim(14.75,30.25)
    axis.set_ylim(0.0,7.8)
    axis.tick_params(labelsize=16, size=4)
    axis.set_xlabel('true age / (years)', fontsize=16, labelpad=20)
    axis.set_ylabel('absolute prediction error / (years)', fontsize=16, labelpad=20)

# Legend (one box of each series as handle) and title per panel.
legend_labels = ['deep learning', 'optimistic human reader estimate']
for axis, ens_bp, kh_bp, panel_title in ((ax[0], bp1, bp2, 'female'), (ax[1], bp3, bp4, 'male')):
    axis.legend([ens_bp["boxes"][0], kh_bp["boxes"][0]], legend_labels, loc='upper right', fontsize=16)
    axis.set_title(panel_title, fontsize=18)

plt.savefig('../results/plots/error_plot.png', facecolor='white', bbox_inches='tight', dpi=300)
plt.show()

### Outlier plot

In [None]:
# Fraction of test samples to abstain on in the outlier plots; the samples with
# the highest uncertainty are the ones dropped (see the percentile cut below).
abstention_rate = 0.5

In [None]:
# Identify samples with low uncertainty: everything strictly below the
# (1 - abstention_rate) percentile of the uncertainty values.
ref_unc_val = np.percentile(uncertainty, q=(1-abstention_rate)*100)
is_low_unc  = uncertainty < ref_unc_val

# Select low uncertainty samples
y_true_lu, abs_error_lu = y_true[is_low_unc], abs_error_dl[is_low_unc]
sex_lu, uncertainty_lu  = sex[is_low_unc], uncertainty[is_low_unc]

# Separate by sex (0.0 -> "_m", 1.0 -> "_f")
is_male_lu   = sex_lu == 0.0
is_female_lu = sex_lu == 1.0

y_true_m_lu,      y_true_f_lu      = y_true_lu[is_male_lu],      y_true_lu[is_female_lu]
abs_error_m_lu,   abs_error_f_lu   = abs_error_lu[is_male_lu],   abs_error_lu[is_female_lu]
uncertainty_m_lu, uncertainty_f_lu = uncertainty_lu[is_male_lu], uncertainty_lu[is_female_lu]

In [None]:
# Assign the low-uncertainty samples to the age bins. With right=False,
# np.digitize returns i for bins[i-1] <= x < bins[i].
male_bin_inds_lu   = np.digitize(y_true_m_lu, bins, right=False)
female_bin_inds_lu = np.digitize(y_true_f_lu, bins, right=False)

# Iterate over the fixed in-range bin indices, not over the indices observed
# in the data. After abstention a bin may well be empty; building the lists
# from np.unique would then silently shift all later bins against `positions`.
# Samples outside [bins[0], bins[-1]) are left out.
bin_ids_lu = range(1, len(bins))

# Bin age (sanity check)
male_age_binned_lu = [y_true_m_lu[male_bin_inds_lu==i] for i in bin_ids_lu]
female_age_binned_lu = [y_true_f_lu[female_bin_inds_lu==i] for i in bin_ids_lu]

# Bin ensemble results
ens_male_ae_binned_lu   = [abs_error_m_lu[male_bin_inds_lu==i] for i in bin_ids_lu]
ens_female_ae_binned_lu = [abs_error_f_lu[female_bin_inds_lu==i] for i in bin_ids_lu]

In [None]:
def _p90(values):
    """Return the 90th percentile of ``values``."""
    return np.percentile(values, q=90)


# Outliers in the regular results: per-bin maximum and 90th percentile
max_ae_ens_m = list(map(np.max, ens_male_ae_binned))
max_ae_ens_f = list(map(np.max, ens_female_ae_binned))

p90_ae_ens_m = list(map(_p90, ens_male_ae_binned))
p90_ae_ens_f = list(map(_p90, ens_female_ae_binned))

max_ae_kh_m = list(map(np.max, kh_male_ae_binned))
max_ae_kh_f = list(map(np.max, kh_female_ae_binned))

# Outliers in the low uncertainty results
max_ae_ens_m_lu = list(map(np.max, ens_male_ae_binned_lu))
max_ae_ens_f_lu = list(map(np.max, ens_female_ae_binned_lu))

p90_ae_ens_m_lu = list(map(_p90, ens_male_ae_binned_lu))
p90_ae_ens_f_lu = list(map(_p90, ens_female_ae_binned_lu))

In [None]:
markers = ['o', '^', '_', 's', 'd', '_']
size    = 250
colors  = ['red', 'orangered', 'k', 'blue', 'cornflowerblue', 'k']

# Keyword sets shared by the outlined markers (AgeNet) and the dash markers (standard method).
outlined_style = dict(s=size, edgecolor='k', lw=1.5, alpha=0.9)
dash_style     = dict(s=size, lw=2)

# Per age bin, take the larger of the female and the male value.
combined_max_ens    = np.max([max_ae_ens_f, max_ae_ens_m], axis=0)
combined_p90_ens    = np.max([p90_ae_ens_f, p90_ae_ens_m], axis=0)
combined_max_kh     = np.max([max_ae_kh_f, max_ae_kh_m], axis=0)
combined_max_ens_lu = np.max([max_ae_ens_f_lu, max_ae_ens_m_lu], axis=0)
combined_p90_ens_lu = np.max([p90_ae_ens_f_lu, p90_ae_ens_m_lu], axis=0)

fig, ax = plt.subplots(1, 2, figsize=(20,10))

# Left panel: all predictions.
ax[0].scatter(positions, combined_max_ens, marker=markers[0], c=colors[0], label='max (AgeNet)', **outlined_style)
ax[0].scatter(positions, combined_p90_ens, marker=markers[1], c=colors[1], label='p90 (AgeNet)', **outlined_style)
ax[0].scatter(positions, combined_max_kh,  marker=markers[2], c=colors[2], label='max (standard method)', **dash_style)

# Right panel: low-uncertainty predictions only; the standard-method reference is unchanged.
ax[1].scatter(positions, combined_max_ens_lu, marker=markers[3], c=colors[3], label='max (AgeNet)', **outlined_style)
ax[1].scatter(positions, combined_p90_ens_lu, marker=markers[4], c=colors[4], label='p90 (AgeNet)', **outlined_style)
ax[1].scatter(positions, combined_max_kh,     marker=markers[5], c=colors[5], label='max (standard method)', **dash_style)

for axis in ax:
    axis.set_xticks(ticks=positions, labels=xtick_labels, rotation=45, ha='right')
    axis.set_xlim(14.75,30.25)
    axis.set_ylim(0.0,7.8)
    axis.tick_params(labelsize=16, size=4)
    axis.set_xlabel('true age / (years)', fontsize=16, labelpad=20)
    axis.set_ylabel('absolute prediction error / (years)', fontsize=16, labelpad=20)
    axis.legend(fontsize=16)

ax[0].set_title('all predictions (no abstention)', fontsize=18)
ax[1].set_title(f'most certain predictions (abstention rate = {str(abstention_rate):s})', fontsize=18)
plt.suptitle('female and male combined', fontsize=18)

plt.savefig('../results/plots/outlier_plot.png', facecolor='white', bbox_inches='tight', dpi=300)
plt.show()

In [None]:
markers = ['o', '*', '_', 's', 'd', '_']
size    = 300
colors  = ['#8fb39f', '#f9f1cb', 'k', '#8fb39f', '#f9f1cb', 'k']

# Keyword sets shared by the outlined markers (AgeNet) and the dash markers (standard method).
outlined_style = dict(s=size, edgecolor='k', lw=1.5, alpha=0.9)
dash_style     = dict(s=size, lw=2)

# One figure per sex, female first:
# (title, max all, p90 all, max standard method, max low-uncertainty, p90 low-uncertainty)
per_sex_series = (
    ('female', max_ae_ens_f, p90_ae_ens_f, max_ae_kh_f, max_ae_ens_f_lu, p90_ae_ens_f_lu),
    ('male',   max_ae_ens_m, p90_ae_ens_m, max_ae_kh_m, max_ae_ens_m_lu, p90_ae_ens_m_lu),
)

for sex_title, max_all, p90_all, max_kh, max_lu, p90_lu in per_sex_series:
    fig, ax = plt.subplots(1, 2, figsize=(20,10))

    # Left panel: all predictions.
    ax[0].scatter(positions, max_all, marker=markers[0], c=colors[0], label='max (AgeNet)', **outlined_style)
    ax[0].scatter(positions, p90_all, marker=markers[1], c=colors[1], label='p90 (AgeNet)', **outlined_style)
    ax[0].scatter(positions, max_kh,  marker=markers[2], c=colors[2], label='max (standard method)', **dash_style)

    # Right panel: low-uncertainty predictions only.
    ax[1].scatter(positions, max_lu, marker=markers[3], c=colors[3], label='max (AgeNet)', **outlined_style)
    ax[1].scatter(positions, p90_lu, marker=markers[4], c=colors[4], label='p90 (AgeNet)', **outlined_style)
    ax[1].scatter(positions, max_kh, marker=markers[5], c=colors[5], label='max (standard method)', **dash_style)

    for axis in ax:
        axis.set_xticks(ticks=positions, labels=xtick_labels, rotation=45, ha='right')
        axis.set_xlim(14.75,30.25)
        axis.set_ylim(0.0,7.8)
        axis.tick_params(labelsize=16, size=4)
        axis.set_xlabel('true age / (years)', fontsize=16, labelpad=20)
        axis.set_ylabel('absolute prediction error / (years)', fontsize=16, labelpad=20)
        axis.legend(fontsize=16)

    plt.suptitle(sex_title, fontsize=18)
    plt.show()

### Abstention-Performance Trade-Off

In [None]:
# Total number of test samples
n_elem = len(abs_error_dl)

# Indices that sort the results by ascending uncertainty
order_asc_uc = np.argsort(uncertainty)

# Error and uncertainty, both reordered by ascending uncertainty
ordered_abs_error, ordered_uncertainty = abs_error_dl[order_asc_uc], uncertainty[order_asc_uc]

In [None]:
# Abstention rates: 300 evenly spaced values on [0, 1], without the final
# value 1.0 (abstaining on everything leaves nothing to evaluate).
abs_rates = np.linspace(0.0, 1.0, num=300)[:-1]

In [None]:
# Calculate metrics for different abstention rates
abs_maes     = []
abs_max_errs = []
abs_p90_errs = []

for rate in abs_rates:
    # Keep the (1-rate) fraction of samples with the lowest uncertainty
    # (ordered_abs_error is sorted by ascending uncertainty). Always keep at
    # least one sample: for a small test set the integer truncation would
    # otherwise give an empty slice at high rates and np.max would raise.
    n_keep = max(1, int((1-rate)*n_elem))
    ordered_error_ = ordered_abs_error[:n_keep]

    mae_     = np.mean(ordered_error_)
    max_err_ = np.max(ordered_error_)
    p90_err_ = np.percentile(ordered_error_, q=90.0)

    # MAE and p90 are reported as running minima over the rates seen so far,
    # so those two curves never increase with the abstention rate. The
    # previous entry already is that running minimum, so only it needs to be
    # compared. The maximum error is stored as computed.
    if abs_p90_errs:
        mae_     = np.min([mae_, abs_maes[-1]])
        p90_err_ = np.min([p90_err_, abs_p90_errs[-1]])

    abs_maes.append(mae_)
    abs_max_errs.append(max_err_)
    abs_p90_errs.append(p90_err_)

In [None]:
# Maximum absolute error of the standard method over both sexes (computed once).
kh_max_error = np.max(np.concatenate([abs_error_kh_m,abs_error_kh_f]))

# Index of the first abstention rate at which the deep-learning maximum error
# no longer exceeds the standard method's. None if that never happens (the
# unbounded while-loop used before would have run past the end of the list).
idx_max_equal = next(
    (idx for idx, err in enumerate(abs_max_errs) if not (err > kh_max_error)),
    None,
)

print('max abs. error (standard method) = {:.3f}'.format(kh_max_error))
if idx_max_equal is None:
    print('max abs. error (deep learning)   stays above the standard method for all abstention rates')
else:
    print('max abs. error (deep learning)   = {:.3f} @ abst. rate = {:.3f}'.format(abs_max_errs[idx_max_equal], abs_rates[idx_max_equal]))

In [None]:
# 90th-percentile absolute error of the standard method over both sexes (computed once).
kh_p90_error = np.percentile(np.concatenate([abs_error_kh_m,abs_error_kh_f]), q=90)

# Index of the first abstention rate at which the deep-learning p90 error no
# longer exceeds the standard method's. None if that never happens (the
# unbounded while-loop used before would have run past the end of the list).
idx_p90_equal = next(
    (idx for idx, err in enumerate(abs_p90_errs) if not (err > kh_p90_error)),
    None,
)

print('p90 abs. error (standard method) = {:.3f}'.format(kh_p90_error))
if idx_p90_equal is None:
    print('p90 abs. error (deep learning)   stays above the standard method for all abstention rates')
else:
    print('p90 abs. error (deep learning)   = {:.3f} @ abst. rate = {:.3f}'.format(abs_p90_errs[idx_p90_equal], abs_rates[idx_p90_equal]))

In [None]:
lw = 2
colors = ['#ff931d', '#00a769', 'black']

# Standard-method (HRE) errors of both sexes pooled; drawn as horizontal reference lines.
kh_errors_pooled = np.concatenate([abs_error_kh_m,abs_error_kh_f])

plt.figure(figsize=(10,10))

# (level, line style, colour, legend label) for the reference lines
hre_levels = (
    (np.max(kh_errors_pooled),              '--', colors[0], 'max error (HRE)'),
    (np.percentile(kh_errors_pooled, q=90), '-.', colors[1], 'p90 error (HRE)'),
    (np.mean(kh_errors_pooled),             ':',  colors[2], 'MAE (HRE)'),
)
for level, line_style, color, label in hre_levels:
    plt.axhline(y=level, ls=line_style, lw=lw, c=color, label=label)

# Deep-learning curves over the abstention rate in percent; same colour per metric.
dl_curves = (
    (abs_max_errs, colors[0], 'max error (deep learning)'),
    (abs_p90_errs, colors[1], 'p90 error (deep learning)'),
    (abs_maes,     colors[2], 'MAE (deep learning)'),
)
for curve, color, label in dl_curves:
    plt.plot(abs_rates*100, curve, lw=lw, c=color, label=label)

plt.ylim(bottom=0.0)
plt.tick_params(labelsize=16, size=4)
plt.xlabel('abstention rate / (%)', fontsize=16)
plt.ylabel('absolute error / (years)', fontsize=16)
plt.legend(fontsize=14)

plt.savefig('../results/plots/abstention_performance_trade_off.png', facecolor='white', bbox_inches='tight', dpi=100)
plt.show()

### Best and worst predictions

In [None]:
# Preprocessed .npy arrays of the cases shown as best / worst prediction per sex.
# NOTE(review): how these four files were selected is not shown in this
# notebook - presumably from the test-set results; confirm before reuse.
best_female_image_file  = '../data/preprocessed/ae_1789_0_0.npy'
best_male_image_file    = '../data/preprocessed/ae_3507_0_0.npy'
worst_female_image_file = '../data/preprocessed/ae_1641_0_0.npy'
worst_male_image_file   = '../data/preprocessed/ae_2963_0_0.npy'

In [None]:
# Load the four image arrays from disk.
best_female_image, best_male_image, worst_female_image, worst_male_image = (
    np.load(image_file)
    for image_file in (
        best_female_image_file,
        best_male_image_file,
        worst_female_image_file,
        worst_male_image_file,
    )
)

In [None]:
# Clip all four images to the same intensity window before display.
# NOTE(review): the window [-250, 1250] is presumably in the images' native
# intensity units - confirm against the preprocessing.
clip_min, clip_max = -250, 1250
best_female_image, best_male_image, worst_female_image, worst_male_image = (
    np.clip(image, clip_min, clip_max)
    for image in (best_female_image, best_male_image, worst_female_image, worst_male_image)
)

In [None]:
# Display the array shape; the plots below index the first axis to pick a single slice.
best_female_image.shape

In [None]:
# Show slice 40 (first axis) of the best female prediction and save the figure.
fig, ax = plt.subplots(figsize=(12,22.4))
ax.imshow(best_female_image[40,:], cmap='gist_gray')
ax.tick_params(labelsize=16, size=4)
ax.set_xlabel('x', fontsize=16)
ax.set_ylabel('y', fontsize=16)
ax.set_title('best prediction (female)', fontsize=18)
fig.savefig('../results/plots/best_pred_f.png', facecolor='white', bbox_inches='tight', dpi=100)
plt.show()

In [None]:
# Show slice 46 (first axis) of the best male prediction and save the figure.
fig, ax = plt.subplots(figsize=(12,22.4))
ax.imshow(best_male_image[46,:], cmap='gist_gray')
ax.tick_params(labelsize=16, size=4)
ax.set_xlabel('x', fontsize=16)
ax.set_ylabel('y', fontsize=16)
ax.set_title('best prediction (male)', fontsize=18)
fig.savefig('../results/plots/best_pred_m.png', facecolor='white', bbox_inches='tight', dpi=100)
plt.show()

In [None]:
# Show slice 38 (first axis) of the worst female prediction and save the figure.
fig, ax = plt.subplots(figsize=(12,22.4))
ax.imshow(worst_female_image[38,:], cmap='gist_gray')
ax.tick_params(labelsize=16, size=4)
ax.set_xlabel('x', fontsize=16)
ax.set_ylabel('y', fontsize=16)
ax.set_title('worst prediction (female)', fontsize=18)
fig.savefig('../results/plots/worst_pred_f.png', facecolor='white', bbox_inches='tight', dpi=100)
plt.show()

In [None]:
# Show slice 38 (first axis) of the worst male prediction and save the figure.
fig, ax = plt.subplots(figsize=(12,22.4))
ax.imshow(worst_male_image[38,:], cmap='gist_gray')
ax.tick_params(labelsize=16, size=4)
ax.set_xlabel('x', fontsize=16)
ax.set_ylabel('y', fontsize=16)
ax.set_title('worst prediction (male)', fontsize=18)
fig.savefig('../results/plots/worst_pred_m.png', facecolor='white', bbox_inches='tight', dpi=100)
plt.show()