# Evaluation

In [44]:
%load_ext autoreload
%autoreload 2

# Add the directory two levels up to the import path so that the `src.*`
# imports below resolve (presumably the repository root — confirm).
import sys
sys.path.append('../../')

# Restrict CUDA-aware libraries to GPU 0. This is set before the `src.model`
# import below so it is already in place when any framework initialises CUDA.
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import datetime

import numpy as np
import pandas as pd

from src.data import train_test_split, MRISequence
from src.model import create_model, compile_model, load_checkpoint
from src.model.evaluation import show_metrics

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [116]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="white")

plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['image.cmap'] = 'viridis'

%config InlineBackend.figure_format='retina'
plt.rcParams.update({'font.size': 15})

In [154]:
from src.evaluation.segmentation_masks import SegmentationMasksSaver

# Directory handed to the saver, given relative to this notebook's location.
ROOT_DIR = '../../tmp'

# NOTE(review): presumably loads the evaluation results stored under ROOT_DIR;
# the cells below rely only on its `df` attribute (one row per `notebook_key`,
# one column per metric) — confirm against src.evaluation.segmentation_masks.
saver = SegmentationMasksSaver(ROOT_DIR)

In [155]:
# Preview the first rows of the wide results table exposed by the saver.
saver.df.head()

Unnamed: 0,notebook_key,y_true__mean,y_pred__mean,arr_heat_sum__0__mean,arr_heat_sum__1__mean,arr_heat_sum__2__mean,arr_heat_sum__3__mean,arr_heat_sum__4__mean,arr_heat_sum_norm__0__mean,arr_heat_sum_norm__1__mean,...,insertion__auc_min,insertion__auc_std,deletion__heatmaps,deletion__auc_mean,deletion__auc_p25,deletion__auc_median,deletion__auc_p75,deletion__auc_max,deletion__auc_min,deletion__auc_std
0,captum--guided-grad-cam,0.5,0.488614,8952.845436,2008.44571,1411.628132,190.793925,10.16276,0.009552,0.007861,...,0.359304,0.176769,40,0.55384,0.237776,0.524651,0.856523,0.919231,0.1166,0.302225
1,captum--grad-cam,0.5,0.488614,96272.987411,24757.087534,18823.900655,1789.623854,105.897631,0.102545,0.097233,...,0.411934,0.101766,40,0.513895,0.452784,0.513192,0.555588,0.785576,0.31701,0.103037
2,risei--3d-cnn--m+2048--b1+0--b2+1--s+8--p1+0.25,0.5,0.488614,536722.35483,151643.803119,96426.4286,9972.785964,980.848283,0.575552,0.574063,...,0.394077,0.088723,40,0.564541,0.505757,0.565032,0.602045,0.779333,0.402835,0.07973
3,risei--3d-cnn--m+2048--b1+0--b2+1--s+16--p1+0.25,0.5,0.488614,542537.846278,154568.07755,97910.540762,10286.938149,1014.107901,0.582019,0.584605,...,0.373466,0.092804,40,0.575466,0.521729,0.570917,0.649461,0.73844,0.3637,0.087377
4,captum--guided-backprop,0.5,0.488614,42151.848967,12439.041002,7837.9258,1125.75961,98.477822,0.045209,0.047082,...,0.368946,0.200813,40,0.380245,0.114532,0.348439,0.615156,0.766453,0.076679,0.263189


In [156]:
# Show every column name of the results table as a plain Python list
# (the DataFrame preview elides the middle columns).
saver.df.columns.tolist()

['notebook_key',
 'y_true__mean',
 'y_pred__mean',
 'arr_heat_sum__0__mean',
 'arr_heat_sum__1__mean',
 'arr_heat_sum__2__mean',
 'arr_heat_sum__3__mean',
 'arr_heat_sum__4__mean',
 'arr_heat_sum_norm__0__mean',
 'arr_heat_sum_norm__1__mean',
 'arr_heat_sum_norm__2__mean',
 'arr_heat_sum_norm__3__mean',
 'arr_heat_sum_norm__4__mean',
 'arr_heat_sum_gain__0__mean',
 'arr_heat_sum_gain__1__mean',
 'arr_heat_sum_gain__2__mean',
 'arr_heat_sum_gain__3__mean',
 'arr_heat_sum_gain__4__mean',
 'arr_heat_sum_gain_other__mean',
 'y_true__std',
 'y_pred__std',
 'arr_heat_sum__0__std',
 'arr_heat_sum__1__std',
 'arr_heat_sum__2__std',
 'arr_heat_sum__3__std',
 'arr_heat_sum__4__std',
 'arr_heat_sum_norm__0__std',
 'arr_heat_sum_norm__1__std',
 'arr_heat_sum_norm__2__std',
 'arr_heat_sum_norm__3__std',
 'arr_heat_sum_norm__4__std',
 'arr_heat_sum_gain__0__std',
 'arr_heat_sum_gain__1__std',
 'arr_heat_sum_gain__2__std',
 'arr_heat_sum_gain__3__std',
 'arr_heat_sum_gain__4__std',
 'arr_heat_sum_gai

In [157]:
# Reshape the wide results table (one row per notebook, one column per metric)
# into long form with the columns `notebook_key`, `metric` and `value`.
#
# The columns are built from whole arrays rather than by appending one cell at
# a time inside a Python loop over `iterrows()`. Row order is unchanged: all
# metrics of the first notebook (in column order), then all metrics of the
# second notebook, and so on, so the resulting integer index is the same too.
#
# NOTE(review): assumes every metric column is numeric, as in the preview
# above, so the flattened values share a single numeric dtype — confirm.
# `np` and `pd` come from the imports in the first cell.
df = saver.df.set_index('notebook_key')
n_rows, n_cols = df.shape

new_data = {
    # each notebook key repeated once per metric column
    'notebook_key': np.repeat(df.index.to_numpy(), n_cols),
    # the full sequence of metric names repeated once per notebook
    'metric': np.tile(df.columns.to_numpy(), n_rows),
    # row-major flattening, so values line up with the two columns above
    'value': df.to_numpy().ravel(),
}

new_df = pd.DataFrame(data=new_data)

In [158]:
new_df.head()

Unnamed: 0,notebook_key,metric,value
0,captum--guided-grad-cam,y_true__mean,0.5
1,captum--guided-grad-cam,y_pred__mean,0.488614
2,captum--guided-grad-cam,arr_heat_sum__0__mean,8952.845436
3,captum--guided-grad-cam,arr_heat_sum__1__mean,2008.44571
4,captum--guided-grad-cam,arr_heat_sum__2__mean,1411.628132


In [159]:
# Median insertion AUC for each notebook key; higher is better.
new_df.loc[new_df['metric'].eq('insertion__auc_median')]

Unnamed: 0,notebook_key,metric,value
129,captum--guided-grad-cam,insertion__auc_median,0.819407
271,captum--grad-cam,insertion__auc_median,0.64756
413,risei--3d-cnn--m+2048--b1+0--b2+1--s+8--p1+0.25,insertion__auc_median,0.595248
555,risei--3d-cnn--m+2048--b1+0--b2+1--s+16--p1+0.25,insertion__auc_median,0.588939
697,captum--guided-backprop,insertion__auc_median,0.773093


In [160]:
# Median deletion AUC for each notebook key; lower is better.
new_df.loc[new_df['metric'].eq('deletion__auc_median')]

Unnamed: 0,notebook_key,metric,value
137,captum--guided-grad-cam,deletion__auc_median,0.524651
279,captum--grad-cam,deletion__auc_median,0.513192
421,risei--3d-cnn--m+2048--b1+0--b2+1--s+8--p1+0.25,deletion__auc_median,0.565032
563,risei--3d-cnn--m+2048--b1+0--b2+1--s+16--p1+0.25,deletion__auc_median,0.570917
705,captum--guided-backprop,deletion__auc_median,0.348439


In [161]:
# Heat-sum gain of brain vs. non-brain areas (the `__50%` column, presumably
# the median) for each notebook key; higher is better.
new_df.loc[new_df['metric'].eq('arr_heat_sum_gain_other__50%')]

Unnamed: 0,notebook_key,metric,value
89,captum--guided-grad-cam,arr_heat_sum_gain_other__50%,0.646925
231,captum--grad-cam,arr_heat_sum_gain_other__50%,0.689266
373,risei--3d-cnn--m+2048--b1+0--b2+1--s+8--p1+0.25,arr_heat_sum_gain_other__50%,0.997145
515,risei--3d-cnn--m+2048--b1+0--b2+1--s+16--p1+0.25,arr_heat_sum_gain_other__50%,0.998381
657,captum--guided-backprop,arr_heat_sum_gain_other__50%,1.048978
