# Parameters

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the project sources are picked up without a restart.
%load_ext autoreload
%autoreload 2

import sys
# Add the directory three levels up to the import path — presumably the
# repository root, so that the `src.*` imports below resolve (verify against
# the repo layout).
sys.path.append('../../..')

import os
# Restrict CUDA to device index 1 for this process. This is set here, before
# TensorFlow is imported further down in the notebook.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import datetime

import numpy as np
import pandas as pd

from src.data import train_test_split, MRISequence
from src.model import create_model, compile_model, load_checkpoint
from src.model.evaluation import show_metrics

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="white")

plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['image.cmap'] = 'viridis'

%config InlineBackend.figure_format='retina'
plt.rcParams.update({'font.size': 15})

In [3]:
import tensorflow as tf

# Seeding is intentionally left disabled here; uncomment both lines to make
# TensorFlow's random ops reproducible.
# RANDOM_SEED = 250398
# tf.random.set_seed(RANDOM_SEED)

# Report the TensorFlow version and how many GPUs it can see.
print(tf.version.VERSION)
# Use the stable `tf.config.list_physical_devices` entry point instead of the
# `tf.config.experimental` alias; the printed text is unchanged.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

2.3.2
Num GPUs Available:  1


## Setup

In [4]:
%%time

# Root of all local artefacts (data, checkpoints, logs), relative to this notebook.
ROOT_DIR = '../../../tmp'
DEFAULT_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'checkpoints')
DEFAULT_BCKP_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'bckp-checkpoints')

LOG_DIRECTORY = os.path.join(ROOT_DIR, 'logs')
CHECKPOINT_DIRECTORY = DEFAULT_CHECKPOINT_DIRECTORY_LOCAL

# The "*_LOCAL" names are plain aliases of the directories above.
LOG_DIRECTORY_LOCAL = LOG_DIRECTORY
CHECKPOINT_DIRECTORY_LOCAL = CHECKPOINT_DIRECTORY

DATA_DIR_NAME = 'data-v3'
DATA_DIR = os.path.join(ROOT_DIR, DATA_DIR_NAME)

saliencies_and_segmentations_v2_path = os.path.join(ROOT_DIR, 'saliencies_and_segmentations_v2')

# Create the output directories if needed. `makedirs(..., exist_ok=True)` also
# creates missing parents (e.g. ROOT_DIR itself) and avoids the check-then-create
# race of `if not exists: mkdir`.
os.makedirs(CHECKPOINT_DIRECTORY, exist_ok=True)
os.makedirs(LOG_DIRECTORY, exist_ok=True)

# Whether a dedicated validation sequence is built later in this cell.
val = False

class_names = ['AD', 'CN']

# get paths to data
# NOTE(review): `split` presumably gives the train/test/val fractions, matching
# the order of the returned directories — confirm against `train_test_split`.
train_dir, test_dir, val_dir = train_test_split(
    saliencies_and_segmentations_v2_path,
    ROOT_DIR,
    split=(0.8, 0.15, 0.05),
    dirname=DATA_DIR_NAME)

# set the batch size for mri seq
batch_size = 12
input_shape = (104, 128, 104, 1) # (112, 112, 105, 1)
resize_img = True
crop_img = True

# if y is one-hot encoded or just scalar number
one_hot = True

# class weights (see analysis notebook)
class_weights = {0: 0.8072289156626505, 1: 1.3137254901960784}

# description statistics of the dataset (hard-coded, precomputed values)
desc = {'mean': -3.6344006e-09, 'std': 1.0000092, 'min': -1.4982183, 'max': 10.744175}
# The former `if 'desc' not in locals():` fallback was removed: `desc` is always
# assigned on the line above, so that branch could never run, and it called
# `get_description`, which none of the imports visible in this notebook provide.
# NOTE(review): to recompute the statistics, that branch called
# `get_description(MRISequence(train_dir, 64, class_names=class_names,
# input_shape=input_shape), max_samples=None)` — import the helper first.

normalization={ 'type':'normalization', 'desc': desc }
# normalization={'type':'standardization', 'desc':desc }

augmentations = None
augmentations_inplace = True
# enable augmentations in mri seq (otherwise it can be enabled in dataset)
# augmentations={ 'random_swap_hemispheres': 0.5 }

# Keyword arguments shared by every MRISequence constructed below.
_seq_kwargs = dict(
    class_names=class_names,
    input_shape=input_shape,
    resize_img=resize_img,
    crop_img=crop_img,
    one_hot=one_hot,
    normalization=normalization,
)

# Training sequence: the only one that receives the augmentation settings.
print('initializing train_seq...')
train_seq = MRISequence(
    train_dir,
    batch_size,
    augmentations=augmentations,
    augmentations_inplace=augmentations_inplace,
    class_weights=class_weights,
    **_seq_kwargs)

# Test sequence: shared settings only (no class weights, no augmentations).
print('initializing test_seq...')
test_seq = MRISequence(test_dir, batch_size, **_seq_kwargs)

# Validation sequence: reuse the test sequence unless `val` asks for a dedicated one.
if not val:
    print('val_seq = test_seq')
    val_seq = test_seq
else:
    print('initializing val_seq...')
    val_seq = MRISequence(
        val_dir,
        batch_size,
        class_weights=class_weights,
        **_seq_kwargs)

# Timestamp-based key identifying this run; it names the run's log sub-directory.
model_key = format(datetime.datetime.now(), '%Y%m%d-%H%M%S')
log_dir = os.path.join(LOG_DIRECTORY, model_key)
print(f'log_dir: {log_dir}')

not copying files since the destination directory already exists
initializing train_seq...
initializing test_seq...
val_seq = test_seq
log_dir: ../../../tmp\logs\20210504-190308
Wall time: 5 ms


## Analysis

Each experiment consisted of 10 images: 5 true positives (TP) and 5 true negatives (TN).

In [5]:
from os import listdir
from os.path import isfile, join

# Directory containing the saved heatmap-evaluation histories.
fpath = os.path.join(ROOT_DIR, "risei-history/risei-2d-res-net-parameters")

# Keep only regular `.cls` files: the cells below parse the `.cls` file name and
# strip that 4-character suffix, so any other file would make them fail.
# Sorting makes the order (and hence the row order of the resulting frame)
# independent of the OS-specific order returned by `listdir`.
files = sorted(
    f for f in listdir(fpath)
    if isfile(join(fpath, f)) and f.endswith('.cls')
)
files[:5]

['hmap-parameters--deletion--b1+0-b2+0-b2+0.cls',
 'hmap-parameters--deletion--b1+0-b2+0-b2+1.cls',
 'hmap-parameters--deletion--b1+0-b2+0.25-b2+0.cls',
 'hmap-parameters--deletion--b1+0-b2+0.25-b2+1.cls',
 'hmap-parameters--deletion--b1+0-b2+0.5-b2+0.cls']

In [6]:
import re


# Compiled once instead of on every call. A raw string is used so that the
# regex escapes (\w, \d, \+, \.) are not interpreted as (invalid) string escapes.
_FNAME_PATTERN = re.compile(r"^hmap-parameters--(\w+)--b1\+(\d+[.]?\d*)-b2\+(\d+[.]?\d*)-b2\+(\d+[.]?\d*)\.cls$")


def parse(fname):
    """Extract the experiment parameters encoded in a history file name.

    Expected form: ``hmap-parameters--<metric>--b1+<num>-b2+<num>-b2+<num>.cls``.

    Args:
        fname: File name (without directory) to parse.

    Returns:
        A 4-tuple of strings ``(metric, b1, b2, b2_value)``; the numeric parts
        are returned as text and are not converted here.

    Raises:
        ValueError: If ``fname`` does not follow the expected pattern.
    """
    match = _FNAME_PATTERN.match(fname)
    if match is None:
        # Fail with a message naming the offending file instead of an
        # AttributeError on None.
        raise ValueError(f"unexpected history file name: {fname!r}")
    return match.groups()

print(parse('hmap-parameters--deletion--b1+0-b2+0.5-b2+0.cls'))

('deletion', '0', '0.5', '0')


In [14]:
from src.heatmaps.evaluation import HeatmapEvaluationHistory

# Column-oriented accumulator: column name -> list of values, one per history file.
data = {}


def append(key, value):
    """Append ``value`` to the list stored under ``key`` in ``data``, creating the list on first use."""
    data.setdefault(key, []).append(value)


for fname in files:
    # Experiment parameters are encoded in the file name.
    metric, b1, b2, b2_value = parse(fname)
    append('metric', metric)
    append('b1', float(b1))
    append('b2', float(b2))
    append('b2_value', float(b2_value))

    # The history is loaded by name without the 4-character ".cls" suffix.
    history = HeatmapEvaluationHistory.load(fpath, fname[:-4])
    # Use a dedicated name here: `desc` already holds the dataset statistics
    # defined in the setup cell and must not be overwritten by this loop.
    history_desc = history._description()
    for key, value in history_desc.items():
        append(key, value)


# One row per history file: parsed parameters plus the history's description fields.
df = pd.DataFrame(data=data)
df

Unnamed: 0,metric,b1,b2,b2_value,heatmaps,auc_mean,auc_p25,auc_median,auc_p75,auc_max,auc_min,auc_std
0,deletion,0.0,0.0,0.0,20,0.539094,0.105726,0.531534,0.98708,0.998951,0.009464,0.43275
1,deletion,0.0,0.0,1.0,20,0.539094,0.105726,0.531534,0.98708,0.998951,0.009464,0.43275
2,deletion,0.0,0.25,0.0,20,0.5377,0.09844,0.538055,0.987488,0.998967,0.01051,0.435707
3,deletion,0.0,0.25,1.0,20,0.460627,0.038588,0.349018,0.929746,0.973733,0.008996,0.425593
4,deletion,0.0,0.5,0.0,20,0.536553,0.083014,0.543233,0.987836,0.999009,0.018265,0.433824
5,deletion,0.0,0.5,1.0,20,0.54128,0.125306,0.524549,0.98709,0.998951,0.004162,0.430789
6,deletion,0.0,0.75,0.0,20,0.565121,0.113926,0.656168,0.992085,0.999543,0.040776,0.422006
7,deletion,0.0,0.75,1.0,20,0.613237,0.285617,0.649602,0.98709,0.998951,0.072185,0.368134
8,deletion,0.0,1.0,0.0,20,0.635256,0.267365,0.705851,0.995426,1.000102,0.184701,0.362711
9,deletion,0.0,1.0,1.0,20,0.613237,0.285617,0.649602,0.98709,0.998951,0.072185,0.368134


In [8]:
def table(metric, value, frame=None, fill_value=0):
    """Pivot one statistic of one metric into a b1 x (b2, b2_value) table.

    Args:
        metric: Value of the ``metric`` column to keep (e.g. ``'deletion'``).
        value: Name of the column whose values fill the table.
        frame: DataFrame to pivot. Defaults to the notebook-level ``df`` so
            existing two-argument calls keep working.
        fill_value: Value shown for (b1, b2, b2_value) combinations that have
            no row. The default of 0 matches the previous behaviour; pass
            ``np.nan`` to keep missing combinations distinguishable from a
            real 0.

    Returns:
        DataFrame indexed by ``b1`` with ``(b2, b2_value)`` MultiIndex columns.
        Duplicate combinations are averaged (``pivot_table`` default).
    """
    source = df if frame is None else frame
    subset = source[source['metric'] == metric]
    return pd.pivot_table(
        subset,
        values=value,
        index=["b1"],
        columns=["b2", "b2_value"],
        fill_value=fill_value,
    )

In [9]:
# Median deletion AUC for every (b1, b2, b2_value) combination.
table(metric='deletion', value='auc_median')

b2,0.00,0.00,0.25,0.25,0.50,0.50,0.75,0.75,1.00,1.00
b2_value,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
b1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0.0,0.531534,0.531534,0.538055,0.349018,0.543233,0.524549,0.656168,0.649602,0.705851,0.649602
0.25,0.535414,0.535414,0.536723,0.344126,0.54196,0.52529,0.645467,0.649602,0.0,0.0


In [10]:
# Median insertion AUC for every (b1, b2, b2_value) combination.
table(metric='insertion', value='auc_median')

b2,0.00,0.00,0.25,0.25,0.50,0.50,0.75,0.75,1.00,1.00
b2_value,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
b1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0.0,0.590623,0.590623,0.548786,0.721142,0.487413,0.537548,0.469592,0.657508,0.464173,0.657508
0.25,0.583753,0.583753,0.548281,0.723357,0.48717,0.535909,0.473418,0.657508,0.469577,0.0


In [11]:
# Both metrics side by side: rows are b1, columns are (metric, b2, b2_value).
# Combinations without a row are shown as 0 because of `fill_value=0`.
df_m = df.copy().sort_values(by="b1")
pd.pivot_table(
    df_m,
    index=["b1"],
    columns=["metric", "b2", "b2_value"],
    values="auc_median",
    fill_value=0,
)

metric,deletion,deletion,deletion,deletion,deletion,deletion,deletion,deletion,deletion,deletion,insertion,insertion,insertion,insertion,insertion,insertion,insertion,insertion,insertion,insertion
b2,0.00,0.00,0.25,0.25,0.50,0.50,0.75,0.75,1.00,1.00,0.00,0.00,0.25,0.25,0.50,0.50,0.75,0.75,1.00,1.00
b2_value,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
b1,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3
0.0,0.531534,0.531534,0.538055,0.349018,0.543233,0.524549,0.656168,0.649602,0.705851,0.649602,0.590623,0.590623,0.548786,0.721142,0.487413,0.537548,0.469592,0.657508,0.464173,0.657508
0.25,0.535414,0.535414,0.536723,0.344126,0.54196,0.52529,0.645467,0.649602,0.0,0.0,0.583753,0.583753,0.548281,0.723357,0.48717,0.535909,0.473418,0.657508,0.469577,0.0


In [12]:
# NOTE(review): this cell repeats the pivot of the preceding cell unchanged.
# Rows are b1, columns are (metric, b2, b2_value); missing combinations show as 0.
df_m = df.copy().sort_values(by="b1")
pd.pivot_table(
    data=df_m,
    values="auc_median",
    index=["b1"],
    columns=["metric", "b2", "b2_value"],
    fill_value=0,
)

metric,deletion,deletion,deletion,deletion,deletion,deletion,deletion,deletion,deletion,deletion,insertion,insertion,insertion,insertion,insertion,insertion,insertion,insertion,insertion,insertion
b2,0.00,0.00,0.25,0.25,0.50,0.50,0.75,0.75,1.00,1.00,0.00,0.00,0.25,0.25,0.50,0.50,0.75,0.75,1.00,1.00
b2_value,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
b1,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3
0.0,0.531534,0.531534,0.538055,0.349018,0.543233,0.524549,0.656168,0.649602,0.705851,0.649602,0.590623,0.590623,0.548786,0.721142,0.487413,0.537548,0.469592,0.657508,0.464173,0.657508
0.25,0.535414,0.535414,0.536723,0.344126,0.54196,0.52529,0.645467,0.649602,0.0,0.0,0.583753,0.583753,0.548281,0.723357,0.48717,0.535909,0.473418,0.657508,0.469577,0.0


In [13]:
# Statistic to combine across both metrics.
col = 'auc_median'
df_m = df.copy().sort_values('b1')


def apply_fn(row):
    """Return the row's ``col`` value, replaced by ``1 - value`` for deletion rows.

    NOTE(review): presumably this puts deletion and insertion AUCs on a
    comparable scale before they are averaged — confirm the intended reading.
    """
    if row['metric'] == 'deletion':
        return 1 - row[col]
    return row[col]


# Compute the new column from `df_m` itself rather than from `df`, so the
# assignment does not rely on implicit index alignment between two frames.
df_m[col] = df_m.apply(apply_fn, axis=1)

# No `metric` level here: deletion and insertion rows sharing the same
# (b1, b2, b2_value) are averaged (`pivot_table` default); missing cells stay NaN.
pd.pivot_table(df_m, values=col, index=['b1'], columns=['b2', 'b2_value'], fill_value=np.nan)

b2,0.00,0.00,0.25,0.25,0.50,0.50,0.75,0.75,1.00,1.00
b2_value,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
b1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0.0,0.529544,0.529544,0.505365,0.686062,0.47209,0.506499,0.406712,0.503953,0.379161,0.503953
0.25,0.524169,0.524169,0.505779,0.689615,0.472605,0.50531,0.413976,0.503953,0.469577,
