# Heatmap Parameters Analysis

In [20]:
%load_ext autoreload
%autoreload 2

import sys
# Extend the module search path with the directory four levels up;
# presumably the repository root holding the `src` package imported
# below — TODO confirm.
sys.path.append('../../../..')

import os
# Restrict CUDA to the device with index 1. This is set here, before
# TensorFlow is imported further down in the notebook.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import datetime

import numpy as np
import pandas as pd

from src.data import train_test_split, MRISequence
from src.model import create_model, compile_model, load_checkpoint
from src.model.evaluation import show_metrics

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="white")

plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['image.cmap'] = 'viridis'

%config InlineBackend.figure_format='retina'
plt.rcParams.update({'font.size': 15})

In [3]:
import tensorflow as tf

# RANDOM_SEED = 250398
# tf.random.set_seed(RANDOM_SEED)

# Report the TensorFlow version and the number of GPUs TensorFlow can see.
print(tf.version.VERSION)
# `tf.config.list_physical_devices` is the stable name of the former
# `tf.config.experimental.list_physical_devices`.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

2.3.2
Num GPUs Available:  1


## Setup

In [4]:
%%time

# Root directory under which data, checkpoints and logs are stored.
ROOT_DIR = '../../../../tmp'
DEFAULT_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'checkpoints')
DEFAULT_BCKP_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'bckp-checkpoints')

LOG_DIRECTORY = os.path.join(ROOT_DIR, 'logs')
CHECKPOINT_DIRECTORY = DEFAULT_CHECKPOINT_DIRECTORY_LOCAL

LOG_DIRECTORY_LOCAL = LOG_DIRECTORY
CHECKPOINT_DIRECTORY_LOCAL = CHECKPOINT_DIRECTORY

DATA_DIR_NAME = 'data-v3'
DATA_DIR = os.path.join(ROOT_DIR, DATA_DIR_NAME)

saliencies_and_segmentations_v2_path = os.path.join(ROOT_DIR, 'saliencies_and_segmentations_v2')

# makedirs(exist_ok=True) replaces the separate exists()/mkdir() steps:
# it does not fail if the directory appears in between, and it also
# creates a missing ROOT_DIR instead of raising FileNotFoundError.
os.makedirs(CHECKPOINT_DIRECTORY, exist_ok=True)
os.makedirs(LOG_DIRECTORY, exist_ok=True)

# When False, no separate validation sequence is built and the test
# sequence is reused as the validation sequence further down.
val = False

class_names = ['AD', 'CN']

# get paths to data
train_dir, test_dir, val_dir = train_test_split(
    saliencies_and_segmentations_v2_path,
    ROOT_DIR,
    split=(0.8, 0.15, 0.05),
    dirname=DATA_DIR_NAME)

# set the batch size for mri seq
batch_size = 12
input_shape = (104, 128, 104, 1) # (112, 112, 105, 1)
resize_img = True
crop_img = True

# if y is one-hot encoded or just scalar number
one_hot = True

# class weights (see analysis notebook)
class_weights = {0: 0.8072289156626505, 1: 1.3137254901960784}

# Cached description statistics of the dataset.
# Set `desc = None` here to recompute them in the branch below.
desc = {'mean': -3.6344006e-09, 'std': 1.0000092, 'min': -1.4982183, 'max': 10.744175}

# The previous guard (`'desc' not in locals()`) could never be true because
# `desc` is assigned right above; an explicit None check makes the
# recomputation switch real.
# NOTE(review): `get_description` is not imported in the visible cells —
# import it before enabling this branch.
if desc is None:
    print('initializing desc...')
    desc = get_description(MRISequence(
        train_dir,
        64,
        class_names=class_names,
        input_shape=input_shape),
        max_samples=None)
    print(desc)


normalization={ 'type':'normalization', 'desc': desc }
# normalization={'type':'standardization', 'desc':desc }

augmentations = None
augmentations_inplace = True
# enable augmentations in mri seq (otherwise it can be enabled in dataset)
# augmentations={ 'random_swap_hemispheres': 0.5 }

# Keyword arguments passed unchanged to every MRISequence built below.
shared_seq_kwargs = dict(
    class_names=class_names,
    input_shape=input_shape,
    resize_img=resize_img,
    crop_img=crop_img,
    one_hot=one_hot,
    normalization=normalization,
)

# Training sequence: the only one that receives the augmentation settings.
print('initializing train_seq...')
train_seq = MRISequence(
    train_dir,
    batch_size,
    augmentations=augmentations,
    augmentations_inplace=augmentations_inplace,
    class_weights=class_weights,
    **shared_seq_kwargs)

# Test sequence: no augmentations and no class weights.
print('initializing test_seq...')
test_seq = MRISequence(test_dir, batch_size, **shared_seq_kwargs)

# Validation sequence: reuse the test sequence unless `val` is enabled.
if not val:
    print('val_seq = test_seq')
    val_seq = test_seq
else:
    print('initializing val_seq...')
    val_seq = MRISequence(
        val_dir,
        batch_size,
        class_weights=class_weights,
        **shared_seq_kwargs)

# A timestamp key names the per-run log directory.
model_key = f'{datetime.datetime.now():%Y%m%d-%H%M%S}'
log_dir = os.path.join(LOG_DIRECTORY, model_key)
print(f'log_dir: {log_dir}')

not copying files since the destination directory already exists
initializing train_seq...
initializing test_seq...
val_seq = test_seq
log_dir: ../../../../tmp\logs\20210412-212049
Wall time: 0 ns


## Analysis

Each experiment consisted of 10 images: 5 true positives (TP) and 5 true negatives (TN).

In [19]:
from os import listdir
from os.path import isfile, join

# Directory holding the saved heatmap-parameter evaluation histories.
fpath = os.path.join(ROOT_DIR, "risei-history/hmap-parameters")

# Keep only regular `.cls` files so stray files in the directory are not
# handed to the file-name parser. `os.listdir` returns entries in arbitrary
# order, so sort for a reproducible row order downstream.
files = sorted(
    f for f in listdir(fpath)
    if f.endswith('.cls') and isfile(join(fpath, f))
)
files[:5]

['hmap-parameters--deletion--m+1024-p1+0.25.cls',
 'hmap-parameters--deletion--m+1024-p1+0.3333333333333333.cls',
 'hmap-parameters--deletion--m+1024-p1+0.5.cls',
 'hmap-parameters--deletion--m+1024-p1+0.6666666666666666.cls',
 'hmap-parameters--deletion--m+1024-p1+0.75.cls']

In [24]:
import re


# Compiled once. The raw string keeps `\w`, `\d`, `\+` and `\.` as regex
# escapes instead of invalid string escapes.
_HMAP_FNAME_PATTERN = re.compile(r"^hmap-parameters--(\w+)--m\+(\d+)-p1\+(\d+[.]?\d*)\.cls$")


def parse(fname):
    """Split a heatmap-parameters history file name into its components.

    Parameters
    ----------
    fname : str
        File name of the form
        ``hmap-parameters--<metric>--m+<masks_count>-p1+<p1>.cls``.

    Returns
    -------
    tuple of str
        ``(metric, masks_count, p1)`` exactly as they appear in the name;
        numeric parts are returned as strings.

    Raises
    ------
    ValueError
        If ``fname`` does not follow the expected naming scheme.
    """
    match = _HMAP_FNAME_PATTERN.match(fname)
    if match is None:
        # Fail with the offending name instead of an AttributeError on None.
        raise ValueError(f"unexpected heatmap-parameters file name: {fname!r}")
    return match.groups()

print(parse('hmap-parameters--deletion--m+1024-p1+0.6666666666666666.cls'))

('deletion', '1024', '0.6666666666666666')


In [57]:
from src.heatmaps.evaluation import HeatmapEvaluationHistory

# Column-oriented accumulator: column name -> list of collected values.
data = {}


def append(key, value):
    """Append `value` to the list stored under `key` in `data`.

    The list is created on first use of `key`.
    """
    data.setdefault(key, []).append(value)

    
# One row per history file: the parameters encoded in the file name plus
# the summary statistics reported by the loaded history.
for fname in files:
    metric, masks_count, p1 = parse(fname)
    append('metric', metric)
    append('masks_count', int(masks_count))
    append('p1', float(p1))

    # `load` is given the file name without its extension.
    history_name = os.path.splitext(fname)[0]
    history = HeatmapEvaluationHistory.load(fpath, history_name)
    # Named `history_desc` so the dataset statistics dict `desc` defined in
    # the Setup cell is not overwritten by this loop.
    # NOTE(review): `_description` is a private method of the history class.
    history_desc = history._description()
    for key, value in history_desc.items():
        append(key, value)


df = pd.DataFrame(data=data)
df.head()

Unnamed: 0,metric,masks_count,p1,heatmaps,auc_mean,auc_p25,auc_median,auc_p75,auc_max,auc_min,auc_std
0,deletion,1024,0.25,10,0.649712,0.635132,0.686637,0.708797,0.765139,0.410772,0.098614
1,deletion,1024,0.333333,10,0.643368,0.598627,0.643846,0.711018,0.746464,0.499782,0.077135
2,deletion,1024,0.5,10,0.674387,0.639377,0.652944,0.712121,0.791653,0.60572,0.054341
3,deletion,1024,0.666667,10,0.696549,0.684787,0.709787,0.719024,0.796801,0.581419,0.055599
4,deletion,1024,0.75,10,0.673616,0.62503,0.673777,0.716514,0.772723,0.589796,0.059095


In [59]:
def table(metric, value, frame=None):
    """Pivot one statistic of one metric into a masks_count x p1 grid.

    Parameters
    ----------
    metric : str
        Value of the ``metric`` column to keep (e.g. ``'deletion'``).
    value : str
        Name of the column whose values fill the cells.
    frame : pandas.DataFrame, optional
        Frame to pivot; needs the columns ``metric``, ``masks_count``,
        ``p1`` and ``value``. Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``masks_count``, one column per ``p1``. Each cell is
        the mean of ``value`` over the matching rows; combinations with no
        rows are filled with 0.
    """
    # Fall back to the global frame only when none is supplied, so existing
    # two-argument calls keep working.
    source = df if frame is None else frame
    selected = source[source['metric'] == metric].sort_values('masks_count')
    return pd.pivot_table(
        selected,
        values=value,
        index=["masks_count"],
        columns="p1",
        aggfunc="mean",  # the pivot_table default, spelled out
        fill_value=0)

In [60]:
table('deletion', 'auc_median')

p1,0.250000,0.333333,0.500000,0.666667,0.750000
masks_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
128,0.614635,0.599144,0.650838,0.620568,0.601672
256,0.665848,0.67978,0.668759,0.676215,0.587926
512,0.675962,0.660764,0.668481,0.713105,0.682327
1024,0.686637,0.643846,0.652944,0.709787,0.673777
2048,0.677393,0.645488,0.662798,0.718824,0.681363
4096,0.680063,0.655541,0.661081,0.710274,0.681162


In [61]:
table('insertion', 'auc_median')

p1,0.250000,0.333333,0.500000,0.666667,0.750000
masks_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
128,0.569967,0.587399,0.591171,0.544097,0.520112
256,0.544671,0.505341,0.49444,0.508974,0.495576
512,0.542485,0.486957,0.472576,0.478954,0.459759
1024,0.518594,0.491493,0.473633,0.452452,0.444355
2048,0.524665,0.503283,0.477993,0.475226,0.450821
4096,0.525253,0.498184,0.465332,0.466693,0.454734


In [64]:
# Side-by-side view: one row per masks_count, one column per (p1, metric) pair.
# sort_values already returns a new frame, so `df` itself is left untouched.
df_m = df.sort_values("masks_count")
pd.pivot_table(
    df_m,
    index=["masks_count"],
    columns=["p1", "metric"],
    values="auc_median",
    fill_value=0,
)

p1,0.250000,0.250000,0.333333,0.333333,0.500000,0.500000,0.666667,0.666667,0.750000,0.750000
metric,deletion,insertion,deletion,insertion,deletion,insertion,deletion,insertion,deletion,insertion
masks_count,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
128,0.614635,0.569967,0.599144,0.587399,0.650838,0.591171,0.620568,0.544097,0.601672,0.520112
256,0.665848,0.544671,0.67978,0.505341,0.668759,0.49444,0.676215,0.508974,0.587926,0.495576
512,0.675962,0.542485,0.660764,0.486957,0.668481,0.472576,0.713105,0.478954,0.682327,0.459759
1024,0.686637,0.518594,0.643846,0.491493,0.652944,0.473633,0.709787,0.452452,0.673777,0.444355
2048,0.677393,0.524665,0.645488,0.503283,0.662798,0.477993,0.718824,0.475226,0.681363,0.450821
4096,0.680063,0.525253,0.655541,0.498184,0.661081,0.465332,0.710274,0.466693,0.681162,0.454734


In [65]:
# The `metric` column is not part of the pivot here, so each
# (masks_count, p1) cell aggregates both the insertion and the deletion row;
# the "mean" aggregation averages their auc_median values.
# NOTE(review): confirm that averaging the two metrics is the intended summary.
df_m = df.sort_values("masks_count")
pd.pivot_table(
    df_m,
    index=["masks_count"],
    columns="p1",
    values="auc_median",
    aggfunc="mean",
    fill_value=0,
)

p1,0.250000,0.333333,0.500000,0.666667,0.750000
masks_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
128,0.592301,0.593272,0.621004,0.582333,0.560892
256,0.605259,0.59256,0.5816,0.592594,0.541751
512,0.609224,0.573861,0.570529,0.596029,0.571043
1024,0.602616,0.56767,0.563288,0.58112,0.559066
2048,0.601029,0.574385,0.570396,0.597025,0.566092
4096,0.602658,0.576863,0.563207,0.588483,0.567948
