# Heatmap Parameters Analysis

In [1]:
%load_ext autoreload
%autoreload 2

import sys
# Add the directory four levels up to the import path (presumably the project
# root) so that the `src.*` imports below can be resolved.
sys.path.append('../../../..')

import os
# Expose only the CUDA device with index 1 to this process. This is set before
# TensorFlow is imported further down in the notebook.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import datetime

import numpy as np
import pandas as pd

from src.data import train_test_split, MRISequence
from src.model import create_model, compile_model, load_checkpoint
from src.model.evaluation import show_metrics

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt

# Apply the seaborn theme before the manual matplotlib overrides below.
sns.set(style="white")

# Notebook-wide figure defaults: figure size and the colormap used for images.
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'image.cmap': 'viridis',
})

%config InlineBackend.figure_format='retina'
# Use a base font size of 15 for text in figures.
plt.rcParams.update({'font.size': 15})

In [3]:
import tensorflow as tf

# RANDOM_SEED = 250398
# tf.random.set_seed(RANDOM_SEED)

# Report the TensorFlow version and the number of GPUs visible to it.
print(tf.version.VERSION)
# `tf.config.list_physical_devices` is the stable API; the
# `tf.config.experimental` alias used previously is deprecated.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

2.3.2
Num GPUs Available:  1


## Setup

In [4]:
%%time

# Root folder that holds the data, checkpoints and logs used by this notebook.
ROOT_DIR = '../../../../tmp'
DEFAULT_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'checkpoints')
DEFAULT_BCKP_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'bckp-checkpoints')

LOG_DIRECTORY = os.path.join(ROOT_DIR, 'logs')
CHECKPOINT_DIRECTORY = DEFAULT_CHECKPOINT_DIRECTORY_LOCAL

LOG_DIRECTORY_LOCAL = LOG_DIRECTORY
CHECKPOINT_DIRECTORY_LOCAL = CHECKPOINT_DIRECTORY

DATA_DIR_NAME = 'data-v3'
DATA_DIR = os.path.join(ROOT_DIR, DATA_DIR_NAME)

saliencies_and_segmentations_v2_path = os.path.join(ROOT_DIR, 'saliencies_and_segmentations_v2')

# Create the output folders if they are missing. `exist_ok=True` makes this safe
# to re-run, and `makedirs` also creates missing parent folders, so the cell no
# longer fails when ROOT_DIR itself does not exist yet.
os.makedirs(CHECKPOINT_DIRECTORY, exist_ok=True)
os.makedirs(LOG_DIRECTORY, exist_ok=True)

# With val == False no separate validation sequence is built; the test sequence
# is reused as the validation sequence further down in this cell.
val = False

class_names = ['AD', 'CN']

# Paths to the train / test / validation data.
train_dir, test_dir, val_dir = train_test_split(
    saliencies_and_segmentations_v2_path, ROOT_DIR, split=(0.8, 0.15, 0.05), dirname=DATA_DIR_NAME
)

# Batch size and image geometry handed to every MRI sequence.
batch_size = 12
input_shape = (104, 128, 104, 1) # (112, 112, 105, 1)
resize_img = crop_img = True

# Whether y is one-hot encoded (True) or just a scalar number (False).
one_hot = True

# Class weights (see analysis notebook).
class_weights = {0: 0.8072289156626505, 1: 1.3137254901960784}

# Pre-computed descriptive statistics of the dataset.
#
# A fallback guarded by `if 'desc' not in locals()` used to follow this
# assignment. It could never run, because `desc` is always defined by the line
# below, and it called `get_description`, which is not imported in this
# notebook. The recipe it contained is kept here for reference:
#     get_description(
#         MRISequence(train_dir, 64, class_names=class_names, input_shape=input_shape),
#         max_samples=None)
desc = {'mean': -3.6344006e-09, 'std': 1.0000092, 'min': -1.4982183, 'max': 10.744175}

# Normalization settings handed to the MRI sequences.
normalization = {'type': 'normalization', 'desc': desc}
# normalization={'type':'standardization', 'desc':desc }

augmentations = None
augmentations_inplace = True
# enable augmentations in mri seq (otherwise it can be enabled in dataset)
# augmentations={ 'random_swap_hemispheres': 0.5 }

# Keyword arguments that are identical for the train, test and validation sequences.
_shared_seq_kwargs = dict(
    class_names=class_names,
    input_shape=input_shape,
    resize_img=resize_img,
    crop_img=crop_img,
    one_hot=one_hot,
    normalization=normalization,
)

# Training sequence: additionally receives the augmentation settings and class weights.
print('initializing train_seq...')
train_seq = MRISequence(
    train_dir,
    batch_size,
    augmentations=augmentations,
    augmentations_inplace=augmentations_inplace,
    class_weights=class_weights,
    **_shared_seq_kwargs)

# Test sequence: shared settings only.
print('initializing test_seq...')
test_seq = MRISequence(test_dir, batch_size, **_shared_seq_kwargs)

# Validation sequence: reuse the test sequence unless a dedicated split is requested.
if not val:
    print('val_seq = test_seq')
    val_seq = test_seq
else:
    print('initializing val_seq...')
    val_seq = MRISequence(
        val_dir,
        batch_size,
        class_weights=class_weights,
        **_shared_seq_kwargs)

# Timestamp key for this run; the run's log folder is named after it.
model_key = f'{datetime.datetime.now():%Y%m%d-%H%M%S}'
log_dir = os.path.join(LOG_DIRECTORY, model_key)
print('log_dir:', log_dir)

not copying files since the destination directory already exists
initializing train_seq...
initializing test_seq...
val_seq = test_seq
log_dir: ../../../../tmp\logs\20210504-182413
Wall time: 15.6 ms


## Analysis

Each experiment was evaluated on 10 images: 5 true positives (TP) and 5 true negatives (TN).

In [5]:
from os import listdir
from os.path import isfile, join

# Folder holding the saved evaluation histories of the heatmap-parameter runs.
fpath = os.path.join(ROOT_DIR, "risei-history/heatmap-parameters--b1-0.5-b2-0.5")

# os.listdir returns entries in arbitrary order. Sorting keeps the preview
# below, and the row order of anything built from `files`, identical on every
# run and platform.
files = sorted(f for f in listdir(fpath) if isfile(join(fpath, f)))
files[:5]

['hmap-parameters--deletion--m+128-p1+0.25.cls',
 'hmap-parameters--deletion--m+128-p1+0.3333333333333333.cls',
 'hmap-parameters--deletion--m+128-p1+0.5.cls',
 'hmap-parameters--deletion--m+128-p1+0.6666666666666666.cls',
 'hmap-parameters--deletion--m+128-p1+0.75.cls']

In [6]:
import re


# Compiled once instead of on every call. The raw string keeps `\w`, `\d` and
# `\+` as regex syntax rather than (invalid) Python string escapes.
_HMAP_FNAME_PATTERN = re.compile(r"^hmap-parameters--(\w+)--m\+(\d+)-p1\+(\d+[.]?\d*)\.cls$")


def parse(fname):
    """Split a heatmap-parameters history file name into its components.

    Args:
        fname: File name of the form
            ``hmap-parameters--<metric>--m+<masks_count>-p1+<p1>.cls``.

    Returns:
        Tuple ``(metric, masks_count, p1)`` of strings, exactly as they
        appear in the file name.

    Raises:
        ValueError: If ``fname`` does not follow the expected naming scheme.
    """
    match = _HMAP_FNAME_PATTERN.match(fname)
    if match is None:
        # Fail with a message naming the offending file instead of an
        # AttributeError raised on `None`.
        raise ValueError(f"unexpected heatmap history file name: {fname!r}")
    return match.groups()

print(parse('hmap-parameters--deletion--m+1024-p1+0.6666666666666666.cls'))

('deletion', '1024', '0.6666666666666666')


In [7]:
from src.heatmaps.evaluation import HeatmapEvaluationHistory

# Column name -> list of collected values; turned into a DataFrame later in this cell.
data = {}


def append(key, value):
    """Add ``value`` to the list stored under ``key`` in ``data``.

    The list is created on the first use of ``key``.
    """
    data.setdefault(key, []).append(value)

    
for fname in files:
    # Experiment parameters are encoded in the file name.
    metric, masks_count, p1 = parse(fname)
    append('metric', metric)
    append('masks_count', int(masks_count))
    append('p1', float(p1))

    # The history is loaded by name without the 4-character '.cls' extension.
    history = HeatmapEvaluationHistory.load(fpath, fname[:-4])
    # Deliberately not named `desc`: that name holds the dataset statistics
    # defined in the setup cell and must not be overwritten here.
    history_desc = history._description()
    for key, value in history_desc.items():
        append(key, value)


# One row per history file; columns are the parsed parameters plus the
# entries of each history's description.
df = pd.DataFrame(data=data)
df.head()

Unnamed: 0,metric,masks_count,p1,heatmaps,auc_mean,auc_p25,auc_median,auc_p75,auc_max,auc_min,auc_std
0,deletion,128,0.25,10,0.589281,0.531212,0.602591,0.641664,0.702958,0.454636,0.078994
1,deletion,128,0.333333,10,0.580893,0.528341,0.567233,0.635748,0.712158,0.468233,0.077962
2,deletion,128,0.5,10,0.620902,0.573422,0.622021,0.6523,0.756861,0.518089,0.063379
3,deletion,128,0.666667,10,0.646644,0.575842,0.653779,0.720063,0.765575,0.517155,0.083998
4,deletion,128,0.75,10,0.606913,0.539775,0.565539,0.689257,0.77029,0.427457,0.106312


In [8]:
def table(metric, value, frame=None):
    """Pivot one statistic of one metric into a ``masks_count`` x ``p1`` grid.

    Args:
        metric: Value of the ``metric`` column to keep (e.g. ``'deletion'``).
        value: Name of the column whose values fill the grid
            (e.g. ``'auc_median'``).
        frame: DataFrame with ``metric``, ``masks_count``, ``p1`` and
            ``value`` columns. Defaults to the notebook-level ``df``, which
            keeps existing two-argument calls working.

    Returns:
        DataFrame indexed by ``masks_count`` with one column per ``p1``.
        Rows that fall into the same cell are averaged (the ``pivot_table``
        default) and missing combinations are filled with 0.
    """
    if frame is None:
        frame = df
    df_m = frame[frame['metric'] == metric]
    # No explicit sort is needed: pivot_table returns its index and columns sorted.
    return pd.pivot_table(df_m, values=value, index=["masks_count"], columns="p1", fill_value=0)

In [9]:
# Median deletion AUC for every (masks_count, p1) combination.
table('deletion', 'auc_median')

p1,0.250000,0.333333,0.500000,0.666667,0.750000
masks_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8,0.591361,0.556812,0.533779,0.581542,0.54286
16,0.58651,0.550912,0.517144,0.543406,0.528202
32,0.617767,0.589214,0.634696,0.622196,0.622501
64,0.605517,0.62716,0.648853,0.63737,0.606355
128,0.602591,0.567233,0.622021,0.653779,0.565539
256,0.62774,0.6407,0.652001,0.661095,0.654273
512,0.660608,0.654775,0.688681,0.682118,0.679965


In [10]:
# Median insertion AUC for every (masks_count, p1) combination.
table('insertion', 'auc_median')

p1,0.250000,0.333333,0.500000,0.666667,0.750000
masks_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8,0.57616,0.578847,0.569199,0.601318,0.547716
16,0.587338,0.621799,0.636231,0.625056,0.626285
32,0.59819,0.560023,0.550707,0.552671,0.570513
64,0.548839,0.535751,0.546536,0.515156,0.531588
128,0.582483,0.578112,0.572602,0.530849,0.501366
256,0.534903,0.526852,0.492739,0.524805,0.533466
512,0.531693,0.51649,0.462981,0.465981,0.492084


In [11]:
# Median AUC per masks_count, with one column for every (p1, metric) pair.
df_m = df.sort_values("masks_count").copy()
pd.pivot_table(
    data=df_m,
    values="auc_median",
    index=["masks_count"],
    columns=["p1", "metric"],
    fill_value=0,
)

p1,0.250000,0.250000,0.333333,0.333333,0.500000,0.500000,0.666667,0.666667,0.750000,0.750000
metric,deletion,insertion,deletion,insertion,deletion,insertion,deletion,insertion,deletion,insertion
masks_count,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
8,0.591361,0.57616,0.556812,0.578847,0.533779,0.569199,0.581542,0.601318,0.54286,0.547716
16,0.58651,0.587338,0.550912,0.621799,0.517144,0.636231,0.543406,0.625056,0.528202,0.626285
32,0.617767,0.59819,0.589214,0.560023,0.634696,0.550707,0.622196,0.552671,0.622501,0.570513
64,0.605517,0.548839,0.62716,0.535751,0.648853,0.546536,0.63737,0.515156,0.606355,0.531588
128,0.602591,0.582483,0.567233,0.578112,0.622021,0.572602,0.653779,0.530849,0.565539,0.501366
256,0.62774,0.534903,0.6407,0.526852,0.652001,0.492739,0.661095,0.524805,0.654273,0.533466
512,0.660608,0.531693,0.654775,0.51649,0.688681,0.462981,0.682118,0.465981,0.679965,0.492084


In [12]:
# There is no metric level here, so pivot_table's default mean aggregation
# makes each cell the mean of the insertion and deletion auc_median.
df_m = df.sort_values("masks_count").copy()
pd.pivot_table(
    data=df_m,
    values="auc_median",
    index=["masks_count"],
    columns="p1",
    fill_value=0,
)

p1,0.250000,0.333333,0.500000,0.666667,0.750000
masks_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8,0.58376,0.567829,0.551489,0.59143,0.545288
16,0.586924,0.586356,0.576688,0.584231,0.577243
32,0.607978,0.574619,0.592702,0.587433,0.596507
64,0.577178,0.581456,0.597695,0.576263,0.568972
128,0.592537,0.572672,0.597311,0.592314,0.533452
256,0.581322,0.583776,0.57237,0.59295,0.59387
512,0.59615,0.585633,0.575831,0.574049,0.586025
