# Tf-Explain

In [138]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../../../..')

import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import datetime

import numpy as np

from src.data import train_test_split, MRISequence
from src.model import create_model, compile_model, load_checkpoint
from src.model.evaluation import show_metrics

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="white")

plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['image.cmap'] = 'viridis'

%config InlineBackend.figure_format='retina'
plt.rcParams.update({'font.size': 15})

In [3]:
# import tensorflow as tf

# RANDOM_SEED = 250398
# tf.random.set_seed(RANDOM_SEED)

# print(tf.version.VERSION)
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [4]:
## Setup

In [5]:
%%time

ROOT_DIR = '../../../../tmp'
DEFAULT_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'checkpoints')
DEFAULT_BCKP_CHECKPOINT_DIRECTORY_LOCAL = os.path.join(ROOT_DIR, 'bckp-checkpoints')

LOG_DIRECTORY = os.path.join(ROOT_DIR, 'logs')
CHECKPOINT_DIRECTORY = DEFAULT_CHECKPOINT_DIRECTORY_LOCAL

LOG_DIRECTORY_LOCAL = LOG_DIRECTORY
CHECKPOINT_DIRECTORY_LOCAL = CHECKPOINT_DIRECTORY

DATA_DIR_NAME = 'data-v3'
DATA_DIR = os.path.join(ROOT_DIR, DATA_DIR_NAME)

saliencies_and_segmentations_v2_path = os.path.join(ROOT_DIR, 'saliencies_and_segmentations_v2')

if not os.path.exists(CHECKPOINT_DIRECTORY):
    os.mkdir(CHECKPOINT_DIRECTORY)

if not os.path.exists(LOG_DIRECTORY):
    os.mkdir(LOG_DIRECTORY)

val = False

class_names = ['AD', 'CN']

# get paths to data
train_dir, test_dir, val_dir = train_test_split(
    saliencies_and_segmentations_v2_path,
    ROOT_DIR,
    split=(0.8, 0.15, 0.05),
    dirname=DATA_DIR_NAME)

# set the batch size for mri seq
batch_size = 12
input_shape = (104, 128, 104, 1) # (112, 112, 105, 1)
resize_img = True
crop_img = True

# if y is one-hot encoded or just scalar number
one_hot = True

# class weightss (see analysis notebook)
class_weights = {0: 0.8072289156626505, 1: 1.3137254901960784}

# description statistics of the dataset
desc = {'mean': -3.6344006e-09, 'std': 1.0000092, 'min': -1.4982183, 'max': 10.744175}

if 'desc' not in locals():
    print('initializing desc...')
    desc = get_description(MRISequence(
        train_dir,
        64,
        class_names=class_names,
        input_shape=input_shape),
        max_samples=None)
    print(desc)


normalization={ 'type':'normalization', 'desc': desc }
# normalization={'type':'standardization', 'desc':desc }

augmentations = None
augmentations_inplace = True
# enable augmentations in mri seq (otherwise it can be enabled in dataset)
# augmentations={ 'random_swap_hemispheres': 0.5 }

# initialize sequences
print('initializing train_seq...')
train_seq = MRISequence(
    train_dir,
    batch_size,
    class_names=class_names,
    augmentations=augmentations,
    augmentations_inplace=augmentations_inplace,
    input_shape=input_shape,
    resize_img=resize_img,
    crop_img=crop_img,
    one_hot=one_hot,
    class_weights=class_weights,
    normalization=normalization)

print('initializing test_seq...')
test_seq = MRISequence(
    test_dir,
    batch_size,
    class_names=class_names,
    input_shape=input_shape,
    resize_img=resize_img,
    crop_img=crop_img,
    one_hot=one_hot,
    normalization=normalization)

if val:
    print('initializing val_seq...')
    val_seq = MRISequence(
        val_dir,
        batch_size,
        class_names=class_names,
        input_shape=input_shape,
        resize_img=resize_img,
        crop_img=crop_img,
        one_hot=one_hot,
        class_weights=class_weights,
        normalization=normalization)
else:
    print('val_seq = test_seq')
    val_seq = test_seq

model_key = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = os.path.join(LOG_DIRECTORY, model_key)
print(f'log_dir: {log_dir}')

not copying files since the destination directory already exists
initializing train_seq...
initializing test_seq...
val_seq = test_seq
log_dir: ../../../../tmp\logs\20210403-092508
Wall time: 6.11 ms


In [6]:
# https://www.tensorflow.org/tutorials/structured_data/imbalanced_data#class_weights
# pos / neg
initial_bias = np.log([159/243, 243/159])

model_type = '3d_cnn'
model_config = {
    'input_shape': input_shape,
    'class_names': class_names,
#     'l2_beta': 0.001,
#     'l2_beta': 0.0005,
    'l2_beta': None,
#     'dropout': 0.05,
    'dropout': 0.10,
    'output_bias': initial_bias,
#     'output_bias': None,
    # https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization
    'batch_norm': True,
    'is_complex': False, # a complex layer from the paper, max batch_size is 3
}

compile_config = {
    # default is 0.001
#     'learning_rate': 0.000075,
    'learning_rate': 0.00010,
    'decay_steps': 25,
    'decay_rate': 0.96,
#     'beta_1': 0.85,
    'beta_1': 0.90,
#     'beta_2': 0.990,
    'beta_2': 0.999,
}

train_config = {
    'model_key': model_key,
    'epochs': 150,
    'patience': 75,
    'tensorboard_update_freq': 'epoch',
    'mri_tensorboard_callback': False,
    'model_checkpoint_callback': {'monitor': 'val_auc', 'mode': 'max', 'save_best_only': True},
    'early_stopping_monitor': {'monitor': 'val_auc', 'mode': 'max'},
#     'augmentations': False,
    'augmentations': {
        'invert': (0.5, None),
        'rotate': (0.2, 5), # probability, degrees
        'zoom': (0., 0.),
        'shear': (0.2, 0.5), # probability, degrees
        'blur': (0.2, 0.85),
        'noise': (0.2, 0.00020)
    },
    'batch_size': 8,
#     'model_checkpoint_callback': False,
}

## Model

In [7]:
# model = create_model(model_type, model_config)
# model, *_ = compile_model(model, **compile_config)
# model.build(input_shape=input_shape)
# model.summary()

In [8]:
# load_checkpoint(model, DEFAULT_BCKP_CHECKPOINT_DIRECTORY_LOCAL, '20210308-175324', 'cp-0058.ckpt')

In [31]:
%%time

# just to test of it is ok to clear the session after loading the weigths
# tf.keras.backend.clear_session()

# show_metrics(model, test_seq, class_names)

Wall time: 0 ns


In [32]:
# data = []

# for var in model.trainable_variables:
#     data.append(var.value().numpy().transpose())
    
# data = np.array(data)
# np.save('./weights.npy', data)

In [83]:
weights = np.load('./weights.npy', allow_pickle=True)

In [132]:
import torch.nn as nn
import torch.nn.functional as F
import torch

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv3d(1, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm3d(32)
        self.mpool1 = nn.MaxPool3d(2, stride=2, padding=0)
        
        self.conv2 = nn.Conv3d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm3d(64)
        self.mpool2 = nn.MaxPool3d(3, stride=3)
        
        self.conv3 = nn.Conv3d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm3d(128)
        self.mpool3 = nn.MaxPool3d(4, stride=4)
        
        self.flt = nn.Flatten()
        self.dp1 = nn.Dropout(p=0.1)
        
        self.fc1 = nn.Linear(10240, 256)
        self.dp2 = nn.Dropout(p=0.1)
        
        self.fc2 = nn.Linear(256, 2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.mpool1(x)
        torch.cuda.empty_cache()
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.mpool2(x)
        torch.cuda.empty_cache()
        
        x = self.conv3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.mpool3(x)      
        
        x = self.flt(x)
        x = self.dp1(x)
        
        x = F.relu(self.fc1(x))
        torch.cuda.empty_cache()
        x = self.dp2(x)
        
        x = F.softmax(self.fc2(x), dim=1)
        
        return x


net = Net()

In [141]:
# v data mam ulozene tensorflow vahy po layeroch, takto ich nacitavam do pytorch
for value, param in zip(weights, net.parameters()):
    print(f"---")
    print(value.shape) # .astype(np.float64)
    tensor = torch.from_numpy(value)
    print(param.shape)
    param.data = tensor
#     print(value[0][0])
#     print(param.data.numpy()[0][0])
    
net.eval().to('cuda')

---
(32, 1, 3, 3, 3)
torch.Size([32, 1, 3, 3, 3])
---
(32,)
torch.Size([32])
---
(32,)
torch.Size([32])
---
(32,)
torch.Size([32])
---
(64, 32, 3, 3, 3)
torch.Size([64, 32, 3, 3, 3])
---
(64,)
torch.Size([64])
---
(64,)
torch.Size([64])
---
(64,)
torch.Size([64])
---
(128, 64, 3, 3, 3)
torch.Size([128, 64, 3, 3, 3])
---
(128,)
torch.Size([128])
---
(128,)
torch.Size([128])
---
(128,)
torch.Size([128])
---
(256, 10240)
torch.Size([256, 10240])
---
(256,)
torch.Size([256])
---
(2, 256)
torch.Size([2, 256])
---
(2,)
torch.Size([2])


Net(
  (conv1): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn1): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mpool1): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mpool2): MaxPool3d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (bn3): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mpool3): MaxPool3d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (flt): Flatten(start_dim=1, end_dim=-1)
  (dp1): Dropout(p=0.1, inplace=False)
  (fc1): Linear(in_features=10240, out_features=256, bias=True)
  (dp2): Dropout(p=0.1, inplace=False)
  (fc2): Linear(in_featu

In [142]:
s = 0
for p in net.parameters():
    s += p.data.cpu().numpy().sum()
print(s)

s = 0
for p in weights:
    s += p.sum()
print(s)

-984.8062404675438
-984.8062404675438


In [143]:
from torchsummary import summary

summary(net, (1, 104, 128, 104))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1    [-1, 32, 104, 128, 104]             896
       BatchNorm3d-2    [-1, 32, 104, 128, 104]              64
         MaxPool3d-3       [-1, 32, 52, 64, 52]               0
            Conv3d-4       [-1, 64, 52, 64, 52]          55,360
       BatchNorm3d-5       [-1, 64, 52, 64, 52]             128
         MaxPool3d-6       [-1, 64, 17, 21, 17]               0
            Conv3d-7      [-1, 128, 17, 21, 17]         221,312
       BatchNorm3d-8      [-1, 128, 17, 21, 17]             256
         MaxPool3d-9         [-1, 128, 4, 5, 4]               0
          Flatten-10                [-1, 10240]               0
          Dropout-11                [-1, 10240]               0
           Linear-12                  [-1, 256]       2,621,696
          Dropout-13                  [-1, 256]               0
           Linear-14                   

In [144]:
batch_x, batch_y = test_seq[2]

test_x = torch.from_numpy(np.transpose(batch_x, axes=(0, 4, 3, 2, 1))).float()
test_y = torch.from_numpy(batch_y).float()

test_dataset = torch.utils.data.TensorDataset(test_x, test_y)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=8, shuffle=False)

In [145]:
with torch.no_grad():
    for i, (inputs, targets) in enumerate(test_loader):
        y = net(inputs.to('cuda'))
        y_pred = y.to('cpu').detach().numpy()
        print(y_pred)

[[0.29865196 0.70134807]
 [0.29880002 0.70119995]
 [0.29912612 0.70087385]
 [0.29975107 0.7002489 ]
 [0.29955003 0.70044994]
 [0.29985633 0.70014364]
 [0.30012932 0.6998707 ]
 [0.29898825 0.7010117 ]]
[[0.29933766 0.7006624 ]
 [0.29959735 0.7004027 ]
 [0.29940754 0.70059246]
 [0.29936785 0.7006321 ]]


## Data

In [None]:
from src.data import select_from_dataset

images_x, images_y = select_from_dataset(model, test_seq, max_category=10)

print(images_x.shape)

## Tf-Explain

In [None]:
from tf_explain.core import IntegratedGradients

In [None]:
explainer = IntegratedGradients()

data = (np.array([images_x[0]]), np.array([images_y[0]]))
class_index = np.argmax(images_y[0], axis=0)
grid = explainer.explain(data, model, class_index=class_index)
print(grid.shape)

In [None]:
import matplotlib.pyplot as plt

plt.imshow(grid[56])

In [None]:
from tf_explain.core import GradientsInputs

In [None]:
explainer = GradientsInputs()

data = (np.array([images_x[0]]), np.array([images_y[0]]))
class_index = np.argmax(images_y[0], axis=0)
grid = explainer.explain(data, model, class_index=class_index)
print(grid.shape)

In [None]:
import matplotlib.pyplot as plt

plt.imshow(grid[56])

In [None]:
from tf_explain.core import OcclusionSensitivity

In [None]:
explainer = OcclusionSensitivity(batch_size=8)

data = (np.array([images_x[0]]), np.array([images_y[0]]))
class_index = np.argmax(images_y[0], axis=0)
grid = explainer.explain(data, model, class_index=class_index, patch_size=10)
print(grid.shape)

In [None]:
import matplotlib.pyplot as plt

plt.imshow(grid)

# Not working :(

In [None]:
"""
Core Module for Grad CAM Algorithm
"""
import numpy as np
import tensorflow as tf
import cv2

from tf_explain.utils.display import grid_display, heatmap_display
from tf_explain.utils.saver import save_rgbZ


class GradCAM:

    """
    Perform Grad CAM algorithm for a given input
    Paper: [Grad-CAM: Visual Explanations from Deep Networks
            via Gradient-based Localization](https://arxiv.org/abs/1610.02391)
    """

    def explain(
        self,
        validation_data,
        model,
        class_index,
        layer_name=None,
        use_guided_grads=True,
        colormap=cv2.COLORMAP_VIRIDIS,
        image_weight=0.7,
    ):
        """
        Compute GradCAM for a specific class index.
        Args:
            validation_data (Tuple[np.ndarray, Optional[np.ndarray]]): Validation data
                to perform the method on. Tuple containing (x, y).
            model (tf.keras.Model): tf.keras model to inspect
            class_index (int): Index of targeted class
            layer_name (str): Targeted layer for GradCAM. If no layer is provided, it is
                automatically infered from the model architecture.
            colormap (int): OpenCV Colormap to use for heatmap visualization
            image_weight (float): An optional `float` value in range [0,1] indicating the weight of
                the input image to be overlaying the calculated attribution maps. Defaults to `0.7`.
            use_guided_grads (boolean): Whether to use guided grads or raw gradients
        Returns:
            numpy.ndarray: Grid of all the GradCAM
        """
        images, _ = validation_data

        if layer_name is None:
            layer_name = self.infer_grad_cam_target_layer(model)

        outputs, grads = GradCAM.get_gradients_and_filters(
            model, images, layer_name, class_index, use_guided_grads
        )

        cams = GradCAM.generate_ponderated_output(outputs, grads)

        return cams

    @staticmethod
    def infer_grad_cam_target_layer(model):
        """
        Search for the last convolutional layer to perform Grad CAM, as stated
        in the original paper.
        Args:
            model (tf.keras.Model): tf.keras model to inspect
        Returns:
            str: Name of the target layer
        """
        for layer in reversed(model.layers):
            # Select closest 4D layer to the end of the network.
            if len(layer.output_shape) == 5:
                return layer.name

        raise ValueError(
            "Model does not seem to contain 5D layer. Grad CAM cannot be applied."
        )

    @staticmethod
    def get_gradients_and_filters(
        model, images, layer_name, class_index, use_guided_grads
    ):
        """
        Generate guided gradients and convolutional outputs with an inference.
        Args:
            model (tf.keras.Model): tf.keras model to inspect
            images (numpy.ndarray): 4D-Tensor with shape (batch_size, H, W, 3)
            layer_name (str): Targeted layer for GradCAM
            class_index (int): Index of targeted class
            use_guided_grads (boolean): Whether to use guided grads or raw gradients
        Returns:
            Tuple[tf.Tensor, tf.Tensor]: (Target layer outputs, Guided gradients)
        """
        grad_model = tf.keras.models.Model(
            [model.inputs], [model.get_layer(layer_name).output, model.output]
        )

        with tf.GradientTape() as tape:
            inputs = tf.cast(images, tf.float32)
            conv_outputs, predictions = grad_model(inputs)
            loss = predictions[:, class_index]

        grads = tape.gradient(loss, conv_outputs)

        if use_guided_grads:
            grads = (
                tf.cast(conv_outputs > 0, "float32")
                * tf.cast(grads > 0, "float32")
                * grads
            )

        return conv_outputs, grads

    @staticmethod
    def generate_ponderated_output(outputs, grads):
        """
        Apply Grad CAM algorithm scheme.
        Inputs are the convolutional outputs (shape WxHxN) and gradients (shape WxHxN).
        From there:
            - we compute the spatial average of the gradients
            - we build a ponderated sum of the convolutional outputs based on those averaged weights
        Args:
            output (tf.Tensor): Target layer outputs, with shape (batch_size, Hl, Wl, Nf),
                where Hl and Wl are the target layer output height and width, and Nf the
                number of filters.
            grads (tf.Tensor): Guided gradients with shape (batch_size, Hl, Wl, Nf)
        Returns:
            List[tf.Tensor]: List of ponderated output of shape (batch_size, Hl, Wl, 1)
        """

        maps = [
            GradCAM.ponderate_output(output, grad)
            for output, grad in zip(outputs, grads)
        ]

        return maps

    @staticmethod
    def ponderate_output(output, grad):
        """
        Perform the ponderation of filters output with respect to average of gradients values.
        Args:
            output (tf.Tensor): Target layer outputs, with shape (Hl, Wl, Nf),
                where Hl and Wl are the target layer output height and width, and Nf the
                number of filters.
            grads (tf.Tensor): Guided gradients with shape (Hl, Wl, Nf)
        Returns:
            tf.Tensor: Ponderated output of shape (Hl, Wl, 1)
        """
        weights = tf.reduce_mean(grad, axis=(0, 1, 2))

        # Perform ponderated sum : w_i * output[:, :, i]
        cam = tf.reduce_sum(tf.multiply(weights, output), axis=-1)

        return cam

    def save(self, grid, output_dir, output_name):
        """
        Save the output to a specific dir.
        Args:
            grid (numpy.ndarray): Grid of all the heatmaps
            output_dir (str): Output directory path
            output_name (str): Output name
        """
        save_rgb(grid, output_dir, output_name)

In [None]:
from skimage.transform import resize

explainer = GradCAM()

idx = 3
data = (np.array([images_x[idx]]), np.array([images_y[idx]]))
class_index = np.argmax(images_y[idx], axis=0)
cams = explainer.explain(data, model, class_index=class_index, layer_name='activation_2')
cams = [resize(cam.numpy(), input_shape[:-1]) for cam in cams]
print(images_y[idx])

In [None]:
import matplotlib.pyplot as plt

plt.imshow(cams[0][56])