In [None]:
!pip install keras-vis

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import pickle

from tqdm import tqdm
import glob
import os
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns
import pprint
import pydicom as dicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import albumentations as A 
import cv2

from sklearn.metrics import roc_curve,roc_auc_score, auc
import sklearn

from PIL import Image

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# `os` is already imported earlier in this cell; the repeated import is harmless.
import os
# List the entries of the input directory (relative to the working directory).
os.listdir('../input')
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from kaggle_datasets import KaggleDatasets
from sklearn.metrics import roc_curve

import tensorflow.keras as K
import tensorflow as tf
try:
    import tensorflow_io as tfio
except ImportError:
    # tensorflow_io is treated as optional: a missing package is reported and
    # the notebook carries on. Only ImportError is caught, so unrelated failures
    # (e.g. KeyboardInterrupt, or errors raised inside the package) still surface.
    print('tensorflow_io not installed')
import re
import vis ## keras-vis
from vis.utils import utils

## History analysis
Now that the model has been trained for 100 epochs, we can analyse the training history and the behaviour of the model.

In [None]:
# Walk the pooled-model dataset and sort every file into one of two lists by
# its name: paths containing 'history' go to `hf`, otherwise paths containing
# 'weights' go to `wf`. Files matching neither are ignored.
hf, wf = [], []
for folder, _subfolders, names in os.walk('../input/siim-covid-pooled/'):
    for name in names:
        full_path = os.path.join(folder, name)
        if 'history' in name:
            hf.append(full_path)
        elif 'weights' in name:
            wf.append(full_path)

# Display both lists as the cell output.
hf, wf

In [None]:
# Pin the history / weights pair analysed by the rest of the notebook.
# NOTE(review): these assignments replace the lists gathered by the directory
# scan in the previous cell, and they point at a different dataset folder
# ('reti-siim-covid' rather than 'siim-covid-pooled') — confirm this is intended.
hf = '../input/reti-siim-covid/resmodel_pooled_history_16_07_2021__20_40.pkl'
wf = '../input/reti-siim-covid/resmodel_pooled_weights_16_07_2021__20_40.h5'

In [None]:
# Load the pickled training history. The file is opened in a `with` block so
# the handle is closed as soon as it has been read.
# NOTE(review): unpickling can execute arbitrary code — only load history files
# that come from a trusted source.
with open(hf, 'rb') as history_file:
    history = pickle.load(history_file)

# Restore the trained model from the weights file and print its architecture.
model = K.models.load_model(wf)
model.summary()

In [None]:
# Last entry of the recorded 'val_accuracy' series, shown as the cell output.
history['val_accuracy'][-1]

In [None]:
print(history.keys())
fig, ax = plt.subplots(2, 2, figsize=[15, 8], gridspec_kw=dict(left=0.1, right=0.9, bottom=0, top=1))

# One panel per tracked metric, filled row by row: accuracy and loss on the
# top row, auc and prc on the bottom row. Each panel overlays the series stored
# under the metric name with the one stored under its 'val_' counterpart.
for panel, metric in zip(ax.flat, ('accuracy', 'loss', 'auc', 'prc')):
    panel.plot(history[metric], label='train')
    panel.plot(history[f'val_{metric}'], label='test')
    panel.set_title(f'model {metric}')
    panel.set_ylabel(metric)
    panel.set_xlabel('epoch')
    panel.legend(loc='upper left')

# Write the figure to disk before showing it.
plt.savefig('stats.png')
plt.show()

In [None]:
# Load the training images and their labels from the saved NumPy files.
# NOTE(review): later cells index y_train[:, i] for i in 0..3 and take argmax
# over label rows — presumably one-hot labels over four classes; confirm.
x_train = np.load('../input/reti-siim-covid/x_train.npy')
y_train = np.load('../input/reti-siim-covid/y_train.npy')

In [None]:
labels = ['Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance']
# Number of rows whose label column equals 1, for each of the first four columns.
dist = [int(np.count_nonzero(y_train[:, column] == 1)) for column in range(4)]

fig, ax = plt.subplots(1,2, figsize=[15, 7])

# Left panel: bar chart of the per-class counts, labelled with the first word
# of each class name. Right panel: the first training image.
short_names = [name.split()[0] for name in labels]
sns.barplot(x=short_names, y=dist, ax=ax[0])
ax[0].set_title('Category distribution in the training dataset')
ax[1].imshow(x_train[0])
ax[1].set_title('Example image')

plt.savefig('cat_distribution.png')
plt.show()

In [None]:
VAL_SPLIT = 0.15
BATCH_SIZE = 200
# Seed handed to flow() so the shuffled batches drawn below are repeatable;
# without it the confusion matrix and ROC curves change on every run.
SEED = 42

# Augmenting generator; VAL_SPLIT reserves a fraction of the data for the
# 'validation' subset.
datagen = K.preprocessing.image.ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=(0.8, 1.2),
    shear_range=15,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=VAL_SPLIT,
)

# NOTE(review): both subsets come from the same generator, so the validation
# batches appear to go through the same random augmentations as the training
# batches — confirm this is what the evaluation below should measure.
train_it = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='training', seed=SEED)

validation_it = datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, subset='validation', seed=SEED)

In [None]:
# Draw one validation batch and reduce its label rows to class indices.
x_pred, y_true = validation_it.next()
y_true = np.argmax(y_true, axis=1)

# Predicted class index = position of the largest model output per sample.
pred = model.predict(x_pred)
y_val = np.argmax(pred, axis=1)

# normalize='pred' normalises the matrix over the predicted (column) axis.
cm = sklearn.metrics.confusion_matrix(y_true, y_val, labels=[0,1,2,3], normalize='pred')

In [None]:
# Draw the confusion matrix `cm` as an annotated heatmap on a new figure and
# save it to disk.
plt.figure(figsize = (10,7))
sns.heatmap(cm, annot=True, cmap="YlGnBu")
plt.savefig('confusion_matrix.png')

In [None]:
# Draw the next validation batch. The labels are kept in their per-class
# column form here because the ROC cell indexes y_true[:, i].
x_pred, y_true = validation_it.next()

# Model outputs for that batch; the ROC cell indexes y_pred[:, i].
y_pred = model.predict(x_pred)

In [None]:
n_classes = 4

# Per-class ROC: column i of the labels against column i of the model outputs,
# with the area under each curve stored alongside.
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    false_pos, true_pos, _ = roc_curve(y_true[:, i], y_pred[:, i])
    fpr[i], tpr[i] = false_pos, true_pos
    roc_auc[i] = auc(false_pos, true_pos)

# One panel per class on a 2x2 grid, filled row by row; the dashed diagonal
# runs from (0, 0) to (1, 1).
fig, ax = plt.subplots(2, 2, figsize=[15, 8], gridspec_kw=dict(left=0.1, right=0.9, bottom=0, top=1))
for i, ax_t in zip(range(n_classes), ax.flat):
    ax_t.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f)' % roc_auc[i])
    ax_t.plot([0, 1], [0, 1], 'k--')
    ax_t.set_xlabel('False Positive Rate')
    ax_t.set_ylabel('True Positive Rate')
    ax_t.set_title(f'ROC for class {labels[i]}')
    ax_t.legend(loc="lower right")

plt.savefig('roc.png')
plt.show()

In [None]:
# Iterator over the validation subset that yields one image per batch.
single_it = datagen.flow(x_train, y_train, batch_size=1, subset='validation')
# Take a single (image, label) pair from it.
img, y = single_it.next()
# Class indices ordered by decreasing label value.
class_idxs_sorted = np.argsort(y.flatten())[::-1]
# Alias used by plot_map when building the figure title.
classlabel = labels

In [None]:
# Replace the activation of the last layer (index -1) with a linear one before
# computing gradients with respect to the input image.
layer_idx = -1
model.layers[layer_idx].activation = K.activations.linear
# NOTE(review): `linear_model` is not used by the cells shown here — the
# saliency loop calls `model` directly. Confirm which of the two is intended.
linear_model = utils.apply_modifications(model)
# Start from the first entry of class_idxs_sorted (the assignment was
# duplicated in the original cell; once is enough).
class_idx = class_idxs_sorted[0]

def get_saliency_map(model, img, class_idx):
    """Gradient-based saliency map of one class output w.r.t. the input.

    Args:
        model: callable model; its output is indexed as ``[:, class_idx]``.
        img: input batch, converted to a float32 tensor before the forward pass.
        class_idx: index of the output column to differentiate.

    Returns:
        The map for the first item of the batch: the gradient is reduced with a
        maximum over the last axis, then min-max scaled using the extrema of
        the whole batch.
    """
    inputs = tf.convert_to_tensor(img, np.float32)

    # The input is not a variable, so it has to be watched explicitly.
    with tf.GradientTape() as tape:
        tape.watch(inputs)
        class_score = model(inputs)[:, class_idx]

    # d(class score) / d(input), collapsed over the last (channel) axis.
    channel_max = tf.reduce_max(tape.gradient(class_score, inputs), axis=-1).numpy()

    # Min-max scaling; epsilon keeps the denominator non-zero for a flat map.
    lowest, highest = np.min(channel_max), np.max(channel_max)
    smap = (channel_max - lowest) / (highest - lowest + K.backend.epsilon())

    return smap[0]

def plot_map(grads, image):
    """Show an image next to the same image overlaid with a saliency map.

    The figure title reports the model score of the image that is actually
    displayed. The original read ``y_pred[0, class_idx]``, i.e. the prediction
    for the first image of a separately drawn validation batch, which is not
    the image passed in here.

    Args:
        grads: saliency map to overlay (e.g. the value returned by
            get_saliency_map); drawn with the "jet" colormap at 50% opacity.
        image: image batch; channel 0 of the first item is displayed.

    Reads the notebook globals ``class_idx``, ``classlabel`` and ``model``.
    """
    img = image[0, :, :, 0]
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    axes[0].imshow(img, cmap=plt.cm.gray)
    axes[1].imshow(img, cmap=plt.cm.gray)
    overlay = axes[1].imshow(grads, cmap="jet", alpha=0.5)
    fig.colorbar(overlay)

    # Score the displayed image with the same kind of forward pass that
    # get_saliency_map uses.
    scores = np.asarray(model(tf.convert_to_tensor(image, np.float32)))[0]
    if getattr(model.layers[-1], 'activation', None) is K.activations.linear:
        # The last activation has been swapped for a linear one, so the model
        # emits unnormalised scores; map them back to probabilities.
        # NOTE(review): assumes the original output activation was softmax — TODO confirm.
        shifted = np.exp(scores - np.max(scores))
        scores = shifted / np.sum(shifted)

    plt.suptitle("Pr(class={}) = {:5.2f}".format(
                      classlabel[class_idx],
                      scores[class_idx]))

In [None]:
# Plot one saliency figure per class, in the order given by class_idxs_sorted.
# The loop variable has to be named `class_idx`: plot_map reads that global
# when it builds the figure title.
for class_idx in class_idxs_sorted:
    plot_map(get_saliency_map(model, img, class_idx), img)