# Deep leanring project

## 1 Data loading and argumenting

In [0]:
##### before running it, make sure you don't have lots of big files in your google drive
##### otherwise it's going to take too long to finish running it before giving the TIMEOUT error
##### also save the train_controls, train_patients, val_controls, val_patients to your drive and
##### create a "train" folder with train_controls, train_patients in it, and 
##### a "val" folder with val_controls, val_patients in it.
##### change the train_dir and val_dir in the next cell to the dir of your train and val folder

from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /gdrive


## 1.1 Seeds and predefined stuffs

In [0]:
from numpy.random import seed
seed(137)
from tensorflow import set_random_seed
set_random_seed(191)

# Dir (Comment out others when you run the code)
# e.g. /gdrive/My Drive/deep_learning/new_dataset/test/controls/View2098.jpg
#          |                                             |

# Kavi's

# Daniel's

# Chelsea's Probs
train_dir = "/gdrive/My Drive/kaggle_dataset/train"
val_dir = "/gdrive/My Drive/kaggle_dataset/val"
test_dir = "/gdrive/My Drive/kaggle_dataset/test"


img_width = 224
img_height = 224
batch_size = 200
channels = 3
epochs = 50
nb_train_samples = 8000
nb_valid_samples = 32
nb_test_samples = 968


## 1.2 Data loading

In [0]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)             
val_datagen = ImageDataGenerator(rescale=1./255)              

train_generator = train_datagen.flow_from_directory(
        train_dir, 
        target_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle = True,
        class_mode='categorical')   

validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle = True,
    class_mode='categorical') #weight toward one class or another

#Keras takes care of generating labels if the directory structure matches above!
label_mapT = train_generator.class_indices
print(label_mapT)

label_mapV = validation_generator.class_indices
print(label_mapV)

for data_batch, labels_batch in train_generator:
    print ('data batch shape:', data_batch.shape)
    #print(data_batch)
    print('labels batch shape:', labels_batch.shape)
    #print(labels_batch)
    break
    
nb_train_samples = len(train_generator.filenames)
nb_validation_samples = len(validation_generator.filenames)

Using TensorFlow backend.


Found 8000 images belonging to 4 classes.
Found 32 images belonging to 4 classes.
{'CNV': 0, 'DME': 1, 'DRUSEN': 2, 'NORMAL': 3}
{'CNV': 0, 'DME': 1, 'DRUSEN': 2, 'NORMAL': 3}
data batch shape: (200, 224, 224, 3)
labels batch shape: (200, 4)


## 2 Model training

In [0]:
import numpy as np 
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Model, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Softmax, Flatten, Dense, BatchNormalization, GlobalAveragePooling2D
from keras.layers import Lambda
from keras.metrics import categorical_accuracy
from keras import regularizers
import tensorflow as tf
from keras.models import Sequential

from keras.callbacks import TensorBoard

#from keras.layers import Input, Dense
from keras.models import Model

input_shape = (img_height, img_width, channels)

model = Sequential()

model.add(Conv2D(32,(11, 11), input_shape=input_shape))#, kernel_regularizer=regularizers.l1(0.01))) 
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Conv2D(32,(3, 3), input_shape=input_shape))#, kernel_regularizer=regularizers.l1(0.01))) 
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Conv2D(32,(3, 3), input_shape=input_shape))#, kernel_regularizer=regularizers.l1(0.01))) 
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())


model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5)) 

model.add(Dense(4))
model.add(Activation('sigmoid'))

In [0]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 214, 214, 32)      11648     
_________________________________________________________________
activation_1 (Activation)    (None, 214, 214, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 107, 107, 32)      0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 107, 107, 32)      128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 105, 105, 32)      9248      
_________________________________________________________________
activation_2 (Activation)    (None, 105, 105, 32)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 52, 52, 32)        0         
__________

In [0]:
# For early stopping
import keras
from keras.callbacks import TensorBoard, Callback, EarlyStopping

class MetricsCheckpoint(Callback):
    """Callback that saves metrics after each epoch"""
    def __init__(self, savepath):
        super(MetricsCheckpoint, self).__init__()
        self.savepath = savepath
        self.history = {}
    def on_epoch_end(self, epoch, logs=None):
        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        np.save(self.savepath, self.history)
        
callbacks_list = [EarlyStopping(monitor='val_acc', patience=10, verbose=1)]

In [0]:
from keras import optimizers

epochs = 50

opt = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.1)

model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

val_step = 1 ## nb_validation_samples // batch_size

    
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=val_step,
    shuffle=True,
    callbacks=callbacks_list+[MetricsCheckpoint('logs')])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 00023: early stopping


<keras.callbacks.History at 0x7fa16fa2ae48>

## 3 Prediction

In [0]:
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(img_height, img_width),
        color_mode="rgb",
        class_mode='categorical',
        shuffle=False,
        batch_size=batch_size)

filenames = test_generator.filenames
nb_samples = len(filenames)

predict = model.predict_generator(test_generator, steps = np.ceil(nb_samples / batch_size))
    
# Getting categorical prediction
predict = np.round_(predict)

model.evaluate_generator(test_generator, steps = np.ceil(nb_samples / batch_size))

Found 968 images belonging to 4 classes.


[1.2281682565192547, 0.4359504077429614]

In [0]:
labels = test_generator.classes
predictions = predict

print(labels.shape)
print(predictions.shape)

predictions = predictions.reshape(len(labels))

print(predictions)

FP_list = []
FN_list = []

#FP
for i in range(len(labels)):
  if labels[i] == 0 and predictions[i] == 1:
    FP_list.append(filenames[i])

#FN
for i in range(len(labels)):
  if labels[i] == 1 and predictions[i] == 0:
    FN_list.append(filenames[i])

print(len(FP_list))
print(len(FN_list))

(968,)
(968, 4)


ValueError: ignored

## 4 Model visualization

## 1 Preamble downlaoding

In [0]:
!pip install git+https://github.com/raghakot/keras-vis.git

from keras.applications import ResNet50
from vis.utils import utils
from keras import activations

# Hide warnings on Jupyter Notebook
import warnings
warnings.filterwarnings('ignore')

## 2 Display images (to make sure the intended ones are shown)

In [0]:
from vis.utils import utils
from matplotlib import pyplot as plt
%matplotlib inline

plt.rcParams['figure.figsize'] = (18, 6)


img1 = utils.load_img(test_dir + '/controls/View2098.png', target_size=(img_height, img_width))
img2 = utils.load_img(test_dir + '/patients/19105.png', target_size=(img_height, img_width))


f, ax = plt.subplots(1, 2)
ax[0].imshow(img1)
ax[1].imshow(img2)

### 3 Attention heatmap displayed on the gray-scale image

### 3.1 Showing and saving FP

In [0]:
from vis.visualization import visualize_saliency, overlay
from vis.utils import utils
from keras import activations
from vis.visualization import visualize_cam
import matplotlib.cm as cm
from vis.utils import utils
from matplotlib import pyplot as plt

# Convert RBG to Grey-scale
def converter(x):
   #x has shape (width, height, channels)
    return (0.21 * x[:,:,:1]) + (0.72 * x[:,1:2]) + (0.07 * x[:,:,-1:])

penultimate_layer = utils.find_layer_idx(model, 'conv2d_9') #If an error occurs, find the name of layer in the model summary
layer_idx = utils.find_layer_idx(model, 'dense_6')

FPFN_dir =  "/gdrive/My Drive/deep_learning/FP&FN"

import os
FN_list = os.listdir("/gdrive/My Drive/deep_learning/FP&FN/FN")
FP_list = os.listdir("/gdrive/My Drive/deep_learning/FP&FN/FP")

FP_imglist = []
for i in range(len(FP_list)):
   FP_imglist.append(utils.load_img(FPFN_dir + '/FP/' + FP_list[i], target_size=(img_height, img_width)))

from google.colab import files

from PIL import Image



for modifier in [None]:
   plt.figure()
   f, ax = plt.subplots(1, 2)
   plt.suptitle("vanilla" if modifier is None else modifier)
   for i, img in enumerate(FP_imglist):
#         grads = visualize_saliency(model, layer_idx, filter_indices=20, seed_input=img)

#         # visualize grads as heatmap
#         ax[i].imshow(grads, cmap='jet')

       grads = visualize_cam(model, layer_idx, filter_indices=0,
                             seed_input=img, penultimate_layer_idx=penultimate_layer,
                             backprop_modifier=modifier)
       jet_heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
       grey_img = converter(img)

       save_img = overlay(jet_heatmap, grey_img)
       im = Image.fromarray(save_img)
       im.save(str(i)+".png")
       files.download(str(i)+".png")

#         ax[i].imshow(overlay(jet_heatmap, grey_img))

### 3.2 FN

In [0]:
from vis.visualization import visualize_saliency, overlay
from vis.utils import utils
from keras import activations
from vis.visualization import visualize_cam
import matplotlib.cm as cm
from vis.utils import utils
from matplotlib import pyplot as plt

# Convert RBG to Grey-scale
def converter(x):
   #x has shape (width, height, channels)
    return (0.21 * x[:,:,:1]) + (0.72 * x[:,1:2]) + (0.07 * x[:,:,-1:])

penultimate_layer = utils.find_layer_idx(model, 'conv2d_9') #If an error occurs, find the name of layer in the model summary
layer_idx = utils.find_layer_idx(model, 'dense_6')

FPFN_dir =  "/gdrive/My Drive/deep_learning/FP&FN"

import os
FN_list = os.listdir("/gdrive/My Drive/deep_learning/FP&FN/FN")
FP_list = os.listdir("/gdrive/My Drive/deep_learning/FP&FN/FP")

FN_imglist = []
for i in range(len(FN_list)):
   FN_imglist.append(utils.load_img(FPFN_dir + '/FN/' + FN_list[i], target_size=(img_height, img_width)))

from google.colab import files

from PIL import Image



for modifier in [None]:
   plt.figure()
   f, ax = plt.subplots(1, 2)
   plt.suptitle("vanilla" if modifier is None else modifier)
   for i, img in enumerate(FN_imglist):

       grads = visualize_cam(model, layer_idx, filter_indices=0,
                             seed_input=img, penultimate_layer_idx=penultimate_layer,
                             backprop_modifier=modifier)
       jet_heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
       grey_img = converter(img)

       save_img = overlay(jet_heatmap, grey_img)
       im = Image.fromarray(save_img)
       im.save(str(i)+".png")
       files.download(str(i)+".png")

#         ax[i].imshow(overlay(jet_heatmap, grey_img))

In [0]:
from vis.visualization import visualize_saliency, overlay
from vis.utils import utils
from keras import activations
from vis.visualization import visualize_cam
import matplotlib.cm as cm

# Convert RBG to Grey-scale
def converter(x):
    #x has shape (width, height, channels)
    return (0.21 * x[:,:,:1]) + (0.72 * x[:,1:2]) + (0.07 * x[:,:,-1:])

penultimate_layer = utils.find_layer_idx(model, 'conv2d_9') #If an error occurs, find the name of layer in the model summary
layer_idx = utils.find_layer_idx(model, 'dense_6')

for modifier in [None, 'guided', 'relu']:
    plt.figure()
    f, ax = plt.subplots(1, 2)
    plt.suptitle("vanilla" if modifier is None else modifier)
    for i, img in enumerate([img1, img2]):    
        
        grads = visualize_cam(model, layer_idx, filter_indices=0, 
                              seed_input=img, penultimate_layer_idx=penultimate_layer,
                              backprop_modifier=modifier)        
        # Lets overlay the heatmap onto original image.    
        jet_heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
        grey_img = converter(img)

        ax[i].imshow(overlay(jet_heatmap, grey_img))

### 3 Attention heatmap displayed on the origin image

In [0]:
for modifier in [None, 'guided', 'relu']:
    plt.figure()
    f, ax = plt.subplots(1, 4)
    plt.suptitle("vanilla" if modifier is None else modifier)
    for i, img in enumerate([img1, img2]):    
        # 20 is the imagenet index corresponding to `ouzel`
        grads = visualize_cam(model, layer_idx, filter_indices=0, 
                              seed_input=img, penultimate_layer_idx=penultimate_layer,
                              backprop_modifier=modifier)        
        # Lets overlay the heatmap onto original image.    
        jet_heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
        ax[i].imshow(overlay(jet_heatmap, img))
        ax[i + 2].imshow(img)