## Train a deep neural network to demonstrate cutout (random erasing) as a data-augmentation and regularization strategy, and Grad-CAM

In [2]:
# The two commands below install hvplot and upgrade bokeh every time this cell runs.
# Uncomment the keras line as well if keras is not already installed.
# !pip install -q keras
!pip install hvplot
!pip install --upgrade bokeh

Collecting hvplot
[?25l  Downloading https://files.pythonhosted.org/packages/8d/76/dff23a55164f051628dce17cf5cadb08bb594b4a47212aaec29a2b5802ea/hvplot-0.5.2-py2.py3-none-any.whl (2.5MB)
[K     |████████████████████████████████| 2.5MB 9.4MB/s 
Collecting holoviews>=1.11.0
[?25l  Downloading https://files.pythonhosted.org/packages/bc/63/3ffc471d37c50a470a8a102f098b5cc79d7971196fc2d8e75c064dade311/holoviews-1.12.7-py2.py3-none-any.whl (4.0MB)
[K     |████████████████████████████████| 4.0MB 46.8MB/s 
[?25hCollecting colorcet
[?25l  Downloading https://files.pythonhosted.org/packages/b5/a4/8a5a364492af01c8b689987ce792d0d00835bbb1203a5cd5e49798a41fbd/colorcet-2.0.2-py2.py3-none-any.whl (1.6MB)
[K     |████████████████████████████████| 1.6MB 54.2MB/s 
Collecting pyviz-comms>=0.7.2
  Downloading https://files.pythonhosted.org/packages/9d/66/77220e2da76b65da0e6daaa30091062f569935deb3ae8f4de03533496800/pyviz_comms-0.7.2-py2.py3-none-any.whl
Collecting param<2.0,>=1.8.0
[?25l  Downloading

### Import the necessary packages

In [71]:
from keras import backend as K
import time
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.datasets import cifar10
from collections import Counter

from keras.layers.convolutional import DepthwiseConv2D
from  keras.engine.input_layer import Input
from keras.models import Model
from keras.layers import concatenate
from keras.callbacks import ModelCheckpoint 
from keras.preprocessing import image

from keras.callbacks import ModelCheckpoint

from random import sample 
import random
import holoviews as hv
import pandas as pd
import hvplot.pandas
# Select the bokeh backend for HoloViews/hvplot output.
hv.extension('bokeh')

# % matplotlib inline
# Seed NumPy's global RNG (the cutout eraser defined later in this notebook draws from np.random).
# NOTE(review): this does not seed Python's `random` module or the Keras backend —
# confirm whether fully reproducible training is required.
np.random.seed(2017) 

## Part 1: Integration of Grad-CAM with the model

### Define the model architecture and train the model on CIFAR-10 dataset

In [0]:
# Load CIFAR-10 and record the dataset dimensions.
(train_features, train_labels), (test_features, test_labels) = cifar10.load_data()
# The arrays are channels-last, (N, rows, cols, channels), matching the
# Input(shape=(32, 32, 3)) declared below, so unpack in that order.
num_train, img_rows, img_cols, img_channels = train_features.shape
num_test = test_features.shape[0]
num_classes = len(np.unique(train_labels))

# Scale pixel values to [0, 1].
train_features = train_features.astype('float32') / 255
test_features = test_features.astype('float32') / 255
# Convert integer class labels to one-hot vectors.
train_labels = np_utils.to_categorical(train_labels, num_classes)
test_labels = np_utils.to_categorical(test_labels, num_classes)


# Stem: a 3x3 conv, a (3,1)+(1,3) spatially separable pair, a depthwise conv
# and a 1x1 conv that brings the channel count back to 32.
visible1 = Input(shape=(32, 32, 3))
conv11 = Convolution2D(32, kernel_size=(3, 3), activation='relu')(visible1)
conv12 = Convolution2D(32, kernel_size=(3, 1), activation='relu')(conv11)   # Spatially Separable convolution
conv13 = Convolution2D(512, kernel_size=(1, 3), activation='relu')(conv12)  # Spatially Separable convolution
conv14 = DepthwiseConv2D(kernel_size=(3, 3), activation='relu')(conv13)
conv15 = Convolution2D(32, kernel_size=(1, 1), activation='relu')(conv14)

# Group1: pool, then a regular 3x3 conv.
poolg21 = MaxPooling2D(pool_size=(2, 2))(conv15)
convg22 = Convolution2D(64, kernel_size=(3, 3), activation='relu')(poolg21)

# Group2: dilated 3x3 conv, then pool.
convg31 = Convolution2D(64, kernel_size=(3, 3), activation='relu', dilation_rate=2)(conv15)
poolg31 = MaxPooling2D(pool_size=(2, 2))(convg31)

# Both groups are concatenated along the channel axis.
merge = concatenate([convg22, poolg31])

mp = MaxPooling2D(pool_size=(2, 2))(merge)
cv1 = Convolution2D(10, kernel_size=(1, 1), activation='relu')(mp)
# Named `cv_out` rather than `cv2`: a global called `cv2` would shadow the
# OpenCV module that the Grad-CAM helper in this notebook uses.
# NOTE(review): this layer applies ReLU right before the softmax, so the class
# scores are clamped to be non-negative — confirm this is intended.
cv_out = Convolution2D(10, kernel_size=(5, 5), activation='relu')(cv1)

flat = Flatten()(cv_out)

output = Activation('softmax')(flat)

In [0]:
# Human-readable class names, indexed by CIFAR-10 class id.
category_labels = [
    'Airplane', 'Automobile', 'Bird', 'Cat', 'Deer',
    'Dog', 'Frog', 'Horse', 'Ship', 'Truck',
]
# A second copy of the dataset, left exactly as returned by the loader.
cifar_train_raw, cifar_test_raw = cifar10.load_data()
train_features1, train_labels1 = cifar_train_raw
test_features1, test_labels1 = cifar_test_raw

### We will train the model for different numbers of epochs (1, 2 and 10) and see how the resulting accuracy affects the Grad-CAM result

### Model1:

In [77]:
# Assemble the functional model from the graph built in the previous cell.
model = Model(inputs=[visible1], outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would add a stray "None" line to the output).
model.summary()

# Save the model to `filepath` whenever the monitored validation accuracy improves.
# NOTE(review): the Model2 and Model3 cells write to this same path, so each
# run overwrites the previous checkpoint.
filepath = "weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
# Model1 is trained for a single epoch.
model.fit(train_features, train_labels, batch_size=64, epochs=1, verbose=1,
          validation_data=(test_features, test_labels), callbacks=callbacks_list)

Model: "model_8"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_65 (Conv2D)              (None, 30, 30, 32)   896         input_9[0][0]                    
__________________________________________________________________________________________________
conv2d_66 (Conv2D)              (None, 28, 30, 32)   3104        conv2d_65[0][0]                  
__________________________________________________________________________________________________
conv2d_67 (Conv2D)              (None, 28, 28, 512)  49664       conv2d_66[0][0]                  
____________________________________________________________________________________________

<keras.callbacks.History at 0x7fb3678d5748>

### Find layers of models

In [0]:
# Layer objects of the current model and their names, in model order.
layers = model.layers
layers_names = [each_layer.name for each_layer in layers]

### Define the function to run Grad-CAM on an image

In [0]:
import cv2
def _channel_heatmaps(values, title_prefix):
  """Build one hvplot heatmap per channel (last axis) of `values`, combined with `+`."""
  combined = None
  for i in range(values.shape[2]):
    panel = pd.DataFrame(values[:, :, i]).hvplot.heatmap(
        x='columns',
        y='index', title=title_prefix + str(i))
    combined = panel if combined is None else combined + panel
  return combined


def return_grad_cam_result(model,layer,train_index):
  """Run Grad-CAM for one training image and build HoloViews plots of the result.

  Reads these notebook globals: `train_features` (images fed to the model),
  `train_features1` (raw images used for the overlay), `train_labels`
  (one-hot labels) and `category_labels` (class names).

  Args:
    model: Keras model to explain.
    layer: Name (string) of the layer whose output drives the heatmap.
    train_index: Index of the training image to explain.

  Returns:
    A tuple `(layout, heatmap1, conv_output_plot, grad_plot, probab)`:
      layout: overlay image next to the colour-mapped heatmap.
      heatmap1: the colour-mapped heatmap, one plot per colour channel.
      conv_output_plot: the layer output after weighting by the pooled
        gradients, one plot per channel.
      grad_plot: gradient of the predicted-class output with respect to the
        layer output, one plot per channel.
      probab: highest predicted probability, in percent, rounded to 2 decimals.
  """
  # Imported locally so the function does not depend on the notebook globals
  # `cv2` and `image`, which other cells may rebind to unrelated objects.
  import cv2
  from keras.preprocessing.image import img_to_array

  x = np.expand_dims(img_to_array(train_features[train_index, :, :, :]), axis=0)
  preds = model.predict(x)
  probab = np.round(np.max(preds[0]) * 100, 2)
  class_idx = np.argmax(preds[0])
  class_output = model.output[:, class_idx]

  # Gradient of the predicted-class output w.r.t. the chosen layer's output,
  # and its mean over the batch and spatial axes (one weight per channel).
  conv_layer = model.get_layer(layer)
  grads = K.gradients(class_output, conv_layer.output)[0]
  pooled_grads = K.mean(grads, axis=(0, 1, 2))
  iterate = K.function([model.input], [grads, pooled_grads, conv_layer.output[0]])
  grads_value, pooled_grads_value, conv_layer_output_value = iterate([x])

  # Weight every channel of the layer output by its pooled gradient
  # (broadcast over the last axis).
  conv_layer_output_value = conv_layer_output_value * pooled_grads_value

  heatmap = np.maximum(np.mean(conv_layer_output_value, axis=-1), 0)
  # Normalise to [0, 1]; skip when the map is all zeros to avoid dividing by zero.
  peak = np.max(heatmap)
  if peak > 0:
    heatmap = heatmap / peak

  # Resize to the raw image's size; cv2.resize takes (width, height).
  raw_img = train_features1[train_index, :, :, :]
  heatmap = cv2.resize(heatmap, (raw_img.shape[1], raw_img.shape[0]))
  heatmap = np.uint8(255 * heatmap)
  # applyColorMap returns BGR; convert to RGB so it matches the RGB image it
  # is blended with and the hv.RGB elements it is displayed through.
  heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
  heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
  superimposed_img = cv2.addWeighted(raw_img, 0.5, heatmap, 0.5, 0)

  layout=hv.RGB(superimposed_img,label='Label:{} Prediction:{}'.format(category_labels[np.argmax(train_labels[train_index])],category_labels[class_idx]))+\
  hv.RGB(heatmap,label='Heatmap')
  layout=layout.opts(title='Prediction Probab:'+str(probab)+'%')

  # Plot heatmap channel wise
  heatmap1 = _channel_heatmaps(heatmap, 'Channel ')
  heatmap1 = heatmap1.opts(title='Channel Wise Heatmap for output for layer {}'.format(layer.title()))

  # Plot output of given convolution layer channel wise
  conv_output_plot = _channel_heatmaps(conv_layer_output_value, 'Conv Layer Channel ')
  conv_output_plot = conv_output_plot.opts(title='Heatmap for output for layer {}'.format(layer.title()))

  # Plot gradients channel wise
  grad_plot = _channel_heatmaps(grads_value[0], 'Gradient Channel ')
  grad_plot = grad_plot.opts(title='Heatmap for gradients for layer {}'.format(layer.title()))
  return layout,heatmap1,conv_output_plot,grad_plot,probab

In [86]:
# NOTE(review): `layer` appears to be assigned only in the following cell, and this
# cell's execution count is higher than that cell's — confirm it still works when
# the notebook is run top to bottom on a fresh kernel.
layer

'conv2d_71'

In [83]:
from keras.preprocessing import image

# Grad-CAM target: the layer at position 12 of the model, for training image 36.
train_index = 36
layer = layers_names[12]
(layout,
 heatmap1,
 conv_output_plot,
 grad_plot,
 probab) = return_grad_cam_result(model, layer, train_index)



### Show the superimposed heatmap on the image, and the heatmap channel-wise

In [88]:
# Rendered with matplotlib here; switch the extension to 'bokeh' for a better view.
hv.extension('matplotlib')
overlay_title = 'Prediction Probab:' + str(probab) + '%'
(layout + heatmap1).cols(2).opts(title=overlay_title)

### Show channels for a given convolution layer and also channels for gradients of output with respect to the given convolution layer

In [94]:
# Layer-output channels followed by gradient channels, two plots per row.
hv.extension('matplotlib')
channel_panels = conv_output_plot + grad_plot
channel_panels.cols(2)

### Model2:

In [95]:
# Same architecture as Model1, rebuilt so that training starts from fresh weights.
visible1 = Input(shape=(32, 32, 3))
conv11 = Convolution2D(32, kernel_size=(3, 3), activation='relu')(visible1)
conv12 = Convolution2D(32, kernel_size=(3, 1), activation='relu')(conv11)   # Spatially Separable convolution
conv13 = Convolution2D(512, kernel_size=(1, 3), activation='relu')(conv12)  # Spatially Separable convolution
conv14 = DepthwiseConv2D(kernel_size=(3, 3), activation='relu')(conv13)
conv15 = Convolution2D(32, kernel_size=(1, 1), activation='relu')(conv14)

# Group1: pool, then a regular 3x3 conv.
poolg21 = MaxPooling2D(pool_size=(2, 2))(conv15)
convg22 = Convolution2D(64, kernel_size=(3, 3), activation='relu')(poolg21)

# Group2: dilated 3x3 conv, then pool.
convg31 = Convolution2D(64, kernel_size=(3, 3), activation='relu', dilation_rate=2)(conv15)
poolg31 = MaxPooling2D(pool_size=(2, 2))(convg31)

merge = concatenate([convg22, poolg31])

mp = MaxPooling2D(pool_size=(2, 2))(merge)
cv1 = Convolution2D(10, kernel_size=(1, 1), activation='relu')(mp)
# Named `cv_out` rather than `cv2`: rebinding the global `cv2` to a tensor would
# shadow the OpenCV module imported earlier and break later cv2.* calls.
# NOTE(review): ReLU right before the softmax clamps the class scores to be
# non-negative — confirm this is intended.
cv_out = Convolution2D(10, kernel_size=(5, 5), activation='relu')(cv1)

flat = Flatten()(cv_out)

output = Activation('softmax')(flat)

model = Model(inputs=[visible1], outputs=output)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Save the model to `filepath` whenever the monitored validation accuracy improves.
# NOTE(review): the same path is used by the other model cells, so earlier
# checkpoints are overwritten.
filepath = "weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
# Model2 is trained for two epochs.
model.fit(train_features, train_labels, batch_size=64, epochs=2, verbose=1,
          validation_data=(test_features, test_labels), callbacks=callbacks_list)

Model: "model_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_73 (Conv2D)              (None, 30, 30, 32)   896         input_10[0][0]                   
__________________________________________________________________________________________________
conv2d_74 (Conv2D)              (None, 28, 30, 32)   3104        conv2d_73[0][0]                  
__________________________________________________________________________________________________
conv2d_75 (Conv2D)              (None, 28, 28, 512)  49664       conv2d_74[0][0]                  
____________________________________________________________________________________________

<keras.callbacks.History at 0x7fb363885f60>

### Find layers of models

In [0]:
# Refresh the layer list and names for the newly built model.
layers = model.layers
layers_names = [item.name for item in layers]

In [0]:
import cv2

# Same target layer position (12) and training image (36) as for Model1.
train_index = 36
layer = layers_names[12]
(layout,
 heatmap1,
 conv_output_plot,
 grad_plot,
 probab) = return_grad_cam_result(model, layer, train_index)

In [100]:
# Overlay, heatmap and per-channel heatmaps for Model2, two plots per row.
hv.extension('matplotlib')
overlay_title = 'Prediction Probab:' + str(probab) + '%'
(layout + heatmap1).cols(2).opts(title=overlay_title)

### Show channels for a given convolution layer and also channels for gradients of output with respect to the given convolution layer

In [101]:
# Layer-output channels followed by gradient channels for Model2.
hv.extension('matplotlib')
channel_panels = conv_output_plot + grad_plot
channel_panels.cols(2)

### Model3:

In [104]:
# Same architecture as Model1 and Model2, rebuilt so that training starts from fresh weights.
visible1 = Input(shape=(32, 32, 3))
conv11 = Convolution2D(32, kernel_size=(3, 3), activation='relu')(visible1)
conv12 = Convolution2D(32, kernel_size=(3, 1), activation='relu')(conv11)   # Spatially Separable convolution
conv13 = Convolution2D(512, kernel_size=(1, 3), activation='relu')(conv12)  # Spatially Separable convolution
conv14 = DepthwiseConv2D(kernel_size=(3, 3), activation='relu')(conv13)
conv15 = Convolution2D(32, kernel_size=(1, 1), activation='relu')(conv14)

# Group1: pool, then a regular 3x3 conv.
poolg21 = MaxPooling2D(pool_size=(2, 2))(conv15)
convg22 = Convolution2D(64, kernel_size=(3, 3), activation='relu')(poolg21)

# Group2: dilated 3x3 conv, then pool.
convg31 = Convolution2D(64, kernel_size=(3, 3), activation='relu', dilation_rate=2)(conv15)
poolg31 = MaxPooling2D(pool_size=(2, 2))(convg31)

merge = concatenate([convg22, poolg31])

mp = MaxPooling2D(pool_size=(2, 2))(merge)
cv1 = Convolution2D(10, kernel_size=(1, 1), activation='relu')(mp)
# Named `cv_out` rather than `cv2`: rebinding the global `cv2` to a tensor would
# shadow the OpenCV module imported earlier and break later cv2.* calls.
# NOTE(review): ReLU right before the softmax clamps the class scores to be
# non-negative — confirm this is intended.
cv_out = Convolution2D(10, kernel_size=(5, 5), activation='relu')(cv1)

flat = Flatten()(cv_out)

output = Activation('softmax')(flat)

model = Model(inputs=[visible1], outputs=output)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Save the model to `filepath` whenever the monitored validation accuracy improves.
# NOTE(review): the same path is used by the other model cells, so earlier
# checkpoints are overwritten.
filepath = "weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
# Model3 is trained for ten epochs.
model.fit(train_features, train_labels, batch_size=64, epochs=10, verbose=1,
          validation_data=(test_features, test_labels), callbacks=callbacks_list)

Model: "model_12"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_13 (InputLayer)           (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_97 (Conv2D)              (None, 30, 30, 32)   896         input_13[0][0]                   
__________________________________________________________________________________________________
conv2d_98 (Conv2D)              (None, 28, 30, 32)   3104        conv2d_97[0][0]                  
__________________________________________________________________________________________________
conv2d_99 (Conv2D)              (None, 28, 28, 512)  49664       conv2d_98[0][0]                  
___________________________________________________________________________________________

<keras.callbacks.History at 0x7fb3619f0978>

### Find layers of models

In [0]:
# Refresh the layer list and names for the newly built model.
layers = model.layers
layers_names = [entry.name for entry in layers]

In [106]:
import cv2

# Same target layer position (12) and training image (36) as for the earlier models.
train_index = 36
layer = layers_names[12]
(layout,
 heatmap1,
 conv_output_plot,
 grad_plot,
 probab) = return_grad_cam_result(model, layer, train_index)



In [107]:
# Overlay, heatmap and per-channel heatmaps for Model3, two plots per row.
hv.extension('matplotlib')
overlay_title = 'Prediction Probab:' + str(probab) + '%'
(layout + heatmap1).cols(2).opts(title=overlay_title)

In [108]:
# Layer-output channels followed by gradient channels for Model3.
hv.extension('matplotlib')
channel_panels = conv_output_plot + grad_plot
channel_panels.cols(2)

# Part 2 Implementation of cutout

### Define the cutout function (also called random erasing)

In [0]:
def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=False):
    """Create a cutout / random-erasing function for (height, width, channels) images.

    Args:
        p: Probability that a given image gets a patch erased.
        s_l, s_h: Lower/upper bound of the patch area, as a fraction of the image area.
        r_1, r_2: Lower/upper bound of the ratio used to split the area into width and height.
        v_l, v_h: Lower/upper bound of the uniform fill values.
        pixel_level: If True, draw an independent fill value for every element of
            the patch; otherwise fill the whole patch with one value.

    Returns:
        A function `eraser(input_img)` that returns a copy of `input_img`,
        possibly with one rectangular patch overwritten. The input is not modified.
    """
    def eraser(input_img):
        erased = input_img.copy()
        height, width, channels = erased.shape

        # Leave the image untouched with probability 1 - p.
        if np.random.rand() > p:
            return erased

        # Keep drawing patches until one lies completely inside the image.
        fits = False
        while not fits:
            area = np.random.uniform(s_l, s_h) * height * width
            ratio = np.random.uniform(r_1, r_2)
            patch_w = int(np.sqrt(area / ratio))
            patch_h = int(np.sqrt(area * ratio))
            x0 = np.random.randint(0, width)
            y0 = np.random.randint(0, height)
            fits = (x0 + patch_w <= width) and (y0 + patch_h <= height)

        if pixel_level:
            fill = np.random.uniform(v_l, v_h, (patch_h, patch_w, channels))
        else:
            fill = np.random.uniform(v_l, v_h)

        erased[y0:y0 + patch_h, x0:x0 + patch_w, :] = fill
        return erased

    return eraser

###  


### Showing how the Keras ImageDataGenerator works and how to integrate the cutout function with it:
 1. Load some samples from train data for data points along with their labels
 2. Define ImageDataGenerator instance with cutout function passed to preprocessing_function argument
 3. Pass x_train1 and y_train1 through datagen.flow() with batch_size=1. datagen.flow() takes the training data and its labels and yields batches of batch_size images, with the preprocessing applied randomly to each image.
 4. Here I am using batch_size of 1 just to show how each input image is changed by ImageDataGenerator function
 5. Then loop over batches for say 10[just some random number] times, and save its hvplot image in a list
 6. Show hvplot image for 10 images changed by cut-out randomly

 Below we can see how cutouts are applied to different images randomly. And same image can be chosen more than 1 time, ImageDataGenerator samples images with replacement.

In [0]:
from  keras.preprocessing.image import ImageDataGenerator
# Take the first ten training images as loaded (not divided by 255 in this cell).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train1 = x_train[0:10, :, :, :]
y_train1 = y_train[0:10]

# The cutout function is applied to every image the generator yields.
datagen = ImageDataGenerator(vertical_flip=False, preprocessing_function=get_random_eraser())

# Collect ten single-image batches as HoloViews RGB elements.
batches = 0
ll = []
for x_batch, y_batch in datagen.flow(x_train1, y_train1, batch_size=1):
  ll.append(hv.RGB(x_batch[0, :, :, :].astype(int)))
  batches += 1
  if batches == 10:
    break

# Combine the elements into one layout. The loop variable is deliberately not
# called `image`: that name is the imported `keras.preprocessing.image` module,
# and rebinding it here would break later code that uses the module.
img = ll[0]
for panel in ll[1:]:
  img += panel

In [112]:
# Show the cutout samples five per row.
hv.extension('matplotlib')
cutout_grid = img.cols(5)
cutout_grid

# Define the model

In [0]:
# Reload CIFAR-10 and record the dataset dimensions.
(train_features, train_labels), (test_features, test_labels) = cifar10.load_data()
# The arrays are channels-last, (N, rows, cols, channels), matching the
# Input(shape=(32, 32, 3)) declared below, so unpack in that order.
num_train, img_rows, img_cols, img_channels = train_features.shape
num_test = test_features.shape[0]
num_classes = len(np.unique(train_labels))

# Scale pixel values to [0, 1].
train_features = train_features.astype('float32') / 255
test_features = test_features.astype('float32') / 255
# Convert integer class labels to one-hot vectors.
train_labels = np_utils.to_categorical(train_labels, num_classes)
test_labels = np_utils.to_categorical(test_labels, num_classes)


# Same architecture as in Part 1, rebuilt so that training starts from fresh weights.
visible1 = Input(shape=(32, 32, 3))
conv11 = Convolution2D(32, kernel_size=(3, 3), activation='relu')(visible1)
conv12 = Convolution2D(32, kernel_size=(3, 1), activation='relu')(conv11)   # Spatially Separable convolution
conv13 = Convolution2D(512, kernel_size=(1, 3), activation='relu')(conv12)  # Spatially Separable convolution
conv14 = DepthwiseConv2D(kernel_size=(3, 3), activation='relu')(conv13)
conv15 = Convolution2D(32, kernel_size=(1, 1), activation='relu')(conv14)

# Group1: pool, then a regular 3x3 conv.
poolg21 = MaxPooling2D(pool_size=(2, 2))(conv15)
convg22 = Convolution2D(64, kernel_size=(3, 3), activation='relu')(poolg21)

# Group2: dilated 3x3 conv, then pool.
convg31 = Convolution2D(64, kernel_size=(3, 3), activation='relu', dilation_rate=2)(conv15)
poolg31 = MaxPooling2D(pool_size=(2, 2))(convg31)

merge = concatenate([convg22, poolg31])

mp = MaxPooling2D(pool_size=(2, 2))(merge)
cv1 = Convolution2D(10, kernel_size=(1, 1), activation='relu')(mp)
# Named `cv_out` rather than `cv2`: rebinding the global `cv2` to a tensor would
# shadow the OpenCV module imported earlier and break later cv2.* calls.
# NOTE(review): ReLU right before the softmax clamps the class scores to be
# non-negative — confirm this is intended.
cv_out = Convolution2D(10, kernel_size=(5, 5), activation='relu')(cv1)

flat = Flatten()(cv_out)

output = Activation('softmax')(flat)
model = Model(inputs=[visible1], outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### Apply random-cutout and train the model and show grad-CAM images

In [114]:
# `train_features` was scaled to [0, 1] in the previous cell, so the erased
# patches must be filled from the same range. The eraser's default fill range
# (0..255) would write values far outside the data range.
datagen = ImageDataGenerator(zoom_range=0.0,
                             horizontal_flip=False,
                             preprocessing_function=get_random_eraser(v_l=0, v_h=1))

train_flow = datagen.flow(train_features, train_labels, batch_size=128)

# Train the model and time it. `steps_per_epoch`/`epochs` replace the
# deprecated Keras 1 arguments `samples_per_epoch`/`nb_epoch`; one epoch is a
# full pass over the training generator.
start = time.time()
model_info = model.fit_generator(train_flow,
                                 steps_per_epoch=len(train_flow), epochs=5,
                                 validation_data=(test_features, test_labels), verbose=1)
end = time.time()
print ("Model took %0.2f seconds to train"%(end - start))
# plot model history
# plot_model_history(model_info)
# compute test accuracy
# print ("Accuracy on test data is: %0.2f"%accuracy(test_features, test_labels, model))

  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model took 166.69 seconds to train


### Get layer names

In [0]:
# Layer objects and names of the cutout-trained model.
layers = model.layers
layers_names = [member.name for member in layers]

In [0]:
import cv2
from keras.preprocessing import image

# Grad-CAM on five training images, all against the layer at position 12.
layer = layers_names[12]
train_index1, train_index2, train_index3, train_index4, train_index5 = 36, 70, 30, 200, 21

(layout1, heatmap11, conv_output_plot1,
 grad_plot1, probab1) = return_grad_cam_result(model, layer, train_index1)
(layout2, heatmap12, conv_output_plot2,
 grad_plot2, probab2) = return_grad_cam_result(model, layer, train_index2)
(layout3, heatmap13, conv_output_plot3,
 grad_plot3, probab3) = return_grad_cam_result(model, layer, train_index3)
(layout4, heatmap14, conv_output_plot4,
 grad_plot4, probab4) = return_grad_cam_result(model, layer, train_index4)
(layout5, heatmap15, conv_output_plot5,
 grad_plot5, probab5) = return_grad_cam_result(model, layer, train_index5)

In [117]:
# Overlays and heatmaps for all five images, two plots per row.
hv.extension('matplotlib')
all_layouts = layout1 + layout2 + layout3 + layout4 + layout5
all_layouts.cols(2)