## References

https://www.tensorflow.org/tutorials/images/classification (example code is tweaked)

https://machinelearningmastery.com/how-to-visualize-filters-and-feature-maps-in-convolutional-neural-networks/

https://www.tensorflow.org/addons/api_docs/python/tfa/image/equalize

https://keras.io/examples/vision/grad_cam/



In [None]:
# %tensorflow_version 2.x  
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
import pandas as pd 
from keras.models import Sequential
from keras.layers import Dense
from keras import layers
from keras.utils import image_dataset_from_directory
import matplotlib.cm as cm
import cv2
import tensorflow_addons as tfa
from tensorflow_addons.image import equalize

In [None]:
# Directories holding the training and test frames; both are read below with
# image_dataset_from_directory.
train_data_dir = './frames/train/'
test_data_dir = './frames/test/'

In [None]:
# Mini-batch size and the (height, width) every image is resized to on load.
batch_size = 32
img_height = 256
img_width = 256

In [None]:
# Training subset: 80% of the frames under train_data_dir, loaded as
# single-channel images with binary labels.
train_data = image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    crop_to_aspect_ratio=True,
    color_mode='grayscale',
    label_mode='binary',
    batch_size=batch_size,
)

Found 21414 files belonging to 2 classes.
Using 17132 files for training.


In [None]:
# Validation subset: the remaining 20% of the frames under train_data_dir.
# Seed and split fraction are the same as for the training subset.
val_data = image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    crop_to_aspect_ratio=True,
    color_mode='grayscale',
    label_mode='binary',
    batch_size=batch_size,
)

Found 21414 files belonging to 2 classes.
Using 4282 files for validation.


In [None]:
# Held-out test set, loaded with the same image size, colour mode and label
# encoding as the training data.
test_data = image_dataset_from_directory(
    test_data_dir,
    seed=123,
    image_size=(img_height, img_width),
    crop_to_aspect_ratio=True,
    color_mode='grayscale',
    label_mode='binary',
    batch_size=batch_size,
)

Found 4053 files belonging to 2 classes.


In [None]:
# Class names as exposed by the training dataset.
class_names = train_data.class_names
print(class_names)

['norm', 'weap']


Image pre-processing (Histogram Equalization)

In [None]:
# Histogram equalization
def _equalize_images(images, labels):
    """Equalize the histogram of a batch of images; labels pass through."""
    return equalize(images), labels


# One named function replaces three identical lambdas: it removes the
# duplication and should avoid the lambda-related warning TensorFlow prints
# when lambdas are passed to Dataset.map.
train_data = train_data.map(_equalize_images)
val_data = val_data.map(_equalize_images)
test_data = test_data.map(_equalize_images)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [None]:
from tensorflow import data

# Let tf.data choose the prefetch buffer size at runtime. `data.AUTOTUNE` is
# the stable name of the constant previously reached via `data.experimental`.
AUTOTUNE = data.AUTOTUNE

# cache() keeps the prepared batches after the first pass over the data;
# prefetch() overlaps input preparation with model execution.
train_data = train_data.cache().prefetch(buffer_size=AUTOTUNE)
val_data = val_data.cache().prefetch(buffer_size=AUTOTUNE)

Changes made:

Change MLP portion to 64x32x16x1 with sigmoid

Added data augmentation to mitigate overfitting, as both loss and accuracy were decreasing.

In [None]:
# Binary classifier: rescaling -> augmentation -> four Conv2D stages, each
# followed by Filter Response Normalization -> 64x32x16x1 MLP head.
model = Sequential([
    # Scale pixel values by 1/255. As the first layer it declares the model's
    # input shape (single-channel image).
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 1)),

    # Augmentation
    # No input_shape here: only the first layer of a Sequential model defines
    # the input, and the previous value lacked the channel axis.
    layers.RandomFlip(mode="horizontal"),
    # factor is a fraction of a full turn: 0.125 * 360 = 45 degrees.
    layers.RandomRotation(factor=(-0.125, 0.125)),  # Rotate within -45 to 45 degs
    # Negative factors zoom in. An explicit (lower, upper) tuple is needed
    # because a single float f is expanded to the symmetric range [-f, f],
    # which would also zoom out.
    layers.RandomZoom(height_factor=(-0.2, 0.0), width_factor=None),  # Zoom in by at most 20%

    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(3),  # 3x3 pooling
    tfa.layers.FilterResponseNormalization(),

    # The strided convolutions (stride 2) downsample in place of pooling.
    layers.Conv2D(64, 3, strides=2, activation='relu'),
    layers.Dropout(0.2),
    tfa.layers.FilterResponseNormalization(),

    layers.Conv2D(64, 3, strides=2, activation='relu'),
    layers.Dropout(0.2),
    tfa.layers.FilterResponseNormalization(),

    layers.Conv2D(16, 3, strides=2, activation='relu'),
    layers.MaxPooling2D(2),  # 2x2 pooling
    layers.Dropout(0.2),
    tfa.layers.FilterResponseNormalization(),

    layers.Flatten(),

    # MLP head: 64 -> 32 -> 16 -> 1.
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),

    # Single sigmoid unit to match the binary labels.
    layers.Dense(1, activation='sigmoid'),
])



In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as the only additional metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 256, 256, 1)       0         
                                                                 
 random_flip (RandomFlip)    (None, 256, 256, 1)       0         
                                                                 
 random_rotation (RandomRota  (None, 256, 256, 1)      0         
 tion)                                                           
                                                                 
 random_zoom (RandomZoom)    (None, 256, 256, 1)       0         
                                                                 
 conv2d (Conv2D)             (None, 254, 254, 64)      640       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 84, 84, 64)       0         
 )                                                      

To understand the model's strengths and weaknesses, we train and visualize the activation maps and kernels.

Full train and validation sets, for 15 epochs:

In [None]:
# Train on the training split and evaluate on the validation split each epoch.
# NOTE(review): the surrounding text speaks of 15 epochs while this value is
# 100 — confirm which one the reported results were produced with.
epochs=100
history = model.fit(
  train_data,
  validation_data=val_data,
  epochs=epochs
)

Epoch 1/100




  2/536 [..............................] - ETA: 22:56:58 - loss: 1.0488 - accuracy: 0.3750

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is the
# test accuracy (the only metric passed to compile()).
model.evaluate(test_data)[1]

In [None]:
def plot_loss_acc_graphs(history):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        history: object whose ``history`` attribute is a dict with the
            per-epoch lists 'accuracy', 'val_accuracy', 'loss' and
            'val_loss' (the value returned by ``model.fit`` in this notebook).
    """
    metrics = history.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Take the x-axis from the recorded values rather than the global
    # `epochs`: the two differ when training was interrupted or the global
    # was edited after fitting, which made plt.plot fail on mismatched
    # lengths.
    epochs_range = range(len(acc))

    plt.figure(figsize=(8, 8))

    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')

    plt.show()

In [None]:
# Show the learning curves of the training run.
plot_loss_acc_graphs(history)

The training and validation performance improves consistently for 15 epochs. The training performance is worse than validation due to dropout. However, the testing accuracy is poor. This could be because the model is underfitted as the loss has not saturated.

### Exploring filters of CNN

In [None]:
# Sample frame (taken from the 'weap' training folder) used for the filter,
# activation-map and Grad-CAM visualisations, and the size it is loaded at.
img_path = './frames/train/weap/1(LEE KANG WEI)_140.png'
img_size = (img_height, img_width)

In [None]:
# Print the architecture again as a reference for the layer indices used below.
model.summary()

In [None]:
# Record the position and name of every convolutional layer of the model.
layer_indices = []
layer_names = []
for i, layer in enumerate(model.layers):
    # Convolutional layers are recognised by 'conv' in the layer name.
    if 'conv' not in layer.name:
        continue
    # summarize output shape
    print(i, layer.name, layer.output.shape)
    layer_indices.append(i)
    layer_names.append(layer.name)


In [None]:
def get_filters(j, model):
    """Plot the kernels of layer ``j`` of ``model`` on an 8x8 grid.

    Only the first input channel of each kernel is drawn, and at most 64
    kernels are shown; layers with fewer kernels leave the remaining grid
    cells empty.

    Args:
        j: index into ``model.layers``; the layer's ``get_weights()`` must
            return exactly two arrays (kernels and biases).
        model: model whose layer is inspected.
    """
    layer = model.layers[j]
    filters, _biases = layer.get_weights()
    print(layer.name, filters.shape)

    columns = 8
    rows = 8
    # Bound the loop by the number of kernels that actually exist instead of
    # indexing past the end and swallowing the error with a bare `except`.
    n_shown = min(rows * columns, filters.shape[-1])

    plt.figure(figsize=(8, 8))
    for k in range(n_shown):
        ax = plt.subplot(rows, columns, k + 1)
        ax.set_xticks([])
        ax.set_yticks([])
        # First input channel of the k-th kernel.
        ax.imshow(filters[:, :, 0, k], cmap='gray')

    plt.show()

In [None]:
# Visualise the kernels of every convolutional layer collected in layer_indices.
for index in layer_indices:
    get_filters(index, model)

In [None]:
from keras.models import Model

# Auxiliary model that maps the network input to the output of every
# convolutional layer.
outputs = [model.layers[i].output for i in layer_indices]
model_filters = Model(inputs=model.inputs, outputs=outputs)

# Load the sample frame as a single-channel image. `color_mode` replaces the
# deprecated `grayscale=True` flag.
img = keras.preprocessing.image.load_img(
    img_path, color_mode='grayscale', target_size=img_size
)
img_array = keras.preprocessing.image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # add the batch axis

# The datasets are histogram-equalized before they reach the model, so the
# sample must receive the same preprocessing; otherwise the network is probed
# with an input distribution it was not trained on.
img_array = equalize(tf.convert_to_tensor(img_array)).numpy()
img_array.shape

In [None]:
def show_activation_maps(img_array):
    """Plot the activation maps of each convolutional layer for one image.

    One figure is drawn per output of the module-level ``model_filters``
    model. Each figure shows the first ``side * side`` channels on a square
    grid, where ``side`` is derived from the layer's channel count and capped
    at 8 (64 channels -> 8x8, 16 channels -> 4x4).

    Args:
        img_array: batch containing the image to visualise; only the first
            element of the batch is plotted.
    """
    feature_maps = model_filters.predict(img_array)

    for fmap in feature_maps:
        # Size the grid from the data instead of a hard-coded layer position,
        # so the function keeps working if the architecture changes.
        side = min(8, int(np.sqrt(fmap.shape[-1])))

        plt.figure(figsize=(12, 12))
        for channel in range(side * side):
            ax = plt.subplot(side, side, channel + 1)
            ax.set_xticks([])
            ax.set_yticks([])
            ax.imshow(fmap[0, :, :, channel], cmap='gray')
        plt.show()

In [None]:
# Visualise the activation maps for the sample frame.
show_activation_maps(img_array)

The activation maps detect both the person and the background. We see a more gradual extraction of general features in deeper layers.
The normalization of filter responses gives better contrast overall.

### Grad CAM

In [None]:
import matplotlib.cm as cm
from IPython.display import Image, display

img_size = (img_height, img_width)

# Grad-CAM is computed on the last convolutional layer recorded in layer_names.
last_conv_layer_name = layer_names[-1]

# Show the sample frame the heatmap will be computed for.
display(Image(img_path))

In [None]:
# Load the sample frame as a single-channel image. `color_mode` replaces the
# deprecated `grayscale=True` flag.
img = keras.preprocessing.image.load_img(
    img_path, color_mode='grayscale', target_size=img_size
)
img_array = keras.preprocessing.image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # add the batch axis

# Apply the same histogram equalization the datasets receive before they reach
# the model, so the explanation is computed on an input the network was
# trained to handle.
img_array = equalize(tf.convert_to_tensor(img_array)).numpy()

In [None]:
def gradcam(img_array, model, last_conv_layer_name):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        img_array: batch holding the image to explain; the heatmap is built
            from the first element of the batch.
        model: model to explain.
        last_conv_layer_name: name of the layer whose output is weighted by
            the pooled gradients.

    Returns:
        numpy array with the heatmap scaled to [0, 1]. It is all zeros when
        no location contributes positively.
    """
    # Model that returns both the chosen layer's activations and the final
    # prediction for the same input.
    grad_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        # For a model with a single output unit this is always index 0.
        pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the selected output with respect to the layer activations,
    # averaged over batch and spatial axes to get one weight per channel.
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Channel-weighted sum of the activations of the first batch element.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = tf.squeeze(last_conv_layer_output @ pooled_grads[..., tf.newaxis])

    # Rectify first, then normalise by the maximum of the rectified map.
    # divide_no_nan yields 0 when that maximum is 0, whereas dividing by the
    # raw maximum produced NaN (or negative zeros) for maps without any
    # positive value.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

In [None]:
def show_grad(img_array, model, last_conv_layer_name, cam_path="./cam.jpg",
              alpha=0.4, source_path=None):
    """Display the Grad-CAM heatmap of an image and its overlay on the frame.

    The raw heatmap is plotted, then colourised with the 'jet' colormap,
    resized to the source frame, blended onto it, saved to ``cam_path`` and
    displayed. Finally the model's prediction for ``img_array`` is printed.

    Args:
        img_array: preprocessed image batch passed to ``gradcam`` and to
            ``model.predict``.
        model: model to explain.
        last_conv_layer_name: layer used for the Grad-CAM computation.
        cam_path: file the blended image is written to.
        alpha: weight of the coloured heatmap in the blend.
        source_path: frame to draw the heatmap on; defaults to the
            module-level ``img_path``.
    """
    if source_path is None:
        source_path = img_path

    heatmap = gradcam(img_array, model, last_conv_layer_name)

    plt.matshow(heatmap)
    plt.show()

    cam_img = keras.preprocessing.image.load_img(source_path)
    cam_img = keras.preprocessing.image.img_to_array(cam_img)

    # Convert to 8-bit colormap indices; NaNs are mapped to 0 so a degenerate
    # heatmap cannot produce arbitrary indices.
    heatmap = np.uint8(255 * np.nan_to_num(heatmap))

    # plt.get_cmap is used because cm.get_cmap has been removed from recent
    # Matplotlib releases.
    jet = plt.get_cmap("jet")

    # RGB values (alpha dropped) of the 256 colormap entries, indexed by the
    # heatmap to colourise it.
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Resize the coloured heatmap to the frame's width and height.
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((cam_img.shape[1], cam_img.shape[0]))
    jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)

    superimposed_img = jet_heatmap * alpha + cam_img
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)

    superimposed_img.save(cam_path)

    display(Image(cam_path))

    prediction = model.predict(img_array)
    print("Predicted class: ", prediction)
    

In [None]:
# Render the Grad-CAM overlay for the sample frame.
show_grad(img_array, model, last_conv_layer_name)

The heatmap shows the person is now greatly responsible for the classification. However, the background still makes some contributions.
This is much better than checkpoint 3. We believe that the model is underfitting. We expect increasing epochs will result in more localized responses in the heatmap.

Based on the activation maps, it seems that the convolutional layers are performing well.

The denser MLP and the use of sigmoid activations have improved the performance.

The sample weapon image is predicted as "norm". 

The goal is to detect the person and weapon. Hence we make the following changes:

2. Increase epoch count to __