## Imports

In [2]:
! pip install tf-explain

In [3]:
import cv2
import numpy as np
import pandas as pd

import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns


import pickle


import skimage
from skimage.feature import hog, canny
from skimage.filters import sobel
from skimage import color

from sklearn.preprocessing import LabelEncoder, OneHotEncoder


from keras import layers
import keras.backend as K
from keras.models import Sequential, Model
from keras.preprocessing import image
from tensorflow.keras.utils import load_img, img_to_array
from keras.layers import Input, Dense, Activation, Dropout
from keras.layers import Flatten, BatchNormalization
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D
from keras.applications.imagenet_utils import preprocess_input
from keras.applications.vgg19 import preprocess_input
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications import ResNet50
from tf_explain.core.activations import ExtractActivations
from tf_explain.core.grad_cam import GradCAM
from sklearn.model_selection import train_test_split
from keras.utils.data_utils import get_file
from keras.utils.np_utils import to_categorical

from PIL import Image
from tqdm import tqdm
import random as rnd
from keras.preprocessing.image import ImageDataGenerator

## Loading the data

In [4]:
# Root folder of the competition data; defined once so the three paths below
# cannot drift apart and the notebook can be pointed at another location.
DATA_DIR = '../input/state-farm-distracted-driver-detection'

# One row per training image; `classname` and `img` are used to build the file path.
train_df = pd.read_csv(DATA_DIR + '/driver_imgs_list.csv')
train_df['path'] = DATA_DIR + '/imgs/train/' + train_df['classname'] + '/' + train_df['img']
# Sample submission file shipped with the dataset.
pre_df = pd.read_csv(DATA_DIR + '/sample_submission.csv')

In [5]:
# Maps each class code used in the `classname` column to a readable description.
classes = dict([
    ('c0', 'normal driving'),
    ('c1', 'texting - right'),
    ('c2', 'talking on the phone - right'),
    ('c3', 'texting - left'),
    ('c4', 'talking on the phone - left'),
    ('c5', 'operating the radio'),
    ('c6', 'drinking'),
    ('c7', 'reaching behind'),
    ('c8', 'hair and makeup'),
    ('c9', 'talking to passenger'),
])

In [6]:
# Show the column labels of the training metadata frame.
train_df.keys()

In [7]:
# Peek at the first rows, including the derived `path` column.
train_df.head()

In [8]:
# Number of training images per class code.
train_df['classname'].value_counts()

In [9]:
# Count missing values per column.
train_df.isna().sum()


## EDA

In [10]:
# Show the first image of every class in a 3x4 grid, titled with the class description.
plt.figure(figsize=(20,15))
# `class_code` instead of `id`: the loop variable lives on in the notebook
# namespace, and `id` would shadow the builtin for every later cell.
for idx, class_code in enumerate(train_df['classname'].unique()):
    first_path = train_df.loc[train_df['classname'] == class_code, 'path'].values[0]
    plt.subplot(3,4,idx+1)
    plt.imshow(plt.imread(first_path))
    plt.title(classes[class_code])
    plt.axis('off')
plt.tight_layout()
plt.show()

### Class Distribution

In [11]:
# Class code -> number of training images, in the order produced by value_counts().
dictionary = train_df['classname'].value_counts().to_dict()
# Visualize the counts as a bar chart. The dict views are materialised as lists
# so matplotlib receives plain sequences rather than view objects.
class_codes = list(dictionary.keys())
class_counts = list(dictionary.values())
plt.figure(figsize=(5,8))
plt.bar(range(len(class_codes)), class_counts, align='center')
plt.xticks(range(len(class_codes)), class_codes)
plt.xlabel('Class')
plt.ylabel('Number of images')
plt.title('Number of images per class')
plt.show()

### Image Resolution

In [12]:
# Record the pixel width and height of every training image.
widths, heights = [], []

for path in tqdm(train_df["path"]):
    # Open inside a context manager so each file handle is closed immediately;
    # otherwise one handle per image stays open until garbage collection.
    with Image.open(path) as img:
        width, height = img.size
    widths.append(width)
    heights.append(height)

train_df["width"] = widths
train_df["height"] = heights
# Total pixel count per image (width * height).
train_df["dimension"] = train_df["width"] * train_df["height"]

## Feature Engineering

In [13]:
def edges_images_gray(class_name):
    """Plot Sobel edge maps for two randomly chosen images of one class.

    For each sampled image a 2x2 figure is shown: the edge map of the grayscale
    image followed by the edge map of each colour channel.

    Parameters
    ----------
    class_name : str
        Class code to sample from; must be a value of ``train_df['classname']``
        and a key of ``classes``.
    """
    class_paths = train_df.loc[train_df['classname'] == class_name, 'path'].to_numpy()
    # cv2.imread returns channels in blue, green, red order.
    channel_names = ('blue', 'green', 'red')
    for img_path in np.random.choice(class_paths, 2):
        image = cv2.imread(img_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        panels = [('grayscale', sobel(gray))]
        # Filter each channel as its own 2-D image. Passing the whole
        # (H, W, 3) array to sobel() is not a per-channel filter: depending on
        # the scikit-image version it is rejected or the channel axis is
        # treated as a third spatial axis.
        for channel_idx, channel_name in enumerate(channel_names):
            panels.append((channel_name + ' channel', sobel(image[:, :, channel_idx])))

        plt.figure(figsize=(8, 8))
        plt.suptitle(classes[class_name])
        for position, (panel_title, edge_map) in enumerate(panels, start=1):
            plt.subplot(2, 2, position)
            plt.imshow(edge_map, cmap='gray')
            plt.title(panel_title)
            plt.axis('off')
        plt.show()


In [14]:
# Plot edge maps for every class present in the training metadata.
for class_name in train_df['classname'].unique():
    edges_images_gray(class_name)

In [15]:
def corners_images_gray(class_name):
    """Plot detected corner points on four randomly chosen images of one class.

    Corners are found on the grayscale image with ``cv2.goodFeaturesToTrack``
    and drawn as filled green circles on the colour image.

    Parameters
    ----------
    class_name : str
        Class code to sample from; must be a value of ``train_df['classname']``
        and a key of ``classes``.
    """
    class_paths = train_df.loc[train_df['classname'] == class_name, 'path'].to_numpy()
    for img_path in np.random.choice(class_paths, 4):
        image = cv2.imread(img_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        corners_gray = cv2.goodFeaturesToTrack(gray, maxCorners=50, qualityLevel=0.02, minDistance=20)
        # goodFeaturesToTrack returns None when no corner passes the quality
        # threshold; in that case the image is shown without markers.
        if corners_gray is not None:
            for x, y in np.float32(corners_gray).reshape(-1, 2):
                cv2.circle(image, (int(x), int(y)), 6, (0, 255, 0), -1)
        plt.figure(figsize=(16, 16))
        plt.suptitle(classes[class_name])
        plt.subplot(2,2,1)
        # cv2 stores pixels as BGR while matplotlib expects RGB; convert so the
        # colours are displayed correctly (a colormap has no effect on colour images).
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.show()


In [16]:
# Plot corner detections for every class present in the training metadata.
for class_name in train_df['classname'].unique():
    corners_images_gray(class_name)

In [17]:
def sift_images_gray(class_name):
    """Plot SIFT keypoints on four randomly chosen images of one class.

    Keypoints are detected on the grayscale image and drawn in green (rich
    keypoint style) on the colour image.

    Parameters
    ----------
    class_name : str
        Class code to sample from; must be a value of ``train_df['classname']``
        and a key of ``classes``.
    """
    class_paths = train_df.loc[train_df['classname'] == class_name, 'path'].to_numpy()
    # The detector does not depend on the image, so build it once per call
    # rather than once per sampled image.
    sift = cv2.SIFT_create()
    for img_path in np.random.choice(class_paths, 4):
        image = cv2.imread(img_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        kp, _ = sift.detectAndCompute(gray, None)
        kp_img = cv2.drawKeypoints(image, kp, None, color=(0, 255, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
        plt.figure(figsize=(16, 16))
        plt.suptitle(classes[class_name])
        plt.subplot(2,2,1)
        # cv2 stores pixels as BGR while matplotlib expects RGB; convert so the
        # colours are displayed correctly (a colormap has no effect on colour images).
        plt.imshow(cv2.cvtColor(kp_img, cv2.COLOR_BGR2RGB))
        plt.show()

In [None]:
# Plot SIFT keypoints for every class present in the training metadata.
for class_name in train_df['classname'].unique():
    sift_images_gray(class_name)

### Augmentation

In [36]:
def plot_augimages(paths, datagen, n_augmented=4):
    """Plot each image next to randomly augmented versions of it.

    One grid row is drawn per path: the image resized to 224x224 in the first
    column, followed by ``n_augmented`` samples drawn from ``datagen``.

    Parameters
    ----------
    paths : iterable of str
        Image files to display.
    datagen : ImageDataGenerator
        Generator whose ``flow`` method produces the augmented samples.
    n_augmented : int, default 4
        Number of augmented samples shown per image.
    """
    paths = list(paths)
    n_rows, n_cols = len(paths), n_augmented + 1
    if n_rows == 0:
        return

    # The grid follows the number of paths instead of a fixed 10x5 layout;
    # 2.8 inches per row reproduces the original 14x28 figure for 10 images.
    plt.figure(figsize = (14, 2.8 * n_rows))
    plt.suptitle('Augmented Images')

    for row, path in enumerate(paths):
        # Close the file as soon as the resized RGB copy exists.
        with Image.open(path) as img:
            data = img.convert('RGB').resize((224,224))
        samples = np.expand_dims(np.asarray(data), 0)
        it = datagen.flow(samples, batch_size=1)

        first_cell = row * n_cols + 1

        # Show Original Image
        plt.subplot(n_rows, n_cols, first_cell)
        plt.imshow(data)
        plt.axis('off')

        # Show Augmented Images
        for offset in range(1, n_cols):
            plt.subplot(n_rows, n_cols, first_cell + offset)
            # next(it) uses the iterator protocol, which is available on every
            # Keras version; the `.next()` method is not.
            batch = next(it)
            plt.imshow(batch[0].astype('uint8'))
            plt.axis('off')

    plt.tight_layout()
    plt.show()

    
# Shared augmentation pipeline: random rotation, zoom, brightness change,
# channel shift, horizontal/vertical shift, shear and horizontal flip, with
# pixels outside the source image filled from the nearest edge.
# NOTE(review): the class list distinguishes left-hand from right-hand actions
# (e.g. 'texting - right' vs 'texting - left'); horizontal_flip mirrors the
# image but keeps the label — confirm this augmentation is intended.
# NOTE(review): channel_shift_range=0.7 looks negligible if pixels are on a
# 0-255 scale — confirm the intended magnitude.
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.10,
    brightness_range=[0.6,1.4],
    channel_shift_range=0.7,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest'
) 

# Preview the augmentation on 10 image paths drawn at random from the training set.
plot_augimages(np.random.choice(train_df['path'],10), datagen)

## Modeling

In [37]:
# Number of distinct class codes; used as the width of the models' output layers.
y_count=len(train_df['classname'].unique())

### Dense Model

In [38]:
# X keeps both the image path and the class column, because the dataframe
# generators below read the label from it via y_col; y is the label column alone.
X, y = train_df[['path', 'classname']], train_df['classname']

In [39]:
# Hold out 20% of the rows for validation; random_state fixes the shuffle.
# NOTE(review): rows are split purely at random. If several images come from
# the same driver or recording, near-duplicates can land on both sides of the
# split and inflate validation accuracy — TODO confirm whether a grouped split
# is needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
# X_train_ = 

# X_train_ = X_train.reshape(X_train.shape[0], 64*64*3)
# X_test_ = X_test.reshape(X_test.shape[0], 64*64*3)

In [41]:
# Fully connected baseline on 64x64 RGB images.
dense_model = Sequential()
# Flatten first: a Dense layer only transforms the last axis, so applied
# directly to a (64, 64, 3) input it would act on the 3 colour channels of each
# pixel separately, and flattening afterwards would feed 64*64*256 values into
# the output layer.
dense_model.add(Flatten(input_shape=(64, 64, 3)))
dense_model.add(Dense(512, activation='relu'))
dense_model.add(Dense(256, activation='relu'))
# One softmax unit per class.
dense_model.add(Dense(y_count, activation='softmax'))
dense_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
dense_model.summary()

In [42]:
# Stream augmented training images from disk in batches of 40.
train_generator_custom_model = datagen.flow_from_dataframe(
        X_train,  # dataframe holding the training image paths and labels
        x_col='path',
        y_col='classname',
        # (height, width) each image is resized to. The previous value
        # (64*64, 1) resized every image to a 4096x1 strip, which does not
        # match the (64, 64, 3) model input.
        target_size=(64, 64),
        batch_size=40,
        class_mode="categorical",
        shuffle=True,
)
# Validation images are only resized: no random augmentation and no shuffling,
# so validation metrics are comparable from one epoch to the next.
val_generator_custom_model = ImageDataGenerator().flow_from_dataframe(
        X_test,  # dataframe holding the validation image paths and labels
        x_col='path',
        y_col='classname',
        target_size=(64, 64),
        batch_size=40,
        class_mode="categorical",
        shuffle=False,
)

# These generators yield 64x64 image batches; a Dense-only model has to flatten
# that input itself (e.g. with a leading Flatten layer) to consume them.

In [43]:
def path_to_tensor(img_path, target_size=(64, 64)):
    """Load one image file as an array with a leading batch axis of length 1.

    Parameters
    ----------
    img_path : str
        Path of the image file.
    target_size : tuple of int, default (64, 64)
        (height, width) the image is resized to while loading.

    Returns
    -------
    numpy.ndarray
        The image array returned by ``img_to_array`` with one extra leading axis.
    """
    img = load_img(img_path, target_size=target_size)
    return np.expand_dims(img_to_array(img), axis=0)

def paths_to_tensor(img_paths, target_size=(64, 64)):
    """Load several image files and stack them along the first axis.

    Parameters
    ----------
    img_paths : iterable of str
        Paths of the image files.
    target_size : tuple of int, default (64, 64)
        (height, width) every image is resized to while loading.

    Returns
    -------
    numpy.ndarray
        One row per image; an empty array with first dimension 0 when
        ``img_paths`` is empty.
    """
    list_of_tensors = [path_to_tensor(img_path, target_size) for img_path in img_paths]
    if not list_of_tensors:
        # np.vstack raises on an empty list; return an empty batch instead.
        # NOTE(review): assumes 3-channel float32 images, as for non-empty input — confirm.
        return np.empty((0, target_size[0], target_size[1], 3), dtype='float32')
    return np.vstack(list_of_tensors)

In [44]:
# Preview the training split; show only the first rows rather than the whole frame.
X_train.head()

In [45]:
# Load every training image into memory at 64x64, then scale by 1/255 and
# subtract 0.5 (centres the values around zero, assuming 0-255 pixels).
train_tensors = paths_to_tensor(X_train['path']).astype('float32')/255 - 0.5

In [46]:
# Same loading and scaling for the validation images.
val_tensors = paths_to_tensor(X_test['path']).astype('float32')/255 - 0.5

In [47]:
# One-hot encode the class codes: 'c7' -> 7 -> one-hot row.
# num_classes is passed explicitly so both matrices always have y_count columns;
# otherwise the width is inferred from the largest label present in each split
# and the two matrices could differ in shape.
labels = [int(code.split('c')[-1]) for code in y_train]
labels = to_categorical(labels, num_classes=y_count)

vlabels = [int(code.split('c')[-1]) for code in y_test]
vlabels = to_categorical(vlabels, num_classes=y_count)

In [48]:
# Augmented batches of 20 for training.
# NOTE(review): `datagen` includes brightness_range, while these tensors were
# shifted to roughly [-0.5, 0.5]; confirm the brightness augmentation behaves as
# intended on inputs that are not on a 0-255 scale.
X_train_ = datagen.flow(train_tensors, labels, batch_size=20)
# Validation batches are served unchanged and in a fixed order, so validation
# metrics measure the model rather than the random augmentation.
X_test_ = ImageDataGenerator().flow(val_tensors, vlabels, batch_size=20, shuffle=False)

In [None]:
# Train the dense baseline. The validation iterator is passed so every epoch
# also reports validation loss and accuracy.
# NOTE(review): steps_per_epoch=20 limits each epoch to 20 batches — confirm
# that training on a subset per epoch is intended.
history_custom_model = dense_model.fit(
      X_train_,
      validation_data=X_test_,
      epochs=20,
      steps_per_epoch=20
)

### Convolutional Model

In [None]:
# Small convolutional classifier for 64x64 RGB images.
cnn_model = Sequential()

## CNN 1: two 3x3 convolutions with batch normalisation, then pooling and dropout
cnn_model.add(Convolution2D(32,(3,3),activation='relu',input_shape=(64, 64, 3)))
cnn_model.add(BatchNormalization())
cnn_model.add(Convolution2D(32,(3,3),activation='relu',padding='same'))
cnn_model.add(BatchNormalization(axis = 3))
cnn_model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
cnn_model.add(Dropout(0.3))

## Output: fully connected head
cnn_model.add(Flatten())
cnn_model.add(Dense(512,activation='relu'))
cnn_model.add(BatchNormalization())
cnn_model.add(Dropout(0.5))
cnn_model.add(Dense(128,activation='relu'))
cnn_model.add(Dropout(0.25))
# Output width follows the number of classes in the data (y_count), as in the
# other models of this notebook, instead of a hard-coded 10.
cnn_model.add(Dense(y_count,activation='softmax'))

In [None]:
# Stream augmented training images from disk in batches of 40.
train_generator_cnn_model = datagen.flow_from_dataframe(
        X_train,  # dataframe holding the training image paths and labels
        x_col='path',
        y_col='classname',
        target_size=(64, 64),  # (height, width) each image is resized to
        batch_size=40,
        class_mode="categorical",
        shuffle=True,
)
# Validation images are only resized: no random augmentation and no shuffling,
# so validation metrics are comparable from one epoch to the next.
val_generator_cnn_model = ImageDataGenerator().flow_from_dataframe(
        X_test,  # dataframe holding the validation image paths and labels
        x_col='path',
        y_col='classname',
        target_size=(64, 64),  # (height, width) each image is resized to
        batch_size=40,
        class_mode="categorical",
        shuffle=False,
)

In [None]:
# Categorical cross-entropy matches the one-hot labels produced with class_mode="categorical".
cnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the CNN on the generators built for it. The previous call used the
# `*_custom_model` generators, which were configured for the dense model and do
# not produce the 64x64 images this network expects.
# NOTE(review): the result reuses the name `history_custom_model`, overwriting
# the dense model's history — kept in case later cells rely on the name.
history_custom_model = cnn_model.fit(
      train_generator_cnn_model,
      validation_data=val_generator_cnn_model,
      steps_per_epoch=100,
      epochs=70,
      verbose=2)

### Transfer Learning

In [49]:
# VGG19 convolutional base with ImageNet weights and no classification head,
# taking 560x560 RGB input.
vgg19 = VGG19(weights='imagenet', include_top=False, input_shape=(560,560,3))
# Freeze the base so that only the new output layer is trained.
vgg19.trainable = False
# New head: global average pooling followed by one softmax unit per class.
x = GlobalAveragePooling2D()(vgg19.output)
predictions = Dense(y_count, activation='softmax')(x)
model_vgg19 = Model(inputs = vgg19.input, outputs = predictions)
model_vgg19.summary()

In [50]:
model_vgg19.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
# Multiply the learning rate by `factor` when val_loss has not improved by at
# least min_delta for `patience` epochs.
# NOTE(review): factor=0.01 cuts the learning rate 100-fold in one step — confirm this is intended.
rlrp_vgg19 = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss",factor=0.01,patience=2,verbose=2,mode="auto",min_delta=0.0001,cooldown=0,min_lr=0)

In [51]:
# `preprocessing_function` is an argument of the ImageDataGenerator constructor,
# not of flow_from_dataframe. Passed to flow_from_dataframe it is not applied
# (NOTE(review): confirm against the installed Keras version), so VGG19
# preprocessing is configured on a dedicated generator here. The augmentation
# settings repeat those of `datagen`.
vgg19_train_datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.10,
    brightness_range=[0.6,1.4],
    channel_shift_range=0.7,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
    preprocessing_function=preprocess_input,
)
train_generator_vgg_19 = vgg19_train_datagen.flow_from_dataframe(
        X_train,  # dataframe holding the training image paths and labels
        x_col='path',
        y_col='classname',
        target_size=(560, 560),  # (height, width) matching the VGG19 input shape
        batch_size=16,
        class_mode="categorical",
        shuffle=True,
)

In [52]:
# `preprocessing_function` is an argument of the ImageDataGenerator constructor,
# not of flow_from_dataframe. Passed to flow_from_dataframe it is not applied
# (NOTE(review): confirm against the installed Keras version), so it is set on
# the generator here. Validation images get the VGG19 preprocessing only: no
# random augmentation and no shuffling, so metrics are comparable between epochs.
val_generator_vgg_19 = ImageDataGenerator(
    preprocessing_function=preprocess_input
).flow_from_dataframe(
        X_test,  # dataframe holding the validation image paths and labels
        x_col='path',
        y_col='classname',
        target_size=(560, 560),  # (height, width) matching the VGG19 input shape
        batch_size=16,
        class_mode="categorical",
        shuffle=False,
)

In [53]:
# Train only the new output layer (the VGG19 base is frozen) for 2 epochs,
# with the learning-rate-reduction callback watching val_loss.
history_vgg19 = model_vgg19.fit(
      train_generator_vgg_19,
     validation_data=val_generator_vgg_19,
      epochs=2,
      callbacks = [rlrp_vgg19],
      verbose=2)

## Kernel Visualization

In [54]:
# Class code -> integer class index passed to Grad-CAM.
dict_class = dict(zip(
    ('c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'),
    range(10),
))


In [55]:
def gradcam_visualise(data, model, class_index):
    """Run tf-explain's GradCAM on `data` for one class and return its output.

    `data`, `model` and `class_index` are forwarded unchanged to
    ``GradCAM().explain``.
    """
    return GradCAM().explain(data, model, class_index=class_index)

def activation_visualise(image, model, layers):
    """Run tf-explain's ExtractActivations for the named layers and return its output.

    `image` is wrapped in a one-element list before being handed to
    ``ExtractActivations().explain``; `layers` is passed as ``layers_name``.
    """
    wrapped_input = [image]
    return ExtractActivations().explain(wrapped_input, model, layers_name=layers)

In [56]:
def plot_data_four(class_name, outputs):
    """Show the first four entries of `outputs` in a 2x2 grid.

    The figure title is the description of `class_name` looked up in `classes`.
    """
    plt.figure(figsize=(16, 16))
    plt.suptitle(classes[class_name])
    # Panels are filled in row-major order: positions 1..4 of the 2x2 grid.
    for position in range(1, 5):
        plt.subplot(2, 2, position)
        plt.imshow(outputs[position - 1])
    plt.show()

In [57]:
def grad_cam(model, df_exp, class_name, class_index, image_size):
    """Plot Grad-CAM results for four randomly chosen images of one class.

    Parameters
    ----------
    model : keras Model
        Model to explain.
    df_exp : pandas.DataFrame
        Frame with `classname` and `path` columns to sample images from.
    class_name : str
        Class code selecting the rows of `df_exp`; also used for the figure title.
    class_index : int
        Index of the model output the Grad-CAM is computed for.
    image_size : tuple of int
        Size passed to ``cv2.resize``.
    """
    class_paths = df_exp.loc[df_exp['classname'] == class_name, 'path'].to_numpy()
    output_data = []
    for img_path in np.random.choice(class_paths, 4):
        # cv2.imread returns BGR channel order; convert to RGB so the result is
        # displayed with correct colours by matplotlib.
        # NOTE(review): the model presumably saw RGB images during training, and
        # any preprocessing applied at training time is not applied here — confirm.
        image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, image_size)
        # tf-explain takes an (images, labels) pair; labels are not needed.
        data = ([image], None)
        output_data.append(gradcam_visualise(data, model, class_index))
    plot_data_four(class_name, output_data)

In [58]:
def activations_model(model, df_exp, class_name, layers, image_size):
    """Plot layer activations for four randomly chosen images of one class.

    Parameters
    ----------
    model : keras Model
        Model whose activations are extracted.
    df_exp : pandas.DataFrame
        Frame with `classname` and `path` columns to sample images from.
    class_name : str
        Class code selecting the rows of `df_exp`; also used for the figure title.
    layers : list of str
        Layer names forwarded to ``activation_visualise``.
    image_size : tuple of int
        Size passed to ``cv2.resize``.
    """
    class_paths = df_exp.loc[df_exp['classname'] == class_name, 'path'].to_numpy()
    output_data = []
    for img_path in np.random.choice(class_paths, 4):
        # cv2.imread returns BGR channel order; convert to RGB.
        # NOTE(review): the model presumably saw RGB images during training — confirm.
        image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, image_size)
        image = tf.expand_dims(image, axis=0)
        # NOTE(review): the batch is wrapped in a list here and again inside
        # activation_visualise — confirm the double nesting is what tf-explain expects.
        output_data.append(activation_visualise([image], model, layers))
    plot_data_four(class_name, output_data)

In [None]:
# Show Grad-CAM results for every class present in the validation split,
# using 560x560 images to match the VGG19 model's input shape.
for class_name in X_test['classname'].unique():
    grad_cam(model_vgg19, X_test, class_name, dict_class[class_name], (560,560))