# State Farm Distracted Driver Detection

In [None]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2 #opencv library
import random
import h5py

import matplotlib.pyplot as plt  #plotting library
import matplotlib.image as mpimg
import seaborn as sns
from PIL import Image
from IPython.display import Image, SVG

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import tensorflow
from keras import layers, models, optimizers
from keras.utils import np_utils
from keras.utils.vis_utils import plot_model, model_to_dot
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.preprocessing.image import img_to_array, load_img
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
print(os.listdir("../input"))

In [None]:
# Set Constant Values
num_train = 1000  # max number of images loaded per class for training
num_test = 1000  # max number of test images loaded
img_width = 240  # target size used when resizing images and as model input shape
img_height = 240
batch_size = 64  # mini-batch size passed to model.fit
nb_epochs = 12  # upper bound on training epochs passed to model.fit

## Exploratory Analysis

In [None]:
# Define paths
# train_path is joined with each class name below, i.e. one sub-directory per class.
train_path = '../input/state-farm-distracted-driver-detection/imgs/train'
# test_path is listed directly; its images are not grouped into class folders.
test_path = '../input/state-farm-distracted-driver-detection/imgs/test/'
# Class identifiers; the position in this list is used as the integer label.
classes = ['c0','c1','c2','c3','c4','c5','c6','c7','c8','c9']
driver_img_list = '../input/state-farm-distracted-driver-detection/driver_imgs_list.csv'
# read image csv file (its 'classname' column is used for the class distribution plot)
img_list = pd.read_csv(driver_img_list)

In [None]:
# Report how many training images exist for each class directory.
for category in classes:
    path = os.path.join(train_path, category)
    # len() of the listing replaces the manual counting loop.
    i = len(os.listdir(path))
    print('Number of instances of class {} in Train: {}'.format(category, i))

# Report how many images are in the test directory.
i = len(os.listdir(test_path))
print('\nTotal number of images in Test: ', i)

In [None]:
# Bar chart of how many rows of the image list belong to each class.
nf = img_list['classname'].value_counts(sort=False)
labels = nf.index.tolist()
y = nf.to_numpy()
N = len(y)
x = range(N)
width = 1/1.5

fig = plt.figure(figsize=(20,15))
ay = fig.add_subplot(211)
# Tick configuration happens before drawing the bars, as in the original order.
plt.xticks(x, labels, size=15)
plt.yticks(size=15)
ay.bar(x, y, width, color="blue")
ay.set_title('Class Distribution', size=25)
ay.set_xlabel('Class Name', size=15)
ay.set_ylabel('Count', size=15)
plt.show()

In [None]:
# explanation for each of the classes
# Human-readable description for each class identifier in `classes`.
# Used for print-outs and plot titles only; it does not affect training.
class_dict = {
    'c0': 'hands on the wheel',
    'c1': 'mobile in right hand',
    'c2': 'talking on the phone with right hand',
    'c3': "mobile in left hand",
    'c4': 'talking on the phone with left hand',
    'c5': 'touching at the dash',
    'c6': 'drinking',
    'c7': 'reaching behind',
    'c8': 'touching the head',
    'c9': 'looking to the side'
}

In [None]:
# Sample Image for each class: show the first readable image of every class folder.
for i in classes:
    path = os.path.join(train_path, i)
    print("Class ", i, ': ', class_dict[i])
    for img in os.listdir(path):
        img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_COLOR)
        if img_array is None:
            # cv2.imread returns None for files it cannot decode; try the next one.
            continue
        # OpenCV loads colour images as BGR while matplotlib expects RGB,
        # so convert before plotting to avoid swapped red/blue channels.
        plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
        plt.show()
        print('\n')
        break

## Create Training and Testing Data

In [None]:
def create_training_data():
    """Load up to ``num_train`` grayscale training images per class.

    Reads the module-level ``classes``, ``train_path``, ``num_train``,
    ``img_width`` and ``img_height``.

    Returns:
        A list of ``[image, class_num]`` pairs, where ``image`` is the resized
        grayscale array and ``class_num`` is the index of the class in
        ``classes``.
    """
    training_data = []
    for class_num, category in enumerate(classes):
        path = os.path.join(train_path, category)
        loaded = 0
        for img in os.listdir(path):
            # Stop scanning the directory as soon as the per-class quota is met.
            if loaded >= num_train:
                break
            img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            if img_array is None:
                # Unreadable / non-image file: skip it instead of crashing in resize.
                continue
            # resize image
            new_img = cv2.resize(img_array, (img_width, img_height))
            # get image and class type
            training_data.append([new_img, class_num])
            loaded += 1
    return training_data

In [None]:
def create_testing_data():
    """Load up to ``num_test`` grayscale images from the test directory.

    Reads the module-level ``test_path``, ``num_test``, ``img_width`` and
    ``img_height``.

    Returns:
        A list of ``[file_name, image]`` pairs, where ``image`` is the resized
        grayscale array.
    """
    testing_data = []
    for img in os.listdir(test_path):
        # Stop scanning the directory once enough images have been collected.
        if len(testing_data) >= num_test:
            break
        img_array = cv2.imread(os.path.join(test_path, img), cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # Unreadable / non-image file: skip it instead of crashing in resize.
            continue
        new_img = cv2.resize(img_array, (img_width, img_height))
        testing_data.append([img, new_img])
    return testing_data

In [None]:
# Load the image subsets into memory:
# training_data holds [image, class_num] pairs, testing_data holds [file_name, image] pairs.
training_data = create_training_data()
testing_data = create_testing_data()

In [None]:
# Randomise the sample order in place, then separate images from labels.
random.shuffle(training_data)
x = [sample[0] for sample in training_data]
y = [sample[1] for sample in training_data]

#### Convert y to dummy variables

In [None]:
y[0:10]  # peek at the first ten integer class labels after shuffling

In [None]:
# One-hot encode the integer labels into 10 columns (one per class).
Y = np_utils.to_categorical(y, num_classes=10)
Y[0:10]  # display the first ten encoded rows

#### Reshape

In [None]:
# Reshaping the images into a single 4-D array (sample count, w, h, channels=1)
# NOTE(review): cv2.resize is called with (img_width, img_height) and presumably
# yields (height, width) arrays; this reshape is only safe while both constants
# are equal (they are both 240) — TODO confirm before using non-square sizes.
X = np.array(x).reshape(-1,img_width,img_height,1)
X[0].shape  # display the shape of a single sample

### Split into Train/test sets using train_test_split

In [None]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
# X_test / y_test are later passed to model.fit as validation data.
X_train,X_test,y_train,y_test = train_test_split(X, Y,test_size=0.2,random_state=123)

In [None]:
# Report array shapes; the last two lines print the train and held-out label shapes in that order.
print("Shape of train images is:", X_train.shape)
print("Shape of validation images is:", X_test.shape)
print("Shape of labels is:", y_train.shape)
print("Shape of labels is:", y_test.shape)

### Creating model architecture

In [None]:
# build the model: four convolutional stages followed by a dense classifier
# that outputs a softmax over the 10 classes.
model = Sequential()

## CNN 1 (64 filters; input is a single-channel img_width x img_height image)
# The first convolution does not pass padding='same', unlike every later one.
model.add(Conv2D(64,(3,3),activation='relu',input_shape=(img_width,img_height,1)))
model.add(BatchNormalization())
model.add(Conv2D(64,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization(axis = 3))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.2))

## CNN 2 (128 filters)
model.add(Conv2D(128,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(128,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization(axis = 3))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.1))

## CNN 3 (256 filters)
model.add(Conv2D(256,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(256,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization(axis = 3))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.1))

## CNN 4 (single 5x5 convolution with 512 filters)
model.add(Conv2D(512,(5,5),activation='relu',padding='same'))
model.add(BatchNormalization(axis = 3))
model.add(MaxPooling2D(pool_size=(2,2),padding='same'))
model.add(Dropout(0.1))

## Dense & Output
model.add(Flatten())
model.add(Dense(units = 256,activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.1))
model.add(Dense(units = 128,activation='relu'))
model.add(Dropout(0.2))
# One output unit per class; softmax pairs with the categorical cross-entropy loss.
model.add(Dense(10,activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Render the architecture diagram to disk, then display it inline.
# NOTE(review): plot_model is called without a file name, so this relies on its
# default output being 'model.png' — confirm against the installed Keras version.
plot_model(model,show_shapes=True)
# `Image` here is IPython.display.Image: that import comes after `from PIL import Image`
# and therefore rebinds the name.
Image(filename = 'model.png')

### Compile and fit model

In [None]:
# Categorical cross-entropy matches the one-hot labels and softmax output;
# accuracy is tracked as an additional metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Training callbacks:
#  - EarlyStopping watches validation accuracy. The metric is compiled as 'accuracy'
#    and read back from the history as 'val_accuracy', so that is the key to monitor
#    ('val_acc' is never logged and would leave early stopping inactive).
#  - ModelCheckpoint keeps only the weights with the best validation loss.
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=5),
    ModelCheckpoint('weights.h5', monitor='val_loss', save_best_only=True, verbose=0, mode='auto'),
]
# X_test / y_test (the 20% hold-out) serve as the validation set during training.
history = model.fit(X_train,y_train,batch_size=batch_size,epochs=nb_epochs,verbose=1,validation_data=(X_test,y_test),callbacks=callbacks)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by model.fit.
loss = history.history['loss']
val_loss = history.history['val_loss']
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# One figure per metric: (train curve, validation curve, title, legend, output file).
curve_specs = (
    (loss, val_loss, 'Training and validation loss',
     ['Train Loss','Test Loss'], 'losses.png'),
    (accuracy, val_accuracy, 'Training and validation Accuracy',
     ['Train Acc','Test Acc'], 'accuracy.png'),
)
for train_curve, val_curve, title, legend_names, out_file in curve_specs:
    plt.figure(figsize=(7, 5))
    plt.plot(train_curve)
    plt.plot(val_curve)
    plt.xlabel('Epochs')
    plt.title(title)
    plt.legend(legend_names, loc='best')
    plt.savefig(out_file)

## Predict and Evaluate Model

#### Predict on Validation Data

In [None]:
# Predict the whole validation set in one batched call instead of one
# model.predict call per image, which is dramatically slower.
preds = model.predict(X_test)
# Predicted class = index of the highest softmax probability.
y_val_pred = np.argmax(preds, axis=1).tolist()
# y_test is one-hot encoded, so argmax recovers the original integer label.
y_val_actual = np.argmax(y_test, axis=1).tolist()

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix

In [None]:
# Confusion Matrix: rows are the actual classes, columns the predicted ones.
data = {'y_Actual':    y_val_actual,
        'y_Predicted': y_val_pred}
df = pd.DataFrame(data, columns=['y_Actual','y_Predicted'])
confusion_matrix = pd.crosstab(df['y_Actual'], df['y_Predicted'], rownames=['Actual'], colnames=['Predicted'])

plt.figure(figsize=(10,10))
# fmt='d' prints the integer counts verbatim; the default format would show
# counts of 100 or more in scientific notation (e.g. 2e+02).
sns.heatmap(confusion_matrix, annot=True, fmt='d')
plt.show()

In [None]:
# Per-class precision / recall / F1 on the validation split.
# Passing explicit labels keeps the report aligned with the class names even if
# some class happens to be missing from the validation data, and target_names is
# given as a real list rather than a dict view.
print(classification_report(
    y_val_actual,
    y_val_pred,
    labels=list(range(len(class_dict))),
    target_names=list(class_dict),
))

### Predict on Testing Data

In [None]:
# Predict a class index for every loaded test image.
# Iterating over testing_data (rather than range(num_test)) avoids an IndexError
# when fewer than num_test images were loaded, and a single batched predict call
# replaces one call per image.
y_pred = []
if testing_data:
    test_batch = np.array([image for _, image in testing_data]).reshape(-1,img_width,img_height,1)
    preds = model.predict(test_batch)
    # Predicted class = index of the highest softmax probability.
    y_pred = np.argmax(preds, axis=1).tolist()

In [None]:
# Sample Predictions: show up to ten test images (indices 120-129) with their predicted class.
# The upper bound is clamped so a smaller test subset does not raise IndexError.
for n in range(120, min(130, len(y_pred))):
    img_array = cv2.imread(os.path.join(test_path, testing_data[n][0]), cv2.IMREAD_COLOR)
    # OpenCV loads colour images as BGR while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
    title_val = y_pred[n]
    plt.title('Predicted c{}: {}'.format(title_val, class_dict['c{}'.format(title_val)]))
    plt.show()
    print('\n')