# Problem description

According to the CDC motor vehicle safety division, one in five car accidents is caused by a distracted driver. Sadly, this translates to 425,000 people injured and 3,000 people killed by distracted driving every year.

State Farm hopes to improve these alarming statistics, and better insure their customers, by testing whether dashboard cameras can automatically detect drivers engaging in distracted behaviors. Given a dataset of 2D dashboard camera images, State Farm is challenging Kagglers to classify each driver's behavior. Are they driving attentively, wearing their seatbelt, or taking a selfie with their friends in the backseat?

In [None]:
import os, shutil
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import random

import tensorflow as tf

from keras.models import Sequential
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers

In [None]:
# Kaggle input layout: the competition folder holds an imgs/ directory (with
# train/ and test/ inside it) next to the two CSV files loaded below.
base_dir = "../input/state-farm-distracted-driver-detection/"
img_folder = os.path.join(base_dir, 'imgs/')
train_imgs, test_imgs = (
    os.path.join(img_folder, split_dir) for split_dir in ('train/', 'test/')
)

# Image index and sample submission, each read into a DataFrame.
driver_imgs_list, sample_sub = (
    pd.read_csv(os.path.join(base_dir, csv_name))
    for csv_name in ('driver_imgs_list.csv', 'sample_submission.csv')
)

In [None]:
# Preview the first rows of the image index loaded from driver_imgs_list.csv.
driver_imgs_list.head()

In [None]:
# (number of rows, number of columns) of the image index.
driver_imgs_list.shape

In [None]:
# Preview the first rows of the sample submission file.
sample_sub.head()

In [None]:
# Class codes c0..c9 in order, then a lookup from each code to its
# human-readable description (descriptions are listed in code order).
classes = ['c' + str(idx) for idx in range(10)]

class_def = dict(zip(classes, (
    'safe driving',
    'texting - right',
    'talking on the phone - right',
    'texting - left',
    'talking on the phone - left',
    'operating the radio',
    'drinking',
    'reaching behind',
    'hair and makeup',
    'talking to passenger',
)))

Display 20 sample images

In [None]:
# Show a rows x columns grid of randomly chosen training images, each titled
# with the description of its class.
columns = 5
rows = 4
fig = plt.figure(figsize=(20, 18))
n_images = driver_imgs_list.shape[0]
for i in range(1, columns*rows + 1):
    # randrange excludes its upper bound. randint(0, n) can return n itself,
    # which is one past the last row and makes the row lookup fail.
    pic_idx = random.randrange(n_images)
    sample = driver_imgs_list.iloc[pic_idx]
    class_code = str(sample['classname'])
    ax = fig.add_subplot(rows, columns, i)
    # Build the path from the shared train_imgs folder; the context manager
    # closes the file once the image has been handed to matplotlib.
    with Image.open(os.path.join(train_imgs, class_code, str(sample['img']))) as im:
        ax.imshow(im)
    ax.set_title('State of driving: ' + class_def[class_code])
    ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Print the (width, height) that PIL reports for one training image; the
# images are expected to be 640 x 480.
# The context manager closes the file handle after the size has been read.
with Image.open(os.path.join(train_imgs, 'c7', 'img_100702.jpg')) as im:
    w, h = im.size
print(w, h)

In [None]:
# Two independent generators with the same configuration: pixel values are
# multiplied by 1/255.
train_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255),
    ImageDataGenerator(rescale=1./255),
)

In [None]:
# Input pipeline settings.
image_size = (256, 256)
val_frac = 0.12
batch_size = 16
train_dir = "../input/state-farm-distracted-driver-detection/imgs/train/"

# Build the training and validation datasets with identical arguments (same
# seed and validation_split); only `subset` differs between the two calls.
train_generator, val_generator = (
    tf.keras.preprocessing.image_dataset_from_directory(
        train_dir,
        labels='inferred',
        label_mode='categorical',
        image_size=image_size,
        batch_size=batch_size,
        seed=1,
        shuffle=True,
        validation_split=val_frac,
        subset=subset_name,
    )
    for subset_name in ('training', 'validation')
)

In [None]:
def normalize(image, label):
    """Divide `image` by 255 and cast the result to float32.

    `label` is passed through unchanged. The `(image, label)` pair is
    returned so the function can be applied with `Dataset.map`.
    """
    scaled = image / 255.
    return tf.cast(scaled, tf.float32), label

# Apply the pixel scaling to both datasets.
train_generator, val_generator = (
    train_generator.map(normalize),
    val_generator.map(normalize),
)

In [None]:
# Peek at a single batch to confirm the shapes that will be fed to the model.
for data_batch, labels_batch in train_generator.take(1):
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)

# Early-stopping callback: watches the training loss and stops after 4 epochs
# without improvement. It only has an effect when passed to `fit`.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    mode='min',
    patience=4,
    verbose=1,
)

In [None]:
# CNN classifier: six Conv2D(3x3, ReLU) + 2x2 max-pool stages with a growing
# number of filters, then three dense ReLU layers and a 10-way softmax.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(512, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    # Classification head.
    Flatten(),
    Dense(1024, activation='relu'),
    Dense(512, activation='relu'),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# RMSprop optimizer selected by name; categorical cross-entropy loss with
# accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for up to 10 epochs.
# steps_per_epoch / validation_steps are deliberately not set: each epoch is
# then one full pass over the training dataset and validation scores the whole
# validation dataset. With a fixed steps_per_epoch, a non-repeating dataset is
# not restarted between epochs, so it would have to supply
# steps_per_epoch * epochs batches or training ends early with an
# "input ran out of data" warning.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    # Use the EarlyStopping callback configured earlier in the notebook.
    callbacks=[callback],
)

In [None]:
# Save the trained model to 'distracted_driver.h5' in the working directory.
model.save('distracted_driver.h5')

In [None]:
# Per-epoch metrics recorded by fit().
tr_loss = history.history['loss']
tr_acc = history.history['accuracy']
val_loss = history.history['val_loss']
val_acc = history.history['val_accuracy']
epochs = range(1, len(tr_loss)+1)

# Create the figure directly. Calling plt.clf() first would open (and later
# display) an extra empty figure when no figure is current.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 5))

# Left panel: loss curves. Right panel: accuracy curves.
for ax, train_curve, val_curve, short_name, y_label in (
    (ax1, tr_loss, val_loss, 'loss', 'loss'),
    (ax2, tr_acc, val_acc, 'acc', 'accuracy'),
):
    ax.plot(epochs, train_curve, 'b', label='Training ' + short_name)
    ax.plot(epochs, val_curve, 'r', label='Validation ' + short_name)
    ax.set_title('Training & Validation ' + short_name)
    ax.set_xlabel('epochs')
    ax.set_ylabel(y_label)
    ax.legend()
plt.show()

# Prediction on test images & submission

# flow_from_directory reads images from sub-folders of `directory`, so it is
# pointed at imgs/ with "test" listed as the only sub-folder.
img_path = "../input/state-farm-distracted-driver-detection/imgs/"
test_generator = test_datagen.flow_from_directory(
    directory=img_path,
    target_size=image_size,
    color_mode="rgb",
    # The training images were resized by image_dataset_from_directory, whose
    # default interpolation is bilinear; the default here is nearest.
    interpolation="bilinear",
    # Larger batches only speed up inference; the predictions are the same.
    batch_size=64,
    # The test images have no labels, so yield image batches only.
    class_mode=None,
    # Keep a fixed order so predictions line up with test_generator.filenames.
    shuffle=False,
    classes=['test']
)

# Without `steps`, predict runs over len(test_generator) batches, i.e. every
# test image exactly once, whatever the size of the test folder.
preds = model.predict(test_generator)

# Take the image names from the generator itself: they are in the same order
# as the rows of `preds` (entries look like "test/<file name>").
test_ids = [os.path.basename(name) for name in test_generator.filenames]

# Build the submission table in one step instead of growing it row by row.
pred_df = pd.DataFrame(
    preds,
    columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'],
)
pred_df.insert(0, 'img', test_ids)

pred_df.to_csv('predictions.csv', index=False)