In [None]:
#importing the necessary packages 

import numpy as np
import pandas as pd
import os 
from PIL import Image
import matplotlib.pyplot as plt

import random
import tensorflow as tf
from keras.models import Sequential
from keras.layers import MaxPooling2D,Conv2D,Dense,Dropout,Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers

In [None]:
# Locations of the competition data, relative to the notebook's working directory

base_dir = '../input/state-farm-distracted-driver-detection/'  # dataset root
images_dir, train_dir, test_dir = (
    os.path.join(base_dir, sub_dir)
    for sub_dir in ('imgs/', 'imgs/train/', 'imgs/test/')
)

# CSV files read from the dataset root: the image listing and the sample submission
driver_imgs_list, sample_sub = (
    pd.read_csv(os.path.join(base_dir, csv_name))
    for csv_name in ('driver_imgs_list.csv', 'sample_submission.csv')
)

In [None]:
# Preview the first five rows of the image listing
driver_imgs_list.head(n=5)

In [None]:
# Distinct class labels that appear in the image listing
classes = driver_imgs_list.classname.unique()
classes

In [None]:
# Human-readable description of each class label, used for plot titles

class_def = dict((
    ('c0', 'safe driving'),
    ('c1', 'texting - right'),
    ('c2', 'talking on the phone - right'),
    ('c3', 'texting - left'),
    ('c4', 'talking on the phone - left'),
    ('c5', 'operating the radio'),
    ('c6', 'drinking'),
    ('c7', 'reaching behind'),
    ('c8', 'hair and makeup'),
    ('c9', 'talking to passenger'),
))



In [None]:
# Class distribution: number of listed images per class, smallest class first

images_per_class = driver_imgs_list.groupby('classname')['img'].count().sort_values()
ax = images_per_class.plot.bar()
ax.set_ylabel('Images range')
plt.show()

In [None]:
# Show a random grid of training images, each titled with its class description

fig = plt.figure(figsize=(20,18)) # overall figure size
cols,rows = 5,4
n_images = driver_imgs_list.shape[0]

for i in range(1,cols*rows+1):
    # randrange excludes its upper bound, so pic_indx is always an existing row label.
    # random.randint(0, n_images) includes n_images itself and could make .loc raise KeyError.
    pic_indx = random.randrange(n_images)
    class_name = str(driver_imgs_list.loc[pic_indx,'classname'])
    img_name = str(driver_imgs_list.loc[pic_indx,'img'])

    fig.add_subplot(rows,cols,i)
    # Images live in train_dir/<classname>/<img>; the context manager closes each
    # file once it has been drawn, instead of leaving 20 handles open.
    with Image.open(os.path.join(train_dir, class_name, img_name)) as img:
        plt.imshow(img)
    plt.title('State of Driving :'+ class_def[class_name])
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Print the size of one raw test image as reported by PIL

# Build the path from test_dir rather than repeating the dataset root, and close
# the file as soon as the size has been read.
with Image.open(os.path.join(test_dir, 'img_1.jpg')) as img:
    print(img.size)


In [None]:
# Image generators for the training/validation data and for the test data

image_size = (128,128)  # size every image is resized to when loaded
batch_size = 32
val_size = 0.2          # fraction of the training images held out for validation

# Scale factor applied to every pixel. 1/255 maps 0-255 pixel values into [0, 1];
# the previous 1/127 produced values up to about 2 and did not match the 1/255
# used when the test images are loaded for prediction.
pixel_scale = 1./255

# Both generators share pixel_scale so training and inference inputs are scaled identically.
train_data_gen = ImageDataGenerator(rescale=pixel_scale,validation_split= val_size)
test_data_gen = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Training and validation iterators over the class sub-folders of train_dir.
# Both come from train_data_gen and differ only in the subset they request.

flow_kwargs = dict(
    target_size=image_size,
    batch_size=batch_size,
    seed=42,
    shuffle=True,
)

train_generator = train_data_gen.flow_from_directory(train_dir, subset='training', **flow_kwargs)

val_generator = train_data_gen.flow_from_directory(train_dir, subset='validation', **flow_kwargs)

In [None]:
# Pull a single batch from the training iterator and show its array shapes

data_batch, label_batch = next(iter(train_generator))
print(data_batch.shape)   # image batch
print(label_batch.shape)  # label batch

In [None]:
# CNN classifier: five Conv(3x3, relu) + MaxPool(2x2) stages with doubling filter
# counts, followed by three relu Dense layers and a 10-way softmax output.
model = Sequential()

# First stage declares the input shape (128x128 RGB images)
model.add(Conv2D(32, (3,3), activation='relu', input_shape=(128,128,3)))
model.add(MaxPooling2D(2,2))

# Remaining convolutional stages
for n_filters in (64, 128, 256, 512):
    model.add(Conv2D(n_filters, (3,3), activation='relu'))
    model.add(MaxPooling2D(2,2))

# Flatten the feature maps so they can be fed to the Dense layers
model.add(Flatten())
for n_units in (1024, 512, 128):
    model.add(Dense(n_units, activation='relu'))

# Output layer: 10 units because there are 10 labels
model.add(Dense(10, activation='softmax'))

model.summary() # print a summary of the model architecture

In [None]:
# Adam optimizer with categorical cross-entropy loss; accuracy is tracked as a metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model; the returned History object keeps the per-epoch metrics
epochs = 10

history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=epochs,
    steps_per_epoch=240,   # batches drawn from train_generator in each epoch
    validation_steps=60,   # batches drawn from val_generator in each validation pass
)

In [None]:
# Persist the trained model to an HDF5 file in the working directory
model.save(filepath='distracted_driver_acc98.h5')

In [None]:
# Per-epoch training and validation curves recorded by model.fit
metrics = history.history

tr_loss, tr_accuracy = metrics['loss'], metrics['accuracy']
val_loss, val_accuracy = metrics['val_loss'], metrics['val_accuracy']

In [None]:
# Plot the training and validation curves, one figure per metric

epchs = list(range(1,len(tr_loss)+1))  # epoch numbers, starting at 1

# The second curve in each figure comes from the validation generator, so the
# legend says "Validation" (it was previously mislabelled "Test").
for title, y_label, train_curve, val_curve in (
    ("Training and Validation loss", 'Loss', tr_loss, val_loss),
    ("Training and Validation accuracy", 'Accuracy', tr_accuracy, val_accuracy),
):
    plt.plot(epchs,train_curve,label='Train')
    plt.plot(epchs,val_curve,label='Validation')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

In [None]:
# Iterator over the test images for prediction.
# Reuse test_data_gen, which is created next to train_data_gen with the same rescale
# factor, so the model sees test pixels on the scale it was trained on. A separately
# constructed generator with its own rescale value can silently drift from training.
test_gen = test_data_gen

# flow_from_directory is pointed at images_dir with classes=['test'], so only the
# 'test' sub-folder is read. shuffle=False keeps the batches in the iterator's file order.
test_data = test_gen.flow_from_directory(
    images_dir,
    shuffle = False,
    target_size = image_size,
    classes = ['test'],
    batch_size = batch_size
)

In [None]:
# Class probabilities for the images yielded by test_data
preds = model.predict(x=test_data)

In [None]:
# Predicted class indices for one batch of test images

data_batch, label_batch = next(iter(test_data))
print(np.argmax(model.predict(data_batch), axis=1))