In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [2]:
# Read the driver/image listing CSV and preview its first five rows
# (head() defaults to n=5).
driver_imgs_csv = '../input/state-farm-distracted-driver-detection/driver_imgs_list.csv'
df = pd.read_csv(driver_imgs_csv)
df.head()


In [3]:
# Number of rows in the listing.
df.shape[0]

# SPLIT INTO TRAIN AND TEST

In [4]:
#https://stackoverflow.com/questions/24147278/how-do-i-create-test-and-train-samples-from-one-dataframe-with-pandas
# Shuffle the rows and split them roughly 80/20 with a random boolean mask.
# A fixed seed makes the split identical on every Restart & Run All, and the
# shuffled copy gets its own name so re-running this cell does not reshuffle
# (and silently change) `df`.
split_seed = 42
split_rng = np.random.default_rng(split_seed)
df_shuffled = df.sample(frac=1, random_state=split_seed)
tt = split_rng.random(len(df_shuffled)) < 0.8  # True -> train row, False -> test row
train_data = df_shuffled[tt]
test_data = df_shuffled[~tt]
print("Train dataset length", len(train_data))
print("Test dataset length", len(test_data))

The 10 classes to predict are:

c0: normal driving
c1: texting - right
c2: talking on the phone - right
c3: texting - left
c4: talking on the phone - left
c5: operating the radio
c6: drinking
c7: reaching behind
c8: hair and makeup
c9: talking to passenger

In [5]:
# Maps each class code (also used as the training sub-folder name) to a
# human-readable description of the driver activity.
types = {'c0': 'normal driving', 
        'c1': 'texting - right', 
        'c2': 'talking on the phone - right', 
        'c3': 'texting - left', 
        'c4': 'talking on the phone - left', 
        'c5': 'operating the radio', 
        'c6': 'drinking', 
        'c7': 'reaching behind', 
        'c8': 'hair and makeup', 
        'c9': 'talking to passenger'}
# Derived from the mapping so the class count can never drift from it.
number_of_class = len(types)

LOADING IMAGE IN GRAYSCALE

In [6]:
#https://towardsdatascience.com/convolution-neural-network-for-image-processing-using-keras-dc3429056306

import cv2

def grayscale(path):
    """Load the image at ``path`` in grayscale mode and resize it to 64x64.

    Parameters
    ----------
    path : str
        Location of the image file to read.

    Returns
    -------
    The value returned by ``cv2.resize`` for the grayscale image and a
    target size of ``(64, 64)``.

    Raises
    ------
    OSError
        If ``cv2.imread`` returns ``None``, i.e. the file is missing or
        cannot be decoded. Without this check the failure would only
        surface as an opaque error inside ``cv2.resize``.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise OSError('Could not read image: {}'.format(path))
    # Reduce the size of the image.
    return cv2.resize(img, (64, 64))

GET TRAIN IMAGES AND THEIR LABELS FROM THE FILES

In [7]:
from tqdm import tqdm
from glob import glob

def loadtrain(train_dir='../input/state-farm-distracted-driver-detection/imgs/train'):
    """Load every training image together with its integer class label.

    For each class index ``k`` in ``range(number_of_class)`` the ``*.jpg``
    files inside ``<train_dir>/c<k>`` are read through ``grayscale``.

    Parameters
    ----------
    train_dir : str, optional
        Directory holding one ``c<k>`` sub-folder per class.

    Returns
    -------
    (train_images, train_labels) : tuple of lists
        ``train_images[i]`` is the result of ``grayscale`` for one file and
        ``train_labels[i]`` is the integer class index of its folder.
    """
    train_images = []
    train_labels = []
    for classed in tqdm(range(number_of_class)):
        print('Loading directory c{}'.format(classed))
        pattern = os.path.join(train_dir, 'c' + str(classed), '*.jpg')
        # glob returns files in arbitrary order; sorting keeps the sample
        # order stable so a seeded split of the result is reproducible.
        for file in sorted(glob(pattern)):
            train_images.append(grayscale(file))
            train_labels.append(classed)
    return train_images, train_labels

MAKING VALIDATION DATASET FROM TRAIN DATASET

In [8]:
##https://www.kaggle.com/pierrelouisdanieau/computer-vision-tips-to-increase-accuracy

from sklearn.model_selection import train_test_split
from keras.utils import np_utils

def normalize_train_data():
    """Load the labelled images and split them into train / hold-out arrays.

    Labels from ``loadtrain`` are one-hot encoded over ``number_of_class``
    classes, the images are stacked into a ``uint8`` array of shape
    ``(n, 64, 64, 1)``, and the result is split 80/20 with a fixed
    ``random_state`` of 42.

    NOTE(review): despite the name, pixel values are kept as ``uint8``;
    no rescaling is applied here.

    Returns
    -------
    (x_train, x_test, y_train, y_test)
        Image arrays and their one-hot label arrays for both partitions.

    Raises
    ------
    ValueError
        If ``loadtrain`` returned no images (e.g. a wrong data path).
    """
    X, labels = loadtrain()
    if len(X) == 0:
        raise ValueError('No training images were loaded; check the data directory.')

    # Use the shared class count rather than a second hard-coded 10.
    y = np_utils.to_categorical(labels, number_of_class)

    # Convert once, before splitting, and add the trailing channel axis.
    X = np.array(X, dtype=np.uint8).reshape(-1,64,64,1)

    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    return x_train, x_test, y_train, y_test

def loadvalid(size=200000, test_dir='../input/state-farm-distracted-driver-detection/imgs/test'):
    """Load up to ``size`` unlabeled images, in sorted file-name order.

    Parameters
    ----------
    size : int, optional
        Maximum number of images to read; values <= 0 yield empty lists.
    test_dir : str, optional
        Directory containing the ``*.jpg`` files. The default mirrors the
        layout of the training path used elsewhere in this notebook
        (``.../imgs/train``) instead of the previous ``../input/test``.
        NOTE(review): confirm against the mounted dataset.

    Returns
    -------
    (X_test, X_test_id) : tuple of lists
        ``X_test[i]`` is the result of ``grayscale`` for one file and
        ``X_test_id[i]`` is that file's base name.
    """
    files = sorted(glob(os.path.join(test_dir, '*.jpg')))
    X_test = []
    X_test_id = []
    # Slicing caps the number of files read; max() keeps a negative size
    # from turning into a "drop from the end" slice.
    for file in tqdm(files[:max(size, 0)]):
        X_test.append(grayscale(file))
        X_test_id.append(os.path.basename(file))
    return X_test, X_test_id

def normalize_valid_data(size):
    """Return the images from ``loadvalid(size)`` as a uint8 array plus their ids.

    Parameters
    ----------
    size : int
        Forwarded unchanged to ``loadvalid``.

    Returns
    -------
    (array, ids)
        ``array`` is a ``uint8`` array reshaped to ``(-1, 64, 64, 1)``;
        ``ids`` is the second value returned by ``loadvalid``.
    """
    images, image_ids = loadvalid(size)
    # Stack into one uint8 array and append the single channel axis.
    batch = np.array(images, dtype=np.uint8).reshape(-1,64,64,1)
    return batch, image_ids


In [9]:
# Upper bound on how many unlabeled images are read below.
sample = 200

# Labelled images, already split into train and hold-out partitions.
train_split = normalize_train_data()
x_train, x_test, y_train, y_test = train_split

# loading validation images: the pixel array comes first, the matching
# file base names second
test_files, test_targets = normalize_valid_data(size=sample)

In [10]:
import plotly.express as px

# Histogram of the "classname" column, one colour per class.
class_histogram = px.histogram(
    df,
    x="classname",
    color="classname",
    title="Number of images by categories ",
)
class_histogram

In [11]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Show one example image per class on a 5x2 grid, titled with the class
# description from `types`.
plt.figure(figsize = (12, 20))
image_count = 1
BASE_URL = '../input/state-farm-distracted-driver-detection/imgs/train/'
# os.listdir order is arbitrary: sort so the panels always follow label
# order. Hidden entries and folders without a description in `types` are
# skipped rather than raising a KeyError.
class_dirs = sorted(d for d in os.listdir(BASE_URL) if d[0] != '.' and d in types)
for directory in class_dirs:
    class_dir = os.path.join(BASE_URL, directory)
    file_names = sorted(os.listdir(class_dir))
    if not file_names:
        continue  # nothing to show for an empty class folder
    plt.subplot(5, 2, image_count)
    image_count += 1
    image = mpimg.imread(os.path.join(class_dir, file_names[0]))
    plt.imshow(image)
    plt.title(types[directory])

CNN MODEL

In [12]:
#https://www.kaggle.com/ismailchaida/cnn-to-detect-driver-actions

from keras.callbacks import ModelCheckpoint, EarlyStopping
# Training hyper-parameters.
batch_size = 50
nb_epoch = 5

# Directory for saved weights; exist_ok makes the cell safe to re-run and
# avoids a separate check-then-create step.
models_dir = "saved_models"
os.makedirs(models_dir, exist_ok=True)

# Keep only the weights with the lowest validation loss. The path is built
# from models_dir so the directory name lives in one place.
checkpointer = ModelCheckpoint(filepath=os.path.join(models_dir, 'weights_best.hdf5'), 
                               monitor='val_loss', mode='min',
                               verbose=1, save_best_only=True)
# Stop once validation loss has not improved for 2 consecutive epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)

In [13]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D

def cnn(input_shape=(64, 64, 1), num_classes=10):
    """Build the (uncompiled) convolutional classifier.

    Architecture: three Conv2D -> BatchNormalization -> MaxPooling2D ->
    Dropout stages with 64, 128 and 256 filters, followed by Flatten and a
    dense head (512 -> 128 -> ``num_classes`` with softmax).

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of one input image, passed to the first ``Conv2D`` layer.
    num_classes : int, optional
        Number of units in the final softmax layer.

    Returns
    -------
    The ``Sequential`` model with all layers added.
    """
    cnnmodel = Sequential()

    # Stage 1: the first convolution uses the default padding and declares
    # the input shape.
    cnnmodel.add(Conv2D(64, (3, 3), activation='relu', input_shape=input_shape))
    cnnmodel.add(BatchNormalization())
    cnnmodel.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    cnnmodel.add(Dropout(0.3))

    # Stages 2 and 3 share one layout and differ only in filter count and
    # dropout rate.
    for filters, drop_rate in ((128, 0.3), (256, 0.5)):
        cnnmodel.add(Conv2D(filters, (3, 3), activation='relu', padding='same'))
        cnnmodel.add(BatchNormalization())
        cnnmodel.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        cnnmodel.add(Dropout(drop_rate))

    # Dense classification head.
    cnnmodel.add(Flatten())
    cnnmodel.add(Dense(512, activation='relu'))
    cnnmodel.add(BatchNormalization())
    cnnmodel.add(Dropout(0.5))
    cnnmodel.add(Dense(128, activation='relu'))
    cnnmodel.add(Dropout(0.25))
    cnnmodel.add(Dense(num_classes, activation='softmax'))

    return cnnmodel

In [14]:
from tensorflow import keras
# Build the network and print its layer summary.
cnnmodel = cnn()
cnnmodel.summary()

# Compile for one-hot multi-class targets, using Adam with its default
# constructor arguments and tracking accuracy.
opt = keras.optimizers.Adam()
compile_options = {
    'optimizer': opt,
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
cnnmodel.compile(**compile_options)

In [15]:
import timeit
# Train with the hold-out split as validation data. The checkpoint and
# early-stopping callbacks configured above must be passed here; otherwise
# they are never invoked.
history = cnnmodel.fit(x_train, y_train, 
          validation_data=(x_test, y_test),
          epochs=nb_epoch, batch_size=batch_size, verbose=1,
          callbacks=[checkpointer, es])

In [16]:
# Evaluate on the hold-out split (the same arrays used as validation data
# during fit).
score = cnnmodel.evaluate(x=x_test, y=y_test, verbose=1)

In [17]:
#references

#https://www.kaggle.com/ismailchaida/cnn-to-detect-driver-actions
#https://www.kaggle.com/pierrelouisdanieau/computer-vision-tips-to-increase-accuracy
#https://towardsdatascience.com/building-a-convolutional-neural-network-cnn-in-keras-329fbbadc5f5
#https://www.analyticsvidhya.com/blog/2021/06/building-a-convolutional-neural-network-using-tensorflow-keras/