In [None]:
import os
from glob import glob  # finds file paths matching a wildcard pattern
import random

# Configure the backend and log level BEFORE importing tensorflow: the native
# logging level is picked up when the library is loaded, so setting
# TF_CPP_MIN_LOG_LEVEL after the import is too late to silence import-time messages.
os.environ['KERAS_BACKEND'] = 'tensorflow'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 3 = INFO, WARNING, and ERROR messages are not printed
import tensorflow

from tqdm import tqdm  #Used to print progress bars

import numpy as np
import pandas as pd
from IPython.display import FileLink
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
from IPython.display import display, Image
import matplotlib.image as mpimg
import cv2 #Computer Vision Library

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_files       
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.metrics import log_loss


In [None]:
import csv

# Build a dictionary keyed by the lower-cased value of the second CSV column
# (the class); each value is the list of third-column entries (image names)
# that appeared with that class.
data = {}
with open('../input/state-farm-distracted-driver-detection/driver_imgs_list.csv') as csv_file:
    rows = csv.reader(csv_file)
    next(rows)  # skip the header row with the column names
    for row in rows:
        data.setdefault(row[1].lower(), []).append(row[2])
        

In [None]:
# Class names, in the order in which they were first inserted into `data`.
class_list = [*data]
class_list

In [None]:
import os

# Create Master_Data/Training and Master_Data/Testing.
# makedirs(..., exist_ok=True) creates the parent folder as needed and does not
# fail when the folders are already on disk, so the cell can be re-run safely
# (os.mkdir raised FileExistsError on every run after the first).
for split_dir in ("Training", "Testing"):
    os.makedirs(os.path.join("Master_Data", split_dir), exist_ok=True)

In [None]:
# Create one sub-folder per class under both the Training and Testing folders.
# exist_ok=True keeps the cell re-runnable when the folders already exist
# (os.mkdir raised FileExistsError on a second run).
for class_name in class_list:
    for split_dir in ("Master_Data/Training", "Master_Data/Testing"):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

In [None]:
import shutil as sh
split_size = 0.8  # fraction of each class's images that is copied to the Training folder; the rest goes to Testing

In [None]:
# Split every class's image list at `split_size` (in list order): the leading part is
# copied into ./Master_Data/Training/<class>, the remainder into ./Master_Data/Testing/<class>.
src_root = "../input/state-farm-distracted-driver-detection/imgs/train"
for clas, images in data.items():
    cut = int(len(images) * split_size)
    for dest_root, subset in (("./Master_Data/Training", images[:cut]),
                              ("./Master_Data/Testing", images[cut:])):
        dest = os.path.join(dest_root, clas)
        for image in subset:
            sh.copy(os.path.join(src_root, clas, image), dest)

We now have two directories, `Training` and `Testing`, under `Master_Data`. Each contains 10 subdirectories (one per class) holding the images that belong to that class.

In [None]:
# Load the dataset previously downloaded from Kaggle
NUMBER_CLASSES = 10  # number of class folders; the loaders iterate over 'c0' .. 'c<NUMBER_CLASSES-1>'
# Color type: 1 - grey, 3 - rgb

def get_cv2_image(path, img_rows, img_cols, color_type=3):
    """Read an image file with OpenCV and resize it to ``img_rows`` x ``img_cols``.

    Args:
        path: Path of the image file to read.
        img_rows: Number of rows (height) of the returned image.
        img_cols: Number of columns (width) of the returned image.
        color_type: 1 to read with ``cv2.IMREAD_GRAYSCALE``, 3 to read with
            ``cv2.IMREAD_COLOR``.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``color_type`` is neither 1 nor 3.
        OSError: If OpenCV could not read ``path`` as an image.
    """
    if color_type == 1:
        read_flag = cv2.IMREAD_GRAYSCALE
    elif color_type == 3:
        read_flag = cv2.IMREAD_COLOR
    else:
        # Previously this fell through and failed later on an unbound `img`.
        raise ValueError(
            "color_type must be 1 (grey) or 3 (rgb), got {!r}".format(color_type))

    img = cv2.imread(path, read_flag)  # converts the image file to its pixel matrix
    if img is None:
        # imread signals an unreadable/missing file by returning None.
        raise OSError("Could not read image file: {}".format(path))

    # cv2.resize expects dsize as (width, height), i.e. (columns, rows).
    return cv2.resize(img, (img_cols, img_rows))

# Training
def load_train(img_rows, img_cols, color_type=3,
               train_dir=os.path.join('.', 'Master_Data', 'Training')):
    """Load every training image together with its integer class label.

    Images are read from ``<train_dir>/c<k>/*.jpg`` for ``k`` in
    ``range(NUMBER_CLASSES)``.

    The default ``train_dir`` is the ``Master_Data/Training`` folder created by the
    split step of this notebook. Reading the full Kaggle ``imgs/train`` folder
    instead would also load the images that were copied to ``Master_Data/Testing``,
    so the later evaluation on that folder would be measured on images the model
    was trained on.

    Args:
        img_rows: Target size passed through to ``get_cv2_image``.
        img_cols: Target size passed through to ``get_cv2_image``.
        color_type: 1 for grey, 3 for rgb (passed through to ``get_cv2_image``).
        train_dir: Folder containing one ``c<k>`` sub-folder per class.

    Returns:
        A ``(train_images, train_labels)`` tuple of two parallel lists: the loaded
        images and the class index of each image.
    """
    train_images = []
    train_labels = []
    # Loop over the class folders of the training directory
    for classed in tqdm(range(NUMBER_CLASSES)):   # prints the progress bar as well
        print('Loading directory c{}'.format(classed))
        # Sorted so that the load order (and therefore the seeded train/validation
        # split that follows) does not depend on file-system listing order.
        files = sorted(glob(os.path.join(train_dir, 'c' + str(classed), '*.jpg')))
        for file in files:
            img = get_cv2_image(file, img_rows, img_cols, color_type)
            train_images.append(img)
            train_labels.append(classed)
    return train_images, train_labels

def read_and_normalize_train_data(img_rows, img_cols, color_type):
    """Load the training images and split them into training and validation sets.

    NOTE: despite the name, pixel values are not rescaled here; the image arrays
    are returned as ``uint8``.

    Args:
        img_rows: Target size forwarded to ``load_train``.
        img_cols: Target size forwarded to ``load_train``.
        color_type: Number of channels (1 - grey, 3 - rgb), forwarded to ``load_train``.

    Returns:
        ``(x_train, x_val, y_train, y_val)``: image arrays of shape
        ``(n, img_rows, img_cols, color_type)`` and one-hot label arrays, split
        80% / 20% with a fixed random seed.
    """
    images, labels = load_train(img_rows, img_cols, color_type)

    # Add the explicit channel axis: (n, rows, cols[, channels]) -> (n, rows, cols, color_type),
    # which is the input layout the CNN is built with.
    X = np.array(images, dtype=np.uint8).reshape(-1, img_rows, img_cols, color_type)
    # One-hot encode the labels; use the shared constant rather than a literal 10.
    y = np_utils.to_categorical(labels, NUMBER_CLASSES)

    x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    return x_train, x_val, y_train, y_val



In [None]:
img_rows = 64   # first image dimension passed to the loaders and to the model's input_shape
img_cols = 64   # second image dimension passed to the loaders and to the model's input_shape
color_type = 1  # 1 - grey (single channel), 3 - rgb; also the channel count in input_shape

In [None]:
# Load the images, split them into training / validation parts and report the size.
x_train, x_val, y_train, y_val = read_and_normalize_train_data(
    img_rows=img_rows, img_cols=img_cols, color_type=color_type)
print('Train shape:', x_train.shape)
print(len(x_train), 'train samples')


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout,BatchNormalization,MaxPooling2D
from keras.regularizers import *

In [None]:
def _conv_stage(filters, dropout_rate, **first_conv_kwargs):
    """Return the layers of one stage: Conv-BN-Conv-BN-MaxPool-Dropout.

    ``first_conv_kwargs`` are extra keyword arguments for the first Conv2D only
    (e.g. ``input_shape`` for the first stage, ``padding`` for the later ones).
    """
    return [
        Conv2D(filters, (3, 3), activation='relu', **first_conv_kwargs),
        BatchNormalization(),
        Conv2D(filters, (3, 3), activation='relu', padding='same'),
        BatchNormalization(axis=3),
        MaxPooling2D(pool_size=(2, 2), padding='same'),
        Dropout(dropout_rate),
    ]


model = Sequential()

layer_stack = [
    # CNN 1 (its first convolution carries the input shape and uses the default padding)
    *_conv_stage(32, 0.3, input_shape=(img_rows, img_cols, color_type)),
    # CNN 2
    *_conv_stage(64, 0.3, padding='same'),
    # CNN 3
    *_conv_stage(128, 0.5, padding='same'),
    # Output head
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.25),
    # softmax turns the last layer's output into a probability distribution over the classes
    Dense(10, activation='softmax'),
]
for layer in layer_stack:
    model.add(layer)

model.summary()
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Training hyper-parameters
batch_size = 40
nb_epoch = 10
# Train on the training split and report metrics on the validation split after each epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
    validation_data=(x_val, y_val),
)

In [None]:
# Accessing the test images stored under ./Master_Data/Testing

test_images = []
test_labels = []
# Loop over the testing folder, one 'c<k>' sub-folder per class
for classed in tqdm(range(NUMBER_CLASSES)):
    # Sorted so the load order does not depend on file-system listing order.
    files = sorted(glob(os.path.join('.', 'Master_Data', 'Testing', 'c' + str(classed), '*.jpg')))
    for file in files:
        img = get_cv2_image(file, img_rows, img_cols, color_type)
        test_images.append(img)
        test_labels.append(classed)

# Same layout as the training arrays: (n, img_rows, img_cols, color_type), dtype uint8.
x_test_final = np.array(test_images, dtype=np.uint8).reshape(-1, img_rows, img_cols, color_type)
# One-hot encode with the shared class-count constant instead of a literal 10.
y_test = np_utils.to_categorical(test_labels, NUMBER_CLASSES)

In [None]:
# Loss and accuracy of the trained model on the test arrays.
model.evaluate(x=x_test_final, y=y_test)

Result: the model reaches roughly 99% accuracy on the test images.