In [None]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras import layers

from keras.models import Sequential, Model
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.applications.vgg16 import VGG16

from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, EarlyStopping

from IPython.display import FileLink,display, Image
from PIL import Image as I

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_files     
from sklearn.utils import shuffle
from sklearn.metrics import log_loss

from random import sample
import random

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

import numpy as np
import pandas as pd

from tqdm import tqdm
from glob import glob
import pickle
import zipfile
import os
import cv2
import timeit
import time
import h5py


In [None]:
# Create the folder that will hold saved model weights.
# exist_ok=True makes the cell idempotent and avoids the check-then-create race.
models_dir = "saved_models"
os.makedirs(models_dir, exist_ok=True)

In [None]:
# Report whether TensorFlow can see a GPU.
# tf.test.is_gpu_available() is deprecated; tf.config.list_physical_devices()
# is the supported way to query devices.
if tf.config.list_physical_devices('GPU'):
    print('CUDA is available!  Training on GPU ...')

# Notebook-wide configuration.
IMG_SIZE   = 224  # side length (pixels) images are resized to
COLOR_TYPE = 3    # 3 = colour images, 1 = grayscale (see get_cv2_image)
CLASSES    = 10   # number of class folders c0..c9
EPOCHS     = 50   # epochs for the generator-based training
BATCHES    = 50   # batch size used by the image generators
TEST_SIZE  = 10   # number of test images to load


In [None]:
# Read the image listing CSV shipped with the dataset and preview its first rows.
df = pd.read_csv("../input/state-farm-distracted-driver-detection/driver_imgs_list.csv")
df.head()

In [None]:
sns.set()

# Bar chart of the number of images in each class
fig_dist, ax_dist = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x='classname', color='#169DE3', ax=ax_dist)

ax_dist.set_title('Categories Distribution'.title(), size=22, color='#169DE3')
ax_dist.set_xlabel('classname', size=17, color='#169DE3')
ax_dist.set_ylabel('Count', size=17, color='#169DE3')

plt.show()

In [None]:
def get_cv2_image(path, img_size, color_type):
    """Read one image with OpenCV and resize it to a square.

    Args:
        path: Path of the image file to read.
        img_size: Side length, in pixels, of the returned square image.
        color_type: 1 to load as grayscale, 3 to load as a colour image.

    Returns:
        The resized image array as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``color_type`` is neither 1 nor 3.
        OSError: If OpenCV cannot read an image from ``path``.
    """
    # Validate first: previously an unsupported value left `img` unassigned
    # and surfaced later as an unrelated error.
    if color_type not in (1, 3):
        raise ValueError(
            "Unsupported color_type {!r}: expected 1 (grayscale) or 3 (color)".format(color_type))

    read_flag = cv2.IMREAD_GRAYSCALE if color_type == 1 else cv2.IMREAD_COLOR
    img = cv2.imread(path, read_flag)

    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError("Could not read image: {}".format(path))

    # Keep at most the first 500 rows, then reduce size.
    return cv2.resize(img[:500], (img_size, img_size))

In [None]:
def load_trainning_data(img_size, color_type,
                        train_dir='../input/state-farm-distracted-driver-detection/imgs/train'):
    """Load every training image together with its integer class label.

    Images are read from the sub-folders ``c0`` .. ``c{CLASSES-1}`` of
    ``train_dir``.

    Args:
        img_size: Side length passed to ``get_cv2_image``.
        color_type: Colour mode passed to ``get_cv2_image`` (1 or 3).
        train_dir: Folder containing the per-class sub-folders.

    Returns:
        A ``(images, labels)`` pair of equally long lists; ``labels[i]`` is the
        class index of ``images[i]``.
    """
    start_time = time.time()
    training_images = []
    training_labels = []

    # Loop over the training folder
    for class_ in tqdm(range(CLASSES)):

        print('Loading directory c{}'.format(class_))

        # Sort the paths: glob order depends on the filesystem, and an unstable
        # order would make the seeded train/validation split irreproducible.
        files = sorted(glob(os.path.join(train_dir, 'c' + str(class_), '*.jpg')))

        training_images.extend(get_cv2_image(file, img_size, color_type) for file in files)
        training_labels.extend([class_] * len(files))

    print("Data Loaded in {} Min".format((time.time() - start_time)/60))
    return training_images, training_labels


In [None]:
# Load all training images (X) and their integer class labels (y).
X, y = load_trainning_data( IMG_SIZE , COLOR_TYPE)

In [None]:
# Sanity check: shape of the first loaded image.
X[0].shape

In [None]:
# One-hot encode the integer class labels into CLASSES columns.
# NOTE(review): this overwrites `y` in place, so re-running the cell would
# encode the already-encoded labels again - reload X, y first. TODO confirm.
y = np_utils.to_categorical(y, CLASSES)
y[0]

In [None]:
# Hold out 15% of the data for validation (shuffled with a fixed seed).
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.15,shuffle=True, random_state=2021)
# Show only the shape of one sample; printing the full pixel array floods the output.
print(X_train[0].shape)

In [None]:
# Stack the image lists into uint8 arrays shaped (N, IMG_SIZE, IMG_SIZE, COLOR_TYPE)
image_shape = (-1, IMG_SIZE, IMG_SIZE, COLOR_TYPE)
X_train = np.asarray(X_train, dtype=np.uint8).reshape(image_shape)
X_valid = np.asarray(X_valid, dtype=np.uint8).reshape(image_shape)

print('Train shape :', X_train.shape)
print('Number of train samples : ', len(X_train))

print('Validation shape :', X_valid.shape)
print('Number of Validation samples : ', len(X_valid))

In [None]:
# Shuffle the training data.
# random.shuffle() must not be used here: on a multi-dimensional NumPy array its
# element swaps operate on row views and silently duplicate/overwrite images,
# and shuffling X_train alone would break the pairing with y_train.
# Apply one shared, seeded permutation to images and labels instead.
shuffle_order = np.random.RandomState(2021).permutation(len(X_train))
X_train = X_train[shuffle_order]
y_train = y_train[shuffle_order]

In [None]:
def load_testing_data(test_size, img_size, color_type,
                      test_dir='../input/state-farm-distracted-driver-detection/imgs/test'):
    """Load the first ``test_size`` test images in sorted file-name order.

    Args:
        test_size: Maximum number of images to load. ``None`` or a negative
            value loads every image found.
        img_size: Side length passed to ``get_cv2_image``.
        color_type: Colour mode passed to ``get_cv2_image`` (1 or 3).
        test_dir: Folder containing the test ``*.jpg`` files.

    Returns:
        A ``(images, image_ids)`` pair of equally long lists; ``image_ids[i]``
        is the base file name of ``images[i]``.
    """
    files = sorted(glob(os.path.join(test_dir, '*.jpg')))

    # Trim up front so the progress bar reflects the work actually done.
    if test_size is not None and test_size >= 0:
        files = files[:int(test_size)]

    testing_image = []
    testing_image_id = []

    for file in tqdm(files):
        testing_image.append(get_cv2_image(file, img_size, color_type))
        testing_image_id.append(os.path.basename(file))

    return testing_image, testing_image_id



In [None]:
# Load the test subset and stack it into a (N, IMG_SIZE, IMG_SIZE, COLOR_TYPE) uint8 array
test_data, test_ids = load_testing_data(TEST_SIZE, IMG_SIZE, COLOR_TYPE)
test_data = np.array(test_data, dtype=np.uint8).reshape(-1, IMG_SIZE, IMG_SIZE, COLOR_TYPE)

In [None]:
# Report the test array's shape and how many test images were loaded.
print('Test shape:', test_data.shape)
print(test_data.shape[0], 'Test samples')

In [None]:
# Map each class folder name (c0..c9) to a human-readable description
CAT_MAP = {
    'c{}'.format(class_index): description
    for class_index, description in enumerate((
        'Safe driving',
        'Texting - right',
        'Talking on the phone - right',
        'Texting - left',
        'Talking on the phone - left',
        'Operating the radio',
        'Drinking',
        'Reaching behind',
        'Hair and makeup',
        'Talking to passenger',
    ))
}

In [None]:
# Preview a couple of sample images per class in a grid (one row per class).
# Without a dedicated subplot per image every imshow() lands on the same Axes
# and only the last image remains visible.
plt.figure(figsize = (12, 20))
DIR = '../input/state-farm-distracted-driver-detection/imgs/train/'
SAMPLES_PER_CLASS = 2

# Only folders with a known label are shown (this also skips hidden entries);
# sorting makes the preview order deterministic.
class_dirs = sorted(d for d in os.listdir(DIR) if d in CAT_MAP)

image_count = 1
for directory in class_dirs:
    for file in sorted(os.listdir(DIR + directory))[:SAMPLES_PER_CLASS]:
        plt.subplot(len(class_dirs), SAMPLES_PER_CLASS, image_count)
        image_count += 1
        image = mpimg.imread(DIR + directory + '/' + file)
        plt.imshow(image)
        plt.title(CAT_MAP[directory])
        plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Confirm that the image and label arrays line up before training.
print(X_train.shape,y_train.shape)

ResNet-Model

In [None]:
# ImageNet-pretrained ResNet50 without its classification head.
# The input shape follows the notebook constants so the backbone always matches
# the arrays produced from IMG_SIZE / COLOR_TYPE.
base_model  = tf.keras.applications.resnet.ResNet50(include_top = False,
                                                  weights = 'imagenet',
                                                  input_shape = (IMG_SIZE, IMG_SIZE, COLOR_TYPE))
base_model.summary()

In [None]:
# Classification head on top of the ResNet50 backbone:
# flatten the feature map, apply 50% dropout, then a softmax over CLASSES.
# Nothing in this cell freezes base_model, so its layers stay trainable.
x = base_model.output
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(0.5)(x)

output =tf.keras.layers.Dense(CLASSES,activation = tf.nn.softmax)(x)
model = tf.keras.models.Model(inputs=base_model.inputs, outputs=output)

# The head already applies softmax, hence from_logits=False.
# NOTE(review): the training arrays are raw uint8 pixels; ImageNet ResNet50
# weights normally expect resnet.preprocess_input - confirm this is intended.
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits = False),
              metrics=['accuracy'])

model.summary()

In [None]:
num_epochs = 50
# Learning rate the schedule decays from; keep it equal to the optimizer's
# initial learning rate (Adam(0.0001) in the compile cell).
initial_lr = 1e-4

def lr_schedule(epoch, lr):
    """Step-decay learning-rate schedule for ``LearningRateScheduler``.

    The rate is ``initial_lr`` scaled by a fixed factor that depends only on
    how far training has progressed: x1e-1 after 40%, x1e-2 after 60%,
    x1e-3 after 80% and x1e-4 after 90% of ``num_epochs``.

    Args:
        epoch: Zero-based index of the epoch about to start.
        lr: Current optimizer learning rate, supplied by the callback. It is
            deliberately not used as the base: the callback feeds back the
            previously returned value, so scaling it would compound the decay
            on every epoch and drive the rate to ~0 within a few epochs.

    Returns:
        The learning rate to use for ``epoch``.
    """
    total_epochs = num_epochs

    check_1 = int(total_epochs * 0.9)
    check_2 = int(total_epochs * 0.8)
    check_3 = int(total_epochs * 0.6)
    check_4 = int(total_epochs * 0.4)

    if epoch > check_1:
        factor = 1e-4
    elif epoch > check_2:
        factor = 1e-3
    elif epoch > check_3:
        factor = 1e-2
    elif epoch > check_4:
        factor = 1e-1
    else:
        factor = 1.0

    new_lr = initial_lr * factor

    print("[+] Current Lr rate : {} ".format(new_lr))
    return new_lr
# Keras callback that asks lr_schedule for the learning rate of each epoch.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

In [None]:
# Train the ResNet model on the in-memory arrays.
# Each epoch runs 16 steps of 8 images; lr_callback sets the rate per epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=8,
    steps_per_epoch=16,
    epochs=num_epochs,
    validation_data=(X_valid, y_valid),
    callbacks=[lr_callback],
    verbose=1,
)

In [None]:
# Training curves for the ResNet run: accuracy on the left, loss on the right.
# Each panel shows the training and the validation series, so the titles name
# the metric and the legend tells the two series apart.
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

ax[0].set_title('Accuracy')
ax[0].plot(history.history['accuracy'], label='train')
ax[0].plot(history.history['val_accuracy'], label='validation')
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Accuracy')
ax[0].legend()

ax[1].set_title('Loss')
ax[1].plot(history.history['loss'], label='train')
ax[1].plot(history.history['val_loss'], label='validation')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Loss')
ax[1].legend()

plt.show()

In [None]:
# Data augmentation setup.
# ImageDataGenerator produces batches of image tensors with real-time
# augmentation; the data is looped over in batches.

# Training images: rescaled to [0, 1] plus random shifts and rotations
augmentation_options = dict(
    height_shift_range=0.5,
    width_shift_range=0.5,
    rotation_range=30,
)
train_gen = ImageDataGenerator(rescale=1.0/255,
                               validation_split=0.2,
                               **augmentation_options)

# Validation images: rescaled only, using the same 80/20 split
valid_gen = ImageDataGenerator(validation_split=0.2, rescale=1.0/255)


In [None]:
# Build batch generators that read images from the class sub-folders of the
# training directory; the yielded batches are augmented/normalized by the
# ImageDataGenerator they come from.
BATCHES = 50

# Options shared by the training and the validation generator
flow_options = dict(
    directory='../input/state-farm-distracted-driver-detection/imgs/train',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCHES,
    class_mode='categorical',
)

training_generator = train_gen.flow_from_directory(shuffle=True,
                                                   subset="training",
                                                   **flow_options)

validation_generator = valid_gen.flow_from_directory(shuffle=False,
                                                     subset="validation",
                                                     **flow_options)

In [None]:
# Take the sample counts from the generators instead of hard-coding them, so
# they stay correct if the dataset or the validation split changes.
train_samples = training_generator.samples
valid_samples = validation_generator.samples

In [None]:
# Display the number of validation samples.
valid_samples

In [None]:
# Display the number of training samples.
train_samples

In [None]:
# Display the validation generator object (shows its repr only).
validation_generator

In [None]:
def plot_train_history(history):
    """Plot train vs. validation accuracy and loss, one figure per metric.

    `history` is expected to expose a `history` dict with the keys
    'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    # (train key, validation key, title, y label, x label, legend location)
    panels = (
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy', 'Epoch', 'upper left'),
        ('loss', 'val_loss', 'Model loss', 'loss', 'epoch', 'lower left'),
    )

    for train_key, valid_key, title, y_label, x_label, legend_loc in panels:
        plt.figure(figsize=(8, 5))
        plt.plot(history.history[train_key])
        plt.plot(history.history[valid_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel(x_label)
        plt.legend(['train', 'validation'], loc=legend_loc)
        plt.show()

VGG16-Model 

In [None]:
def VGG16_MODEL(img_rows=IMG_SIZE, img_cols=IMG_SIZE, color_type=3):
    """Build a VGG16-based classifier with a frozen convolutional base.

    The ImageNet fully connected top is dropped and replaced by global average
    pooling, a 1024-unit ReLU layer and a softmax over ``CLASSES``.

    Args:
        img_rows: Input image height in pixels.
        img_cols: Input image width in pixels.
        color_type: Number of input channels.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Pass the requested input shape through; the size arguments were
    # previously accepted but ignored.
    vgg16_model_2 = VGG16(weights="imagenet", include_top=False,
                          input_shape=(img_rows, img_cols, color_type))

    # Freeze all layers of the pre-trained model
    for layer in vgg16_model_2.layers:
        layer.trainable = False

    x = vgg16_model_2.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(CLASSES, activation = 'softmax')(x)

    model = Model(inputs = vgg16_model_2.input, outputs = predictions)

    return model

In [None]:
# Build the VGG16 classifier, show its architecture, then compile it
print("Loading network...")
model_vgg16_2 = VGG16_MODEL(img_rows=IMG_SIZE, img_cols=IMG_SIZE)

model_vgg16_2.summary()

model_vgg16_2.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stop training once val_loss has gone 10 epochs without improving.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

In [None]:
# Save the model whenever val_loss improves (best model only).
# The path is built from models_dir so it always matches the folder created
# at the top of the notebook.
checkpoint = ModelCheckpoint(filepath=os.path.join(models_dir, 'weights_best_vgg16_model2.hdf5'),
                               monitor='val_loss',
                               verbose=1, save_best_only=True)

In [None]:
# Train the VGG16 classifier from the directory generators.
# Model.fit accepts generators directly (fit_generator is deprecated), and the
# step counts must be whole numbers: round up so the last partial batch is used.
history = model_vgg16_2.fit(training_generator,
                         steps_per_epoch = int(np.ceil(train_samples / BATCHES)),
                         epochs = EPOCHS,
                         callbacks=[early_stopping, checkpoint],
                         verbose = 1,
                         validation_data = validation_generator,
                         validation_steps = int(np.ceil(valid_samples / BATCHES)))


In [None]:
# Plot accuracy and loss curves for the training run stored in `history`.
plot_train_history(history)

In [None]:
def prediction(num_images=10):
    """Show test images together with the VGG16 model's predicted class.

    Args:
        num_images: Maximum number of images from ``test_data`` to predict.
            Fewer are shown when ``test_data`` holds fewer images.
    """
    # Slicing (instead of indexing a fixed range) cannot run past the end of
    # test_data when fewer images were loaded.
    for img_brute in test_data[:num_images]:
        # test_data was read with OpenCV (BGR channel order); convert to RGB
        # for both the model input and the on-screen preview.
        img_rgb = cv2.resize(cv2.cvtColor(img_brute, cv2.COLOR_BGR2RGB), (IMG_SIZE,IMG_SIZE))

        # Same scaling as the training generators (rescale = 1/255).
        im = img_rgb.astype(np.float32) / 255.0
        im = np.expand_dims(im, axis =0)

        plt.imshow(img_rgb)

        y_preds = model_vgg16_2.predict(im, batch_size=BATCHES, verbose=1)
        print(y_preds)
        y_prediction = np.argmax(y_preds)
        print('Y Prediction: {}'.format(y_prediction))
        print('Predicted as: {}'.format(CAT_MAP.get('c{}'.format(y_prediction))))

        plt.show()
prediction()