In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load


# import packages
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import tensorflow as tf
import pathlib

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
# Report whether TensorFlow can see at least one GPU.
# tf.test.is_gpu_available() is deprecated; the device list is the supported query.
bool(tf.config.list_physical_devices('GPU'))

Load data into datasets
=

In [None]:
# Point at the training image folder and report how many JPEG files sit
# exactly one directory level below it.
data_dir1 = '../input/state-farm-distracted-driver-detection/imgs/train'
data_dir = pathlib.Path(data_dir1)

image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))
print(image_count)

In [None]:
# Batching and image geometry: images are resized to 200 px wide, with the
# height derived from a 640:480 aspect ratio.
batch_size = 64
img_width = 200
img_height = int(img_width/640*480)
print(img_height)


def _load_split(subset):
  """Build one split ("training" or "validation") of the images under data_dir.

  Both splits are created with the same validation_split, seed and shuffle
  settings; keeping those in a single place guarantees the two calls cannot
  drift apart and produce overlapping splits.
  """
  return tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset=subset,
    seed=123,
    shuffle=True,
    image_size=(img_height, img_width),
    batch_size=batch_size)


train_ds = _load_split("training")
val_ds = _load_split("validation")

# Capture the class names now: the attribute is read from the dataset object
# returned by image_dataset_from_directory.
# NOTE(review): descriptive names, presumably in the same class order -- confirm:
# ['safe_drive', 'text_r', 'phone_r', 'text_l', 'phone_l', 'radio', 'drink',
#  'reach_bhd', 'hair_mkup', 'talk_passenger']
class_names = train_ds.class_names
print(class_names)


In [None]:
# plot selection of training images
# A single batch is enough: the 3x3 grid shows its first nine images.
# (Iterating over several batches would only redraw the same nine subplots.)
plt.figure(figsize=(25, 25))
for images, labels in train_ds.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[int(labels[i])])
    plt.axis("off")

# Print the image and label tensor shapes of one batch.
for image_batch, labels_batch in train_ds.take(1):
  print(image_batch.shape)
  print(labels_batch.shape)

Define data augmentation layers
==

In [None]:
# Random image augmentations, packaged as a small model so they can be used
# as the first stage of the classifier.
img_augmentation = Sequential(
    [
        # Shift by up to 10% vertically (either way) and 0-10% horizontally.
        layers.RandomTranslation(height_factor=0.1, width_factor=(0,0.1), input_shape=(img_height, img_width, 3)),
        # NOTE(review): mirroring may blur the left/right-hand classes
        # (text_r vs text_l, phone_r vs phone_l) -- confirm this is wanted.
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.1),
        # Zoom range given as one (lower, upper) tuple: negative values zoom in,
        # positive values zoom out. The previous call passed -0.2 and 0.1 as two
        # positional arguments, i.e. height_factor=-0.2 and width_factor=0.1,
        # which is not a (lower, upper) range and zooms the two axes independently.
        # Leaving width_factor unset preserves the aspect ratio.
        layers.RandomZoom(height_factor=(-0.2, 0.1)),
        # Contrast factor drawn from [1 - 0.2, 1 + 0].
        layers.RandomContrast(factor=(0.2,0)),
    ],
    name="img_augmentation",
)

In [None]:
# view augmented images
# Draw nine independent augmentations of the FIRST image of one batch.
plt.figure(figsize=(25, 25))
for image, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        # training=True makes the random layers active for this direct call.
        aug_img = img_augmentation(tf.expand_dims(image[0], axis=0), training=True)
        plt.imshow(aug_img[0].numpy().astype("uint8"))
        # Every panel shows image[0], so the title must use its label
        # (labels[0]), not the label of the i-th image in the batch.
        plt.title(class_names[int(labels[0])])
        plt.axis("off")

Speed up data loading with cache and prefetch
==

In [None]:
# Cache each split after its first pass and let tf.data pick the prefetch
# depth; the training split is additionally shuffled with a 100-element buffer.
AUTOTUNE = tf.data.AUTOTUNE

train_ds = (
    train_ds
    .cache()
    .shuffle(100)
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = (
    val_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

Build simple CNN model
=

In [None]:
num_classes = len(class_names)


def _conv_block(filters):
    """Return the layers of one stage: 3x3 'same' convolution -> ReLU -> max-pooling."""
    return [
        layers.Conv2D(filters, 3, padding='same'),
        layers.Activation('relu'),
        layers.MaxPooling2D(),
    ]


model = Sequential([
    # The augmentation sub-model is the first layer and therefore the one that
    # defines the input shape; an input_shape on a later layer has no effect.
    img_augmentation,
    # Multiply pixel values by 1/255.
    layers.Rescaling(1./255),

    # Three convolutional stages with 16, 32 and 64 filters.
    *_conv_block(16),
    *_conv_block(32),
    *_conv_block(64),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # Softmax head: the model outputs class probabilities, not logits.
    layers.Dense(num_classes, activation='softmax')
])
optimizer = tf.keras.optimizers.Adam()
# Integer labels + probability outputs -> sparse categorical cross-entropy
# with its default from_logits=False.
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['sparse_categorical_accuracy'])
model.summary()

In [None]:
# Train for a fixed number of epochs, validating on val_ds during training.
# The returned History object holds the per-epoch metrics.
epochs = 200
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

In [None]:
# Final metrics on the validation split, then on the training split.
# The value returned by the last call is shown as the cell output.
model.evaluate(val_ds)
model.evaluate(train_ds)

Analyse results
=

In [None]:
# Learning curves: accuracy in the left panel, loss in the right panel,
# each showing the training and validation series per epoch.
curves = history.history
acc = curves['sparse_categorical_accuracy']
val_acc = curves['val_sparse_categorical_accuracy']
loss = curves['loss']
val_loss = curves['val_loss']

# (quantity name, training series, validation series, y-range, legend position)
panels = [
    ('Accuracy', acc, val_acc, (0.8, 1), 'lower right'),
    ('Loss', loss, val_loss, (0.0, 0.5), 'upper right'),
]

fig, axes = plt.subplots(1, 2, figsize=(20, 8))
for ax, (quantity, train_curve, val_curve, y_range, legend_loc) in zip(axes, panels):
    ax.plot(train_curve, label='Training ' + quantity)
    ax.plot(val_curve, label='Validation ' + quantity)
    ax.set_ylim(y_range)
    ax.grid(True)
    ax.set_xlabel("Epochs (-)")
    ax.set_ylabel(quantity + " (-)")
    ax.legend(loc=legend_loc)
    ax.set_title('Training and Validation ' + quantity)
plt.show()

In [None]:
# Check log loss equation
# Recompute the multi-class log loss by hand on the validation split.
#
# Predictions and labels are gathered in ONE pass over val_ds, so row i of
# `p` is guaranteed to belong to labels[i] even if the dataset yields its
# batches in a different order on another iteration.
prob_batches, label_batches = [], []
for batch_images, batch_y in val_ds:
    prob_batches.append(model.predict_on_batch(batch_images))
    label_batches.append(batch_y.numpy())

# float64 so that the upper clip bound 1 - 1e-15 is representable
# (in float32 it rounds to exactly 1.0).
p = np.concatenate(prob_batches, axis=0).astype(np.float64)
labels = np.concatenate(label_batches, axis=0)

# One-hot targets; the class count comes from the prediction width instead of
# a hard-coded 10.
y = np.eye(p.shape[1])[labels]

# Renormalise each row, then clip away exact 0/1 so log() stays finite.
p = p / np.sum(p, axis=1, keepdims=True)
p = np.clip(p, 1e-15, 1 - 1e-15)

log_loss = - np.sum(y * np.log(p))/p.shape[0]
print(log_loss)


In [None]:
# plot confusion matrix
import seaborn as sns

## Tick labels - must be listed in class-index order (c0 .. c9)
tick_labels = ['safe_drive', 'text_r', 'phone_r', 'text_l', 'phone_l', 'radio', 'drink', 'reach_bhd', 'hair_mkup', 'talk_passenger']

# Collect predictions and labels in the same pass over val_ds so that the
# two arrays are aligned element by element.
pred_batches, label_batches = [], []
for batch_images, batch_y in val_ds:
    pred_batches.append(np.argmax(model.predict_on_batch(batch_images), axis=1))
    label_batches.append(batch_y.numpy())
predictions = np.concatenate(pred_batches, axis=0)
labels = np.concatenate(label_batches, axis=0)

# Fix the matrix size to the number of tick labels so rows/columns still line
# up with the labels if the highest class index never occurs.
confusMatrx = tf.math.confusion_matrix(labels, predictions, num_classes=len(tick_labels))
print(confusMatrx)

plt.figure(figsize=(20, 12))
# fmt='d' prints the integer counts as-is; the default annotation format
# switches to scientific notation for counts of three or more digits.
ax = sns.heatmap(confusMatrx.numpy(), annot=True, fmt='d', cmap='Blues')

ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')

ax.xaxis.set_ticklabels(tick_labels)
ax.yaxis.set_ticklabels(tick_labels)

## Display the visualization of the Confusion Matrix.
plt.show()

In [None]:
# Write the trained model to an HDF5 file in the working directory.
model.save(filepath='simple_CNN_v1.h5')

Generating test set report
=========

In [None]:
# load trained model
# Reload the file written by the "save model" cell ('simple_CNN_v1.h5').
# The previous name, 'simple_CNN.h5', is never written by this notebook, so a
# clean Restart & Run All would fail here or pick up a stale model.
import tensorflow as tf
from tensorflow import keras
model = tf.keras.models.load_model('simple_CNN_v1.h5')

In [None]:
# check number of test images
import fnmatch
import os

test_directory = '../input/state-farm-distracted-driver-detection/imgs/test'
# os.listdir returns entries in arbitrary order; sort the names so that
# index i refers to the same file on every run.
test_img_names = sorted(fnmatch.filter(os.listdir(test_directory), '*.jpg'))
print(test_img_names[0:10])
n_test_images = len(test_img_names)
print(n_test_images)

In [None]:
# load test images into a tf dataset
# shuffle=False is essential here: with the default shuffle=True the images
# are yielded in random order, so row i of the predictions could not be
# matched to a file name.
test_images = tf.keras.utils.image_dataset_from_directory(
    test_directory,
    label_mode=None,
    shuffle=False,
    image_size=(img_height, img_width))

# Take the file names from the dataset itself so they are in exactly the
# order in which the (unshuffled) dataset yields the images.
test_img_names = [os.path.basename(path) for path in test_images.file_paths]
n_test_images = len(test_img_names)

In [None]:
# Run the model over the whole test dataset; one output row per test image.
predictions = model.predict(x=test_images)

In [None]:
# The model's last layer is already a softmax, so `predictions` are class
# probabilities. Applying tf.nn.softmax a second time would squash them
# towards a uniform distribution and distort the submitted scores.
score = np.asarray(predictions)

# export scores to csv through a pandas dataframe
import pandas as pd
df = pd.DataFrame(score, columns=['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'], index=test_img_names)
# Name the index column 'img' so the header reads img,c0,...,c9.
# NOTE(review): matches the competition's sample submission as far as known -- confirm.
df.to_csv('submit_result_v1.csv', index_label='img')

In [None]:
# check prediction for a single image
img_path = test_directory + '/' + test_img_names[0]
img = tf.keras.utils.load_img(img_path, target_size=(img_height, img_width))
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0) # Create a batch

prediction = model.predict(img_array)
# The model ends in a softmax layer, so its output is already a probability
# vector; a second softmax would understate the confidence (a perfectly
# confident prediction would be reported as roughly 23%).
score = prediction[0]

print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score)))