# CS421 Introduction to Machine Learning
## Categorizing 10 classes of Distracted Driving using CNN, ResNet, VGG16 and Ensemble
### Hannah Caitlin TAN, Xuan Ni Rachel CHUA, Yi Long NGOH [G1 Team 9] 

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input
# directory, one path per line.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# Any results you write to the current directory are saved as output.

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Layer
from keras.layers.convolutional import Convolution2D, MaxPooling2D, \
                                       ZeroPadding2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.models import model_from_json


In [None]:
import os

# Select the Keras backend and silence TensorFlow's native logging BEFORE
# TensorFlow is imported in this cell; setting these variables after the
# import (as was done previously) is too late for the import itself.
# NOTE(review): an earlier cell already imports keras. If that import loads
# TensorFlow first, these settings still arrive too late for the session;
# consider moving these two lines to the very first cell.
os.environ['KERAS_BACKEND'] = 'tensorflow'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # 3 = INFO, WARNING, and ERROR messages are not printed

import tensorflow

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Suppress all Python-level warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Layer
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Root folder of the State Farm dataset, plus the train/test image folders
# derived from it (both keep their trailing slash).
statefarmFolder = "../input/state-farm-distracted-driver-detection"
train_dir = '{}/train/'.format(statefarmFolder)
test_dir = '{}/test/'.format(statefarmFolder)

# Load the image list CSV and preview its first five rows.
dataset = pd.read_csv('{}/driver_imgs_list.csv'.format(statefarmFolder))
dataset.head()


In [None]:
import os
from IPython.display import display, Image
import matplotlib.image as mpimg

# Human-readable label for each class folder (c0..c9) of the training set.
activity_map = {'c0': 'Safe driving', 
                'c1': 'Texting - right', 
                'c2': 'Talking on the phone - right', 
                'c3': 'Texting - left', 
                'c4': 'Talking on the phone - left', 
                'c5': 'Operating the radio', 
                'c6': 'Drinking', 
                'c7': 'Reaching behind', 
                'c8': 'Hair and makeup', 
                'c9': 'Talking to passenger'}

# Show one sample image per class in a 5x2 grid.
# train_dir is the training folder defined in the data-loading cell above.
plt.figure(figsize = (12, 20))
image_count = 1
# os.listdir returns entries in arbitrary order; sorting lays the grid out
# as c0..c9 on every run.
for directory in sorted(os.listdir(train_dir)):
    # Skip anything that is not a known class folder (hidden entries, stray
    # files) instead of failing on the label lookup below.
    if directory not in activity_map:
        continue
    class_dir = os.path.join(train_dir, directory)
    files = sorted(os.listdir(class_dir))
    if not files:
        # Nothing to display for an empty class folder.
        continue
    plt.subplot(5, 2, image_count)
    image_count += 1
    # The first file (in sorted order) serves as the sample for this class.
    image = mpimg.imread(os.path.join(class_dir, files[0]))
    plt.imshow(image)
    plt.title(activity_map[directory])

In [None]:
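# NOTE: the commented-out code below is an earlier hand-built VGG16 that
# appears to target the Keras 1 layer API. It is kept for reference only;
# the next cell builds the model from keras.applications.VGG16 instead.
# model = Sequential()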
# model.add(ZeroPadding2D((1, 1), input_shape=(240, 240, 3)))
# model.add(Convolution2D(64, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(64, 3, 3, activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(128, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(128, 3, 3, activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(256, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(256, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(256, 3, 3, activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(512, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(512, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(512, 3, 3, activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(512, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(512, 3, 3, activation='relu'))
# model.add(ZeroPadding2D((1, 1)))
# model.add(Convolution2D(512, 3, 3, activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# model.add(Flatten())
# model.add(Dense(4096, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(4096, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(1000, activation='softmax'))

# model.load_weights('../input/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5')

# model.layers.pop()
# model.add(Dense(10, activation='softmax'))
# # Learning rate is changed to 0.001
# sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
# model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics = ['accuracy'])


In [None]:
from keras.models import Model
from keras.layers import Flatten, Dense
from keras.applications import VGG16

# VGG16 convolutional base with ImageNet weights and no classifier head,
# taking 240x240 RGB inputs.
vgg = VGG16(weights = 'imagenet', include_top = False, input_shape = (240, 240, 3))

# Freeze every layer of the base so only the new head below is trainable.
for layer in vgg.layers:
    layer.trainable = False

# New head: flatten the base's output and map it to 10 softmax scores.
x = Dense(10, activation = 'softmax')(Flatten()(vgg.output))

# SGD with Nesterov momentum; learning rate 1e-3.
sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)

model = Model(inputs = vgg.input, outputs = x)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics = ['accuracy'])

In [None]:
# Training images: rescaled to [0, 1] and randomly augmented.
# NOTE(review): horizontal_flip mirrors the frame, yet the class labels
# distinguish left-hand from right-hand actions (c1/c3, c2/c4) - confirm the
# flip is intended.
train_datagen = ImageDataGenerator(rescale = 1.0/255, 
                                   shear_range = 0.2, 
                                   zoom_range = 0.2, 
                                   horizontal_flip = True, # flip is True 
                                   validation_split = 0.2)

# Validation images: rescaled only. Previously the validation subset was drawn
# from the augmenting generator, so validation metrics were computed on
# randomly sheared/zoomed/flipped images. The validation_split value must stay
# identical to the training generator's so that both subsets partition the
# same file list.
validation_datagen = ImageDataGenerator(rescale = 1.0/255,
                                        validation_split = 0.2)

# 80% of the images under train_dir, resized to 240x240, in batches of 32.
training_set = train_datagen.flow_from_directory(train_dir, 
                                                 target_size = (240, 240), 
                                                 batch_size = 32,
                                                 subset = 'training')

# The remaining 20%, resized the same way but without augmentation.
validation_set = validation_datagen.flow_from_directory(train_dir, 
                                                        target_size = (240, 240), 
                                                        batch_size = 32,
                                                        subset = 'validation')

In [None]:
# Train for 10 epochs, validating after each one.
# len(iterator) is the number of batches in one full pass over that subset.
# It replaces the previous hard-coded `17943/32` and `4481/32`, which were
# floats and silently went stale whenever the split or batch size changed.
model.fit_generator(training_set,
                    steps_per_epoch = len(training_set),
                    epochs = 10,
                    validation_data = validation_set,
                    validation_steps = len(validation_set))

In [None]:
# Test images are only rescaled to [0, 1]; no augmentation is configured.
test_datagen = ImageDataGenerator(rescale = 1.0 / 255)

# The dataset root is scanned with 'test' given as its only class, presumably
# because the test folder has no per-class sub-folders (verify against the
# dataset layout). shuffle=False keeps the images in a fixed order.
test_generator = test_datagen.flow_from_directory(
    directory = statefarmFolder,
    classes = ['test'],
    target_size = (240, 240),
    batch_size = 32,
    shuffle = False,
)

In [None]:
# Rewind the test iterator so prediction starts from its first batch.
test_generator.reset()

# One row of 10 class scores per test image.
pred = model.predict_generator(generator = test_generator)

# Store the raw scores as CSV; the DataFrame's default integer index and
# column labels (0..9) are written as-is.
predictions = pd.DataFrame(data = pred)
predictions.to_csv(path_or_buf = 'base.csv')

<a href='base.csv'> Download File </a> 