# State Farm Distracted Driver Detection


[State Farm Distracted Driver Detection](https://www.kaggle.com/c/state-farm-distracted-driver-detection#evaluation)

## Action Plan
### 1. Data Preparation and Preprocessing
### 2. Finetune and Train Model
### 2b. Data Augmentation parameter tuning
### 3. Generate and Validate Predictions 
### 4. Submit predictions to Kaggle

# 2. Finetune and Train Model

## Setup 

In [5]:
# Show the kernel's working directory; the directory constants defined later are built from os.getcwd().
#%cd "~/kaggle/state-farm-driver-detection/code"
%pwd

'/home/ubuntu/kaggle/state-farm-driver-detection/code'

In [6]:
# Anchor the directories used throughout the notebook to the kernel's working
# directory: the code folder itself and the sibling 'input' data folder.
import os, sys

current_dir = os.getcwd()
CODE_HOME_DIR = current_dir
DATA_HOME_DIR = ''.join([CODE_HOME_DIR, '/../input/'])

# Echo both locations, one per line, so a wrong working directory is spotted early.
print(CODE_HOME_DIR, DATA_HOME_DIR, sep='\n')

/home/ubuntu/kaggle/state-farm-driver-detection/code
/home/ubuntu/kaggle/state-farm-driver-detection/code/../input/


In [46]:
#import modules
from importlib import reload

import utils; reload(utils)
from utils import *

#import vgg16bn_ted
#from vgg16bn_ted import Vgg16BN; 

%matplotlib inline

#### Setup Paths

In [8]:
#%cd $DATA_HOME_DIR

# Root of the data tree. Joining with a trailing '' yields exactly one trailing
# separator whether or not DATA_HOME_DIR already ends with one (the previous
# `DATA_HOME_DIR + '/'` produced a doubled slash).
# To work on the sample set instead: path = os.path.join(DATA_HOME_DIR, 'sample', '')
path = os.path.join(DATA_HOME_DIR, '')
results_path = path + 'results/'
train_path = path + 'train/'
valid_path = path + 'valid/'
test_path = path + 'test/'
model_path = path + 'models/'

# Create every directory this notebook writes to. results_path receives the
# cached arrays and model_path the saved weights; exist_ok avoids the
# check-then-create race and makes the cell safe to re-run.
for out_dir in (results_path, model_path):
    os.makedirs(out_dir, exist_ok=True)

# Mini-batch size shared by the batch iterators and the fit() calls.
batch_size = 64

#### Setup and save Data Classes, Labels, and Filenames

In [53]:
# Unpack the seven values returned by get_classes(path): classes, labels and
# file names for the train and validation splits, plus the test file names.
# NOTE(review): get_classes is not defined in this notebook — presumably it comes from `from utils import *`; confirm.
(trn_classes, val_classes, trn_labels, val_labels, trn_filenames, val_filenames, test_filenames) = get_classes(path)

Found 17940 images belonging to 10 classes.
Found 4484 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


In [54]:
# Write the train and validation class arrays to results_path.
# NOTE(review): save_array presumably comes from utils and needs results_path to exist already — TODO confirm.
save_array(results_path+'train_classes.bc', trn_classes)
save_array(results_path+'valid_classes.bc', val_classes)

In [55]:
# Read the class arrays back from disk as numpy arrays.
trn_classes = np.array(load_array(results_path+'train_classes.bc'))
val_classes = np.array(load_array(results_path+'valid_classes.bc'))
# Rebuild the label arrays from the reloaded classes; this replaces the
# trn_labels/val_labels values obtained from get_classes().
trn_labels = onehot(trn_classes)
val_labels = onehot(val_classes)

In [56]:
# Write the file-name lists of all three splits (train, valid, test) to results_path.
save_array(results_path+'train_filenames.bc', trn_filenames)
save_array(results_path+'valid_filenames.bc', val_filenames)
save_array(results_path+'test_filenames.bc', test_filenames)

In [57]:
# Reload the file-name lists from results_path, so this cell can stand in for
# the get_classes() call once the files have been written.
trn_filenames = load_array(results_path+'train_filenames.bc')
val_filenames = load_array(results_path+'valid_filenames.bc')
test_filenames = load_array(results_path+'test_filenames.bc')

In [58]:
# Load the training and validation image sets from their folders.
# NOTE(review): get_data presumably returns every image as one in-memory array (the results are passed
# directly to model.fit later) — confirm the image size it produces in utils.
trn_data = get_data(train_path)
val_data = get_data(valid_path)

Found 17940 images belonging to 10 classes.
Found 4484 images belonging to 10 classes.


In [59]:
# Write the loaded training and validation image data to results_path.
save_array(results_path+'trn_data.bc', trn_data)
save_array(results_path+'val_data.bc', val_data)

In [60]:
# Reload the image data from results_path, replacing the values produced by get_data().
trn_data = load_array(results_path+'trn_data.bc')
val_data = load_array(results_path+'val_data.bc')

In [61]:
# Batch iterators over the train and validation folders, both using batch_size.
# `batches.num_classes` is read later to size the classifier head.
batches = get_batches(train_path, batch_size=batch_size)
# shuffle=False is passed for the validation iterator only.
val_batches = get_batches(valid_path, shuffle=False, batch_size=batch_size)

Found 17940 images belonging to 10 classes.
Found 4484 images belonging to 10 classes.


## Method 1: Initial training with a ResNet50 model

In [85]:
from keras.applications.resnet50 import ResNet50
from keras.layers import Input, Lambda

In [86]:
# Per-channel means subtracted from every input image, in RGB order, reshaped
# to (1,1,3) so they broadcast across the two spatial axes.
rn_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((1,1,3)) #RGB
# Network input: 256x256 images with 3 channels.
# NOTE(review): the arrays passed to model.fit must match this spatial size — confirm against get_data in utils.
inp = Input(shape=(256, 256, 3))
#inp = Input(shape=(224, 224, 3))
# Preprocessing inside the graph: subtract the means, then reverse the channel axis.
preproc = Lambda(lambda x: (x - rn_mean)[:,:,:,::-1])(inp) # Turn into BGR
# ResNet50 without its top classifier (include_top=False), initialised with
# ImageNet weights and fed by the preprocessing layer.
model = ResNet50(include_top=False, weights='imagenet', input_tensor=preproc)

In [87]:
# Print the layer-by-layer architecture (layer names, output shapes, parameter counts).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_23 (InputLayer)           (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
lambda_22 (Lambda)              (None, 256, 256, 3)  0           input_23[0][0]                   
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 128, 128, 64) 9472        lambda_22[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 128, 128, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

### Fine Tune

In [88]:
# Freeze every layer of the pretrained network so that only the new head learns.
for layer in model.layers:
    layer.trainable = False

# New classifier head: flatten the output of the network's last layer and feed
# it to a softmax layer with one unit per class of the training batch iterator.
head_input = model.layers[-1].output
flatten = Flatten()(head_input)
sm = Dense(batches.num_classes, activation='softmax')(flatten)

# Rebind `model` to the frozen base plus the new head.
# NOTE: because `model` is rebound, re-running this cell stacks another head on
# top of the previous one; rebuild the base model first.
model = Model(model.input, sm)

In [None]:
# Configure training: Adam at learning rate 0.001, categorical cross-entropy
# loss, and accuracy reported as a metric.
model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [73]:
# Train on the in-memory arrays for 4 epochs, with the validation arrays passed as validation_data.
# NOTE(review): the saved output of this cell shows "Epoch 1/2", so it appears to predate epochs=4 — re-run to refresh.
model.fit(trn_data, trn_labels, validation_data=(val_data, val_labels), batch_size=batch_size, epochs=4)

Train on 17940 samples, validate on 4484 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f01eb6aafd0>

### Unfreeze more layers for training

In [89]:
# Find the position of the layer named 'res5a_branch2a' in the layer list and show it.
train_idx = model.layers.index(model.get_layer('res5a_branch2a'))
print(train_idx)

# Mark that layer and every layer after it as trainable; earlier layers keep
# whatever trainable flag they already had.
for layer in model.layers[train_idx:]:
    layer.trainable = True

142


In [90]:
# Compile again with a new Adam optimizer (learning rate 0.001), same loss and metric as before.
# NOTE(review): presumably needed so the changed layer.trainable flags take effect — confirm against the Keras docs for the installed version.
model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [91]:
# Train for 3 epochs on the in-memory arrays, with the validation arrays passed as validation_data.
# NOTE(review): the saved output of this cell shows "Epoch 1/4", so it appears to predate epochs=3 — re-run to refresh.
model.fit(trn_data, trn_labels, validation_data=(val_data, val_labels), batch_size=batch_size, epochs=3)

Train on 17940 samples, validate on 4484 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f01d53caf28>

In [93]:
# Set the learning rate of the model's existing optimizer to 1e-4 in place, without recompiling.
# NOTE(review): `K` is presumably the Keras backend exposed by `from utils import *` — confirm.
K.set_value(model.optimizer.lr, 1e-4)

In [94]:
# Continue training for 3 more epochs on the same arrays, after the learning rate was set to 1e-4.
model.fit(trn_data, trn_labels, validation_data=(val_data, val_labels), batch_size=batch_size, epochs=3)

Train on 17940 samples, validate on 4484 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f01d7052a90>

In [95]:
# Save the current weights under model_path. The file name combines the
# architecture name with a suffix identifying this training stage.
model_name = 'ResNet50'
weights_file = ''.join([model_path, model_name, '_res5aB2a_1.h5'])
model.save_weights(weights_file)