In [2]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
import pandas as pd
from glob import glob
from keras.preprocessing import image                  
from tqdm import tqdm
from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

Ten classes: `c0`, `c1`, `c2`, `c3`, `c4`, `c5`, `c6`, `c7`, `c8`, `c9`. 

In [30]:
driver_list = pd.read_csv('../../../../capstone-project-data/driver_imgs_list.csv')
driver_list.head()

Unnamed: 0,subject,classname,img
0,p002,c0,img_44733.jpg
1,p002,c0,img_72999.jpg
2,p002,c0,img_25094.jpg
3,p002,c0,img_69092.jpg
4,p002,c0,img_92629.jpg


In [31]:
driver_list.tail()

Unnamed: 0,subject,classname,img
22419,p081,c9,img_56936.jpg
22420,p081,c9,img_46218.jpg
22421,p081,c9,img_25946.jpg
22422,p081,c9,img_67850.jpg
22423,p081,c9,img_9684.jpg


In [32]:
driver_list['classname'].value_counts()

c0    2489
c3    2346
c4    2326
c6    2325
c2    2317
c5    2312
c1    2267
c9    2129
c7    2002
c8    1911
Name: classname, dtype: int64

In [33]:
driver_list['subject'].value_counts()

p021    1237
p022    1233
p024    1226
p026    1196
p016    1078
p066    1034
p049    1011
p051     920
p014     876
p015     875
p035     848
p047     835
p081     823
p012     823
p064     820
p075     814
p061     809
p056     794
p050     790
p052     740
p002     725
p045     724
p039     651
p041     605
p042     591
p072     346
Name: subject, dtype: int64

In [34]:
# There are 26 different drivers in the training set
driver_list['subject'].value_counts().shape[0]

26

In [35]:
# Defining a function to load datasets.
def load_dataset(path, num_classes):
    data = load_files(path)
    data_files = np.array(data['filenames'])
    data_targets = np_utils.to_categorical(np.array(data['target']), num_classes= num_classes)
    return data_files, data_targets

In [36]:
def load_test_dataset(path):
    data = load_files(path)
    data_files = np.array(data['filenames'])
    return data_files

In [37]:
# Load train dataset
train_files, train_targets = load_dataset('../../../../capstone-project-data/imgs/train', 10)

#valid_files, valid_targets = load_dataset('dogImages/valid')
#test_files, test_targets = load_dataset('imgs_test')

In [38]:
# Creating a validation set
train_files, valid_files, train_targets, valid_targets = train_test_split(train_files, train_targets,
                                                                          test_size=0.2, random_state=12)

In [39]:
test_files = load_test_dataset('../../../../capstone-project-data/imgs/test')

In [40]:
test_files

array(['../../../../capstone-project-data/imgs/test/test/img_48438.jpg',
       '../../../../capstone-project-data/imgs/test/test/img_49454.jpg',
       '../../../../capstone-project-data/imgs/test/test/img_94120.jpg',
       ...,
       '../../../../capstone-project-data/imgs/test/test/img_57211.jpg',
       '../../../../capstone-project-data/imgs/test/test/img_58315.jpg',
       '../../../../capstone-project-data/imgs/test/test/img_86805.jpg'],
      dtype='<U63')

In [41]:
train_files

array(['../../../../capstone-project-data/imgs/train/c4/img_71613.jpg',
       '../../../../capstone-project-data/imgs/train/c0/img_67972.jpg',
       '../../../../capstone-project-data/imgs/train/c0/img_24317.jpg',
       ...,
       '../../../../capstone-project-data/imgs/train/c4/img_80041.jpg',
       '../../../../capstone-project-data/imgs/train/c2/img_28974.jpg',
       '../../../../capstone-project-data/imgs/train/c3/img_71837.jpg'],
      dtype='<U62')

In [42]:
train_targets

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [43]:
# Take a look at the output of the load_dataset function
print(train_files[0])
print(train_targets[0])

../../../../capstone-project-data/imgs/train/c4/img_71613.jpg
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]


In [44]:
print(train_files[1])
print(train_targets[1])

../../../../capstone-project-data/imgs/train/c0/img_67972.jpg
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [45]:
print(train_files[2])
print(train_targets[2])

../../../../capstone-project-data/imgs/train/c0/img_24317.jpg
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [46]:
# Load list of driver actions
driver_actions = [item[-3:-1] for item in sorted(glob("../../../../capstone-project-data/imgs/train/*/"))]

In [47]:
driver_actions

['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

In [48]:
print('There are %d training images.' % len(train_files))
print('There are %d validation images.' % len(valid_files))
print('There are %d test images.' % len(test_files))
print('There are %d possible driver action categories.' % len(driver_actions))

There are 17939 training images.
There are 4485 validation images.
There are 79726 test images.
There are 10 possible driver action categories.


In [49]:
def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(224, 224))
    # convert PIL.Image.Image type to 3D tensor with shape (640, 480, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, 640, 480, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

In [50]:
def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

In [51]:
partial_train_files = train_files[:1800]
partial_valid_files = valid_files[:440]

partial_train_targets = train_targets[:1800]
partial_valid_targets = valid_targets[:440]

partial_test_files = test_files[:400]

In [52]:
from PIL import ImageFile                            
ImageFile.LOAD_TRUNCATED_IMAGES = True                 

# pre-process the data for Keras
train_tensors = paths_to_tensor(partial_train_files).astype('float32')/255
valid_tensors = paths_to_tensor(partial_valid_files).astype('float32')/255
test_tensors = paths_to_tensor(partial_test_files).astype('float32')/255

100%|██████████| 1800/1800 [00:06<00:00, 265.92it/s]
100%|██████████| 440/440 [00:01<00:00, 282.22it/s]
100%|██████████| 400/400 [00:01<00:00, 286.38it/s]


In [None]:
test_tensors = paths_to_tensor(test_files).astype('float32')/255

 26%|██▋       | 20934/79726 [09:41<27:12, 36.01it/s] 

In [53]:
train_tensors.min()

0.0

In [54]:
train_tensors.max()

1.0

In [55]:
print('train_tensors shape:', train_tensors.shape)
print('valid_tensors shape:', valid_tensors.shape)
print('test_tensors shape:', test_tensors.shape)

train_tensors shape: (1800, 224, 224, 3)
valid_tensors shape: (440, 224, 224, 3)
test_tensors shape: (400, 224, 224, 3)


In [56]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

model = Sequential()

### TODO: Define your architecture.

model.add(Conv2D(filters=16, kernel_size=2, padding='same', activation='relu', 
                        input_shape=(224, 224, 3)))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(10, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 16)      208       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 112, 112, 32)      2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 56, 56, 64)        8256      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 28, 28, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 28, 28, 64)        0         
__________

In [57]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [58]:
from keras.callbacks import ModelCheckpoint  

### TODO: specify the number of epochs that you would like to use to train the model.

epochs = 10

### Do NOT modify the code below this line.

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)

model.fit(train_tensors, partial_train_targets, 
          validation_data=(valid_tensors, partial_valid_targets),
          epochs=epochs, batch_size=20, callbacks=[checkpointer], verbose=1)

Train on 1800 samples, validate on 440 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 1.66512, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 1.66512 to 0.97324, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 0.97324 to 0.49329, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.49329 to 0.39084, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 0.39084 to 0.24979, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 6/10

Epoch 00006: val_loss did not improve
Epoch 7/10

Epoch 00007: val_loss improved from 0.24979 to 0.17111, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 8/10

Epoch 00008: val_loss did not improve
Epoch 9/10

Epoch 00009: val_loss did not improve
Epoch 10/10

Epoch 00010: val_loss impr

<keras.callbacks.History at 0x7f7044b0b080>

In [59]:
model.load_weights('saved_models/weights.best.from_scratch.hdf5')

In [78]:
test_files_sub = [item[49:] for item in test_files]
test_files_sub[:10]

['img_48438.jpg',
 'img_49454.jpg',
 'img_94120.jpg',
 'img_35168.jpg',
 'img_39617.jpg',
 'img_28296.jpg',
 'img_59513.jpg',
 'img_59866.jpg',
 'img_75812.jpg',
 'img_29951.jpg']

In [60]:
predictions = [model.predict(np.expand_dims(tensor, axis=0))[0] for tensor in test_tensors]

In [81]:
predictions[:5]

[array([1.5925621e-09, 9.9976498e-01, 6.3823085e-05, 1.6031933e-08,
        1.1273620e-11, 9.6645249e-07, 1.6851294e-04, 8.4690067e-11,
        1.6607071e-06, 1.8944355e-08], dtype=float32),
 array([4.0941010e-03, 4.5869906e-02, 6.1925679e-02, 1.4693019e-04,
        2.3065804e-05, 3.2453990e-04, 4.3505676e-02, 4.7804308e-03,
        8.0842412e-01, 3.0905535e-02], dtype=float32),
 array([4.0704061e-04, 2.0452106e-07, 4.7248215e-04, 2.1107606e-07,
        1.5778620e-07, 8.0892396e-06, 1.5220662e-05, 9.9774963e-01,
        1.3205766e-03, 2.6443478e-05], dtype=float32),
 array([4.5657125e-05, 3.3312517e-05, 4.6956966e-06, 5.1466901e-02,
        7.5861865e-01, 2.9676875e-10, 8.8369444e-02, 1.7200009e-07,
        1.0121037e-01, 2.5081026e-04], dtype=float32),
 array([8.4603246e-04, 9.4048044e-04, 3.9970651e-03, 9.9407780e-06,
        4.5081968e-05, 2.1724983e-01, 1.0715737e-04, 1.1313303e-04,
        3.8076553e-01, 3.9592576e-01], dtype=float32)]

In [82]:
test_files_sub = [item[49:] for item in test_files]
test_files_sub[:5]

['img_48438.jpg',
 'img_49454.jpg',
 'img_94120.jpg',
 'img_35168.jpg',
 'img_39617.jpg']

In [87]:
submission_format = np.column_stack((np.asarray(test_files_sub[:400]), 
                                     np.asarray(predictions)))

In [90]:
print(submission_format[:5])

[['img_48438.jpg' '1.5925621e-09' '0.999765' '6.3823085e-05'
  '1.6031933e-08' '1.127362e-11' '9.664525e-07' '0.00016851294'
  '8.469007e-11' '1.6607071e-06' '1.8944355e-08']
 ['img_49454.jpg' '0.004094101' '0.045869906' '0.06192568' '0.0001469302'
  '2.3065804e-05' '0.0003245399' '0.043505676' '0.004780431' '0.8084241'
  '0.030905535']
 ['img_94120.jpg' '0.0004070406' '2.0452106e-07' '0.00047248215'
  '2.1107606e-07' '1.577862e-07' '8.08924e-06' '1.5220662e-05'
  '0.9977496' '0.0013205766' '2.6443478e-05']
 ['img_35168.jpg' '4.5657125e-05' '3.3312517e-05' '4.6956966e-06'
  '0.0514669' '0.75861865' '2.9676875e-10' '0.088369444' '1.7200009e-07'
  '0.10121037' '0.00025081026']
 ['img_39617.jpg' '0.00084603246' '0.00094048044' '0.003997065'
  '9.940778e-06' '4.5081968e-05' '0.21724983' '0.00010715737'
  '0.00011313303' '0.38076553' '0.39592576']]


In [91]:
np.savetxt('kaggle_submissions/test_submission.csv', submission_format, delimiter=',', comments='',
           newline= '\n', fmt= '%s', header= 'img, c0, c1, c2, c3, c4, c5, c6, c7 ,c8 ,c9')