In [97]:
##Render matplotlib figures inline, directly below the cell that produces them
%matplotlib inline

In [98]:
##Print data directory structure for reference

In [99]:
%%bash
# Show only the directory layout (-d) of the dataset, not the individual image files
tree -d data/distracted-driving/

data/distracted-driving/
├── sample
│   ├── test
│   │   └── unkown
│   ├── train
│   │   ├── c0
│   │   ├── c1
│   │   ├── c2
│   │   ├── c3
│   │   ├── c4
│   │   ├── c5
│   │   ├── c6
│   │   ├── c7
│   │   ├── c8
│   │   └── c9
│   └── valid
│       ├── c0
│       ├── c1
│       ├── c2
│       ├── c3
│       ├── c4
│       ├── c5
│       ├── c6
│       ├── c7
│       ├── c8
│       └── c9
├── test
│   └── unkown
├── train
│   ├── c0
│   ├── c1
│   ├── c2
│   ├── c3
│   ├── c4
│   ├── c5
│   ├── c6
│   ├── c7
│   ├── c8
│   └── c9
└── valid
    ├── c0
    ├── c1
    ├── c2
    ├── c3
    ├── c4
    ├── c5
    ├── c6
    ├── c7
    ├── c8
    └── c9

49 directories


In [100]:
##List files in current directory for reference
##(shows the helper modules imported below: utils.py, vgg16.py)
%ls

Create_Dataset_Directories.ipynb  [0m[01;34mdistracted-driving-results[0m/  [01;32mvgg16bn.py[0m*
[01;34mdata[0m/                             kevin_lesson1.ipynb          vgg16bn.pyc
distracted_driving_2.ipynb        lesson1.ipynb                [01;32mvgg16.py[0m*
distracted_driving.ipynb          [01;32mutils.py[0m*                    vgg16.pyc
[01;34mdistracted_driving_results[0m/       utils.pyc


In [101]:
##Load the local helper modules and build the Vgg16 model wrapper
import utils
import vgg16
reload(vgg16)  #re-import so edits to vgg16.py are picked up without restarting the kernel
from vgg16 import Vgg16

vgg = Vgg16()

In [102]:
##Choose which copy of the dataset to run against: the small sample or the full data.
##`path` is where images are read from, `results_path` is where outputs are written.
##Exactly one of the two assignments below should be active at a time.

##Sample data
path, results_path = (
    'data/distracted-driving/sample/',
    'distracted-driving-results/sample/',
)

##Full data -- uncomment (and comment out the sample pair above) to switch
#path, results_path = (
#    'data/distracted-driving/',
#    'distracted-driving-results/',
#)

In [103]:
##Get batches for training
##`path` is the dataset root set in the configuration cell; its train/ folder
##holds one sub-directory per class (c0-c9).
batches = vgg.get_batches(path+'train', batch_size=32)

Found 500 images belonging to 10 classes.


In [104]:
##Finetune the model for distracted driving competition
##NOTE(review): presumably adapts the model's output layer to the classes found in
##`batches` (10 here) -- confirm against vgg16.py
vgg.finetune(batches)

In [105]:
##Get batches for validation
##`path` is the dataset root set in the configuration cell; its valid/ folder
##mirrors train/ with one sub-directory per class (c0-c9).
valid_batches = vgg.get_batches(path+'valid')

Found 200 images belonging to 10 classes.


In [106]:
##Fit model for distracted driving
##Runs a single epoch (nb_epoch=1). `valid_batches` is passed as the second
##argument, presumably as the validation set -- confirm in vgg16.py
vgg.fit(batches, valid_batches, nb_epoch=1)

Epoch 1/1


In [107]:
##Test our newly fitted model
##`path` is the dataset root set in the configuration cell.
##Returns the test batch iterator (kept for its .filenames) and the array of
##predictions, which has one column per class.
test_batches, results = vgg.test(path+'test', batch_size=64)

Found 500 images belonging to 1 classes.


In [108]:
##Save filenames and results for later so we don't need to rerun if something happens
import os
import datetime
import numpy as np

#Timestamp that gives every archived artifact from this run a unique name
now_string = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

#Make sure the output directories exist before writing: the save calls below
#do not create missing directories (e.g. on a fresh checkout, or right after
#switching between the sample and full-data paths)
archive_path = results_path+'archive/'
if not os.path.isdir(archive_path):
    os.makedirs(archive_path)  #also creates results_path itself when missing

#Note: np.save appends '.npy' to any name that does not already end with it,
#so these files land on disk as e.g. 'predictions.dat.npy'
#Save as current
np.save(results_path+'predictions.dat',results)
np.save(results_path+'filenames.dat',test_batches.filenames)
#Save as archive
np.save(archive_path+'predictions_'+now_string+'.dat',results)
np.save(archive_path+'filenames_'+now_string+'.dat',test_batches.filenames)

##Save weights as well
#save as current
vgg.model.save_weights(results_path+'current_ft.h5')
#save as archive
vgg.model.save_weights(archive_path+'current_ft_'+now_string+'.h5')

In [109]:
##Show the results array
##(numpy abbreviates large arrays with '...' when displaying them)
results

array([[ 0.0071,  0.1225,  0.0681, ...,  0.293 ,  0.0535,  0.0714],
       [ 0.0211,  0.0961,  0.072 , ...,  0.2863,  0.0209,  0.0852],
       [ 0.0072,  0.037 ,  0.0594, ...,  0.0168,  0.1055,  0.1623],
       ..., 
       [ 0.0024,  0.1308,  0.5116, ...,  0.1279,  0.0289,  0.0395],
       [ 0.2965,  0.0189,  0.1227, ...,  0.1235,  0.0662,  0.0124],
       [ 0.0151,  0.0812,  0.011 , ...,  0.4612,  0.0919,  0.0378]], dtype=float32)

In [110]:
##Create a pd series of the filenames in the test directory
import os
import pandas as pd

#The iterator reports each file relative to the test directory, i.e. prefixed
#with its sub-folder; keep only the bare file name for the 'img' column.
#os.path.basename strips the directory using the platform's own path rules
#instead of assuming a '/' separator.
filenames = pd.Series(test_batches.filenames, name='img').map(os.path.basename)
filenames.head()

0    img_37018.jpg
1    img_27446.jpg
2    img_36492.jpg
3    img_91370.jpg
4    img_84064.jpg
Name: img, dtype: object

In [111]:
##Assemble the submission table: the file name first, then one prediction column per class

#column labels c0..c9, matching the column order of the results array
class_cols = ['c%d' % class_idx for class_idx in range(10)]

#final column order: file name in front of the class columns
cols = ['img'] + class_cols

#wrap the predictions, attach the file names (aligned on the row index) and
#select the columns in their final order
df = pd.DataFrame(results, columns=class_cols).assign(img=filenames)[cols]

df.head()

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_37018.jpg,0.007131,0.122472,0.068136,0.011123,0.00188,0.04293,0.328448,0.292999,0.053465,0.071416
1,img_27446.jpg,0.021074,0.096113,0.072016,0.031247,0.017283,0.226184,0.143637,0.286348,0.020864,0.085233
2,img_36492.jpg,0.007173,0.037045,0.059379,0.262982,0.001703,0.02147,0.325767,0.016758,0.105454,0.162268
3,img_91370.jpg,0.031195,0.080461,0.161758,0.033912,0.113336,0.091234,0.29755,0.075476,0.100556,0.014522
4,img_84064.jpg,0.04773,0.165801,0.048984,0.00411,0.063262,0.012731,0.444298,0.175747,0.020692,0.016643


In [112]:
##Write the submission CSV twice: the "current" copy and a timestamped archive copy
submission_targets = [
    results_path+'submission.csv',
    results_path+'archive/submission_'+now_string+'.csv',
]
for csv_file in submission_targets:
    #index=False keeps the DataFrame's row index out of the file
    df.to_csv(csv_file, index=False)

In [113]:
##Print results directory for reference

In [114]:
%%bash
# Show every file written to the sample results directory, including the archive/ copies
tree distracted-driving-results/sample/

distracted-driving-results/sample/
├── archive
│   ├── current_ft_2017-09-18_13-08-05.h5
│   ├── current_ft_2017-09-18_13-09-30.h5
│   ├── filenames_2017-09-18_13-08-05.dat.npy
│   ├── filenames_2017-09-18_13-09-30.dat.npy
│   ├── predictions_2017-09-18_13-08-05.dat.npy
│   ├── predictions_2017-09-18_13-09-30.dat.npy
│   └── submission_2017-09-18_13-09-30.csv
├── current_ft.h5
├── filenames.dat.npy
├── predictions.dat.npy
└── submission.csv

1 directory, 11 files
