In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.layers import Lambda, Conv2D, MaxPooling2D, Dropout, Dense, Flatten
from utils import INPUT_SHAPE, batch_generator
import argparse
import os

Using TensorFlow backend.
  return f(*args, **kwds)


In [21]:
import boto3
s3 = boto3.resource('s3')
s3.Bucket('sagemaker-sandbox-datasets').download_file('auto/train.zip', 'train.zip')

In [22]:
import zipfile
zip_ref = zipfile.ZipFile('train.zip', 'r')
zip_ref.extractall('auto-data')
zip_ref.close()

In [2]:
np.random.seed(0)

In [3]:
def load_data(args):
    """
    Load training data and split it into training and validation set
    """
    data_df = pd.read_csv(os.path.join(args['data_dir'], 'driving_log.csv'), 
                          names=['center', 'left', 'right', 'steering', 'throttle', 'reverse', 'speed'])

    data_df['center'] = data_df['center'].str.extract('.*[\\\\|/](\S*$)', expand=False)
    data_df['left'] = data_df['left'].str.extract('.*[\\\\|/](\S*$)', expand=False)
    data_df['right'] = data_df['right'].str.extract('.*[\\\\|/](\S*$)', expand=False)
    
    X = data_df[['center', 'left', 'right']].values
    y = data_df['steering'].values

    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=args['test_size'], random_state=0)

    return X_train, X_valid, y_train, y_valid

In [4]:
def build_model(args):
    """
    Modified NVIDIA model
    """
    model = Sequential()
    model.add(Lambda(lambda x: x/127.5-1.0, input_shape=INPUT_SHAPE))
    model.add(Conv2D(24, 5, 5, activation='elu', subsample=(2, 2)))
    model.add(Conv2D(36, 5, 5, activation='elu', subsample=(2, 2)))
    model.add(Conv2D(48, 5, 5, activation='elu', subsample=(2, 2)))
    model.add(Conv2D(64, 3, 3, activation='elu'))
    model.add(Conv2D(64, 3, 3, activation='elu'))
    model.add(Dropout(args['keep_prob']))
    model.add(Flatten())
    model.add(Dense(100, activation='elu'))
    model.add(Dense(50, activation='elu'))
    model.add(Dense(10, activation='elu'))
    model.add(Dense(1))
    model.summary()

    return model

In [5]:
def train_model(model, args, X_train, X_valid, y_train, y_valid):
    """
    Train the model
    """
    checkpoint = ModelCheckpoint('model-{epoch:03d}.h5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=args['save_best_only'],
                                 mode='auto')

    model.compile(loss='mean_squared_error', optimizer=Adam(lr=args['learning_rate']))

    model.fit_generator(batch_generator(args['data_dir'] + '/IMG/', X_train, y_train, args['batch_size'], True),
                        args['samples_per_epoch'],
                        args['nb_epoch'],
                        max_q_size=1,
                        validation_data=batch_generator(args['data_dir'], X_valid, y_valid, args['batch_size'], False),
                        nb_val_samples=len(X_valid),
                        callbacks=[checkpoint],
                        verbose=1)

In [6]:
def s2b(s):
    """
    Converts a string to boolean value
    """
    s = s.lower()
    return s == 'true' or s == 'yes' or s == 'y' or s == '1'

In [None]:
"""
Load train/validation data set and train the model

parser = argparse.ArgumentParser(description='Behavioral Cloning Training Program')
parser.add_argument('-d', help='data directory',        dest='data_dir',          type=str,   default='data')
parser.add_argument('-t', help='test size fraction',    dest='test_size',         type=float, default=0.2)
parser.add_argument('-k', help='drop out probability',  dest='keep_prob',         type=float, default=0.5)
parser.add_argument('-n', help='number of epochs',      dest='nb_epoch',          type=int,   default=10)
parser.add_argument('-s', help='samples per epoch',     dest='samples_per_epoch', type=int,   default=20000)
parser.add_argument('-b', help='batch size',            dest='batch_size',        type=int,   default=40)
parser.add_argument('-o', help='save best models only', dest='save_best_only',    type=s2b,   default='true')
parser.add_argument('-l', help='learning rate',         dest='learning_rate',     type=float, default=1.0e-4)
args = parser.parse_args()
"""
args = {'data_dir': 'auto-data/train', 
        'test_size': 0.2, 
        'keep_prob': 0.5, 
        'nb_epoch': 10, 
        'samples_per_epoch': 20000, 
        'batch_size': 40, 
        'save_best_only': 'true', 
        'learning_rate': 1.0e-4}

print('-' * 30)
print('Parameters')
print('-' * 30)
for key, value in args.items():
    print('{:<20} := {}'.format(key, value))
print('-' * 30)

data = load_data(args)
model = build_model(args)
train_model(model, args, *data)

------------------------------
Parameters
------------------------------
data_dir             := auto-data/train
test_size            := 0.2
keep_prob            := 0.5
nb_epoch             := 10
samples_per_epoch    := 20000
batch_size           := 40
save_best_only       := true
learning_rate        := 0.0001
------------------------------
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_1 (Lambda)            (None, 160, 320, 3)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 78, 158, 24)       1824      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 37, 77, 36)        21636     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 17, 37, 48)        43248     
______________________________________________________________



Epoch 1/10
  951/20000 [>.............................] - ETA: 8:50:42 - loss: 0.0227

In [53]:
data_df = pd.read_csv(os.path.join(args['data_dir'], 'driving_log.csv'), 
                          names=['center', 'left', 'right', 'steering', 'throttle', 'reverse', 'speed'])

In [54]:
data_df

Unnamed: 0,center,left,right,steering,throttle,reverse,speed
0,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.000000,0,0.000004
1,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.000000,0,0.000003
2,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.000000,0,0.000016
3,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.042067,0,0.021187
4,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.215397,0,0.138216
5,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.455436,0,0.437921
6,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.640239,0,0.813169
7,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,0.885211,0,1.572811
8,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,1.000000,0,2.236753
9,C:\Users\michael\Desktop\train\IMG\center_2018...,C:\Users\michael\Desktop\train\IMG\left_2018_0...,C:\Users\michael\Desktop\train\IMG\right_2018_...,0.000000,1.000000,0,2.933698


In [None]:
s3.str.replace('^.a|dog', 'XX-XX ', case=False)

In [55]:
data_df.loc['center']

KeyError: 'the label [center] is not in the [index]'

In [75]:
data_df['center'].str.extract('.*[\\\\|/](\S*$)', expand=False)

0      center_2018_03_30_10_40_41_568.jpg
1      center_2018_03_30_10_40_41_636.jpg
2      center_2018_03_30_10_40_41_706.jpg
3      center_2018_03_30_10_40_41_776.jpg
4      center_2018_03_30_10_40_41_844.jpg
5      center_2018_03_30_10_40_41_914.jpg
6      center_2018_03_30_10_40_41_985.jpg
7      center_2018_03_30_10_40_42_053.jpg
8      center_2018_03_30_10_40_42_120.jpg
9      center_2018_03_30_10_40_42_187.jpg
10     center_2018_03_30_10_40_42_254.jpg
11     center_2018_03_30_10_40_42_321.jpg
12     center_2018_03_30_10_40_42_392.jpg
13     center_2018_03_30_10_40_42_463.jpg
14     center_2018_03_30_10_40_42_533.jpg
15     center_2018_03_30_10_40_42_603.jpg
16     center_2018_03_30_10_40_42_673.jpg
17     center_2018_03_30_10_40_42_743.jpg
18     center_2018_03_30_10_40_42_812.jpg
19     center_2018_03_30_10_40_42_882.jpg
20     center_2018_03_30_10_40_42_951.jpg
21     center_2018_03_30_10_40_43_019.jpg
22     center_2018_03_30_10_40_43_089.jpg
23     center_2018_03_30_10_40_43_