## Imports

In [None]:
### Imports
# Standard library
import csv
import errno
import os

# Modules
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# utils
from imgaug import augmenters as iaa
from matplotlib import gridspec
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Keras
from keras.models import Sequential, load_model
from keras.layers import Activation, Flatten, Dense, Lambda, Cropping2D, Conv2D, Dropout, MaxPooling2D
from keras.optimizers import Adam, SGD
from keras.regularizers import l2
from keras.utils import plot_model

# Utils
from utils import delete_file, crop, resize, conv_to_graysale
from utils import random_brightness_correction, random_flip, random_shear, random_rotate
from utils import transformed_data_generator, original_data_generator, lenet_generator

# Plot in the notebook
%matplotlib inline

## Constants and generic utils

In [None]:
### Constants
# Data: directory holding the recordings and the name of the log inside it
DATA_DIR = 'data/'
CSV_FILE = 'driving_log.csv'

# Resize constants
TOP_CUT, BOTTOM_CUT = 30, 30
NEW_WIDTH, NEW_HEIGHT = 64, 64
MAX_ROTATION_ANGLE = 15
MAX_SHEAR_SHIFT = 40

# Data generation constants
BATCH_SIZE = 32
STEERING_CORRECTION = 0.23

# Camera parameters: each camera name maps to the log column holding its image path
cameras = ['center', 'left', 'right']
cameras_index = {name: column for column, name in enumerate(cameras)}

### Training and visualization functions

In [None]:
def select_random_seq(csv_file, minutes):
    '''
    Select a random contiguous run of log rows to test a model on.

    The run holds ``int(minutes * 60 * 12)`` rows, i.e. the log is treated as
    12 rows per second.

    Args:
        csv_file: sliceable sequence of log rows.
        minutes: requested length of the run, in minutes.

    Returns:
        A slice of ``csv_file``. When the log has no more rows than the
        requested length, the whole log is returned instead of failing.
    '''
    examples_to_select = int(minutes * 60 * 12)  # minutes * 60 seconds * 12 rows a second
    total_examples = len(csv_file)
    if total_examples <= examples_to_select:
        # Not enough rows to choose a window from: use everything available.
        return csv_file[:]
    # Any start up to and including this one leaves room for a full-length run.
    last_start = total_examples - examples_to_select
    random_seq_ini = np.random.randint(last_start + 1)
    return csv_file[random_seq_ini:random_seq_ini + examples_to_select]

def load_sequence(csv_seq, data_dir = DATA_DIR):
    '''
    Load the images and steering angles referenced by a run of log rows.

    For every row, column 0 is the image path (relative to ``data_dir``) and
    column 3 is the steering angle.

    Args:
        csv_seq: iterable of log rows.
        data_dir: prefix prepended to every image path.

    Returns:
        ``(images, angles, times)`` where ``times`` is simply 0..n-1, used as
        the x axis when plotting the sequence.

    Raises:
        FileNotFoundError: if an image cannot be read. ``cv2.imread`` reports
            failure by returning None, which would otherwise only surface
            later as an obscure error in the image transformations.
    '''
    images = []
    angles = []
    for line in csv_seq:
        path = data_dir + line[0].strip()
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
        images.append(image)
        angles.append(float(line[3]))
    times = list(range(len(images)))
    return images, angles, times

In [None]:
### Prediction visualization
def visualize_prediction(model, model_name, seq_minutes, tr_fn = crop_resize):
    '''
    Plot real vs. predicted steering angles over a random log sequence.

    A random run of ``seq_minutes`` minutes is drawn from the notebook-level
    ``csv_file``, its images are passed through ``tr_fn`` and fed to
    ``model.predict``. The top panel shows both angle series, the bottom panel
    the difference (real - predicted).

    Args:
        model: object exposing ``predict``.
        model_name: label used in the figure title.
        seq_minutes: length of the sequence to draw, in minutes.
        tr_fn: transformation applied to the list of loaded images before
            prediction.

    Returns:
        The matplotlib figure.
    '''
    # NOTE(review): crop_resize (the default tr_fn) is not imported in the
    # visible cells -- confirm it is defined before this cell runs.
    rn_seq = select_random_seq(csv_file, seq_minutes)
    images, angles, times = load_sequence(rn_seq)
    steering_angles = model.predict(tr_fn(images)).T.squeeze()
    difs = [real - pred for real, pred in zip(angles, steering_angles)]

    fig = plt.figure(figsize=(16, 6))
    fig.suptitle('Model: {:s} | Real .vs. predicted steering angle | {:.2f} minutes sequence'\
                 .format(model_name, seq_minutes), fontsize=12)

    gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
    ax0 = plt.subplot(gs[0])
    ax0.plot(times, angles, label='Real')
    ax0.plot(times, steering_angles, label='Predicted')
    ax0.legend(loc='lower right')
    # Size the x axis from the sequence actually plotted rather than a fixed
    # two-minute length, so shorter or longer sequences fill the panel.
    ax0.axis([-10, len(times) + 10, -0.75, 0.75])
    ax0.spines['top'].set_visible(False)
    ax0.spines['right'].set_visible(False)

    ax1 = plt.subplot(gs[1], sharex=ax0)
    ax1.fill_between(times, 0, difs, facecolor='red', label='Difference')
    ax1.legend(loc='lower right')
    ax1.spines['top'].set_visible(False)
    ax1.spines['right'].set_visible(False)

    return fig

In [None]:
def print_learning_curves(train_loss, valid_loss, model_name):
    '''
    Plot the training and validation loss for each epoch.

    Args:
        train_loss: per-epoch training losses.
        valid_loss: per-epoch validation losses.
        model_name: label prepended to the plot title.

    Returns:
        The figure holding the curves. It is created here, so the function no
        longer depends on a ``fig`` left behind by some other cell.
    '''
    fig, ax = plt.subplots()
    ax.plot(train_loss)
    ax.plot(valid_loss)
    ax.set_title(model_name+': mean squared error loss')
    ax.set_ylabel('mean squared error loss')
    ax.set_xlabel('epoch')
    ax.legend(['training set', 'validation set'], loc='upper right')
    return fig

In [None]:
### Calculate accuracy
def calc_sign_accuracy(model, X_validation, y_validation, tr_func = crop_resize):
    '''
    Fraction of validation samples whose predicted steering sign is correct.

    Args:
        model: object exposing ``predict``.
        X_validation: validation images, passed through ``tr_func`` first.
        y_validation: real steering angles, one per image.
        tr_func: transformation applied to ``X_validation`` before prediction.

    Returns:
        Number of sign matches divided by the number of validation samples.
    '''
    # NOTE(review): crop_resize (the default tr_func) is not imported in the
    # visible cells -- confirm it is defined before this cell runs.
    y_prediction = model.predict(tr_func(X_validation))
    # Flatten both sides so an (n, 1) prediction lines up with n labels.
    real_signs = np.sign(np.asarray(y_validation)).ravel()
    pred_signs = np.sign(np.asarray(y_prediction)).ravel()
    # Normalise by the validation set itself, not by an unrelated global list.
    return np.sum(real_signs == pred_signs)/len(real_signs)

In [None]:
def train_model(model, train_data, steps_train, valid_data, steps_valid, epochs):
    '''
    Run ``model.fit_generator`` and hand back the model with its loss curves.

    Returns:
        ``(model, training losses, validation losses)``, the losses being the
        'loss' and 'val_loss' entries of the fit history.
    '''
    fit_options = {
        'steps_per_epoch': steps_train,
        'epochs': epochs,
        'validation_data': valid_data,
        'validation_steps': steps_valid,
    }
    history = model.fit_generator(train_data, **fit_options).history
    return model, history['loss'], history['val_loss']

## Data exploration

In [None]:
### Load data
# Read the driving log into a list of rows, leaving out the header line
csv_file = []
with open(DATA_DIR+CSV_FILE) as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)
    csv_file.extend(reader)

In [None]:
# Split csv in train and validation sets
# The split is seeded because models are reloaded from disk and trained further
# (flag_new = False): an unseeded split would differ on every kernel restart and
# move rows that used to be validation data into the training set.
SPLIT_SEED = 42
csv_train, csv_valid = train_test_split(csv_file, test_size = 0.2, random_state = SPLIT_SEED)

In [None]:
# Load the image in column 0 of every log row (the center camera) together with
# its steering angle (column 3), then split both arrays 80/20
X_data = np.array([cv2.imread(DATA_DIR + line[0].strip()) for line in csv_file])
y_data = np.array([float(line[3]) for line in csv_file])
X_train, X_valid, y_train, y_valid = train_test_split(X_data, y_data, test_size = 0.2)

In [None]:
### Sample image
# Pick one random log row; its center image and angle are reused by later cells
sample_idx = np.random.choice(len(csv_file))
sample_line = csv_file[sample_idx]
sample_image = cv2.imread(DATA_DIR + sample_line[0].strip())
sample_angle = float(sample_line[3])

fig = plt.figure(figsize = (12, 3))
fig.subplots_adjust(left = 0, right = 1, bottom = 0, top = 1, hspace = 0.05, wspace = 0.05)

# One panel per camera, placed in the camera's column order
for camera in cameras:
    column = cameras_index[camera]
    image = cv2.imread(DATA_DIR + sample_line[column].strip())
    imgplt = fig.add_subplot(1, 3, column + 1)
    imgplt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    imgplt.axis('off')
    imgplt.set_title('Camera: '+camera, fontsize = 12)

fig.savefig('report-images/cameras_image')

In [None]:
### Driving log exploration
# pandas is imported with the other modules in the imports cell
pd_data = pd.read_csv(DATA_DIR+CSV_FILE)
pd_data.head()

In [None]:
### Angle distribution
# Steering angles of the opening rows of the log (12*60*2 rows plus one)
n_rows = 12*60*2 + 1
angles = [float(line[3]) for line in csv_file[:n_rows]]
times = list(range(len(angles)))
bins = [-0.75 + 0.1*edge for edge in range(16)]

fig = plt.figure(figsize=(16, 3))
fig.subplots_adjust(hspace = 0.1, wspace = 0.1)
fig.suptitle("Angle distribution in two minutes sequence")
gs = gridspec.GridSpec(1, 2, width_ratios=[18, 3])

# Left: angle over time. Right: histogram sharing the angle axis.
ax0 = fig.add_subplot(gs[0])
ax0.plot(times, angles)
ax0.axis([-10, 12*60*2+10, -0.75, 0.75])
ax1 = fig.add_subplot(gs[1], sharey=ax0)
hst = ax1.hist(angles, bins = bins, orientation="horizontal")

In [None]:
# Load the log recorded under 'new-data/' (header line left out)
csv_skew_file = []
with open('new-data/'+CSV_FILE) as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)
    csv_skew_file.extend(reader)

# Histogram of its steering angles on a log-scaled count axis
angles = [float(line[3]) for line in csv_skew_file]
max_angle = 0.75
step = 0.05
n_edges = int(max_angle*2/step)+1
bins = [-max_angle + step*edge for edge in range(n_edges)]
fig = plt.figure(figsize=(8, 3))
hst = plt.hist(angles, bins)
plt.yscale('log')
axis = plt.xticks([-0.75+0.25*tick for tick in range(7)])
title = plt.title('Log scaled distribution of steering angles in a single lap')

## Data processing

In [None]:
### Testing image transformation
def print_sample_transformation(tr_func, tr_name, image, *argv):
    '''
    Show an image next to four results of applying ``tr_func`` to it.

    ``conv_to_graysale`` is called with the image only and drawn with the
    'gray_r' colormap; any other function is called as
    ``tr_func(image, *argv)`` and must return ``(image, angle)``.
    Returns the figure.
    '''
    n_samples = 4
    fig = plt.figure(figsize = (15, 2))
    fig.subplots_adjust(left = 0, right = 1, bottom = 0, top = 1, hspace = 0.05, wspace = 0.05)

    # Slot 1: the untouched image
    original_ax = fig.add_subplot(1, 5, 1)
    original_ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    original_ax.axis('off')
    original_ax.set_title('Original', fontsize = 12)

    # Slots 2..5: one fresh call to the transformation each
    first_slot = 2
    for slot in range(first_slot, first_slot + n_samples):
        imgplt = fig.add_subplot(1, 5, slot)
        if tr_func == conv_to_graysale:
            imgplt.imshow(tr_func(image).squeeze(), cmap='gray_r')
        else:
            tr_image, _ = tr_func(image, *argv)
            imgplt.imshow(cv2.cvtColor(tr_image, cv2.COLOR_BGR2RGB))
        imgplt.axis('off')
        # Only the first transformed panel carries the transformation name
        if slot == first_slot:
            imgplt.set_title(tr_name, loc = 'left', fontsize = 12)
    return fig

In [None]:
# Sample image next to four draws of the random flip; saved for the report
fig = print_sample_transformation(random_flip, 'Flip with 0.5 prob', sample_image, sample_angle)
fig.savefig('report-images/random_flip')

In [None]:
# Sample image next to four draws of the random brightness correction; saved for the report
fig = print_sample_transformation(random_brightness_correction, 'Bright adjustment', sample_image, sample_angle)
fig.savefig('report-images/random_brightness_correction')

In [None]:
# Sample image next to four draws of the random shear; saved for the report
fig = print_sample_transformation(random_shear, 'Random shear', sample_image, sample_angle)
fig.savefig('report-images/random_shear')

In [None]:
# Grayscale conversion preview. The trailing None is ignored: for conv_to_graysale
# print_sample_transformation calls the function with the image only.
fig = print_sample_transformation(conv_to_graysale, 'Convert to grayscale', sample_image, None)
fig.savefig('report-images/conv_to_graysale')

In [None]:
def resize_pipeline(image):
    '''
    Draw the image, its cropped version and the resized crop side by side.

    Uses ``crop`` and ``resize`` from utils and returns the figure.
    '''
    cropped_image = crop(image)
    resized_image = resize(cropped_image)
    # (panel title, image to show, title font size)
    stages = [
        ('Original', image, 12),
        ('Cropped', cropped_image, 16),
        ('Resized', resized_image, 12),
    ]

    fig = plt.figure(figsize = (15, 5))
    for position, (stage_name, stage_image, title_size) in enumerate(stages, start = 1):
        imgplt = fig.add_subplot(1, 3, position, autoscale_on = True)
        imgplt.imshow(cv2.cvtColor(stage_image, cv2.COLOR_BGR2RGB))
        imgplt.axis('off')
        imgplt.set_title(stage_name, fontsize = title_size)

    return fig

In [None]:
# Original / cropped / resized views of the sample image; saved for the report
fig = resize_pipeline(sample_image)
fig.savefig('report-images/resize_pipeline')

In [None]:
sample_size = 50

def _collect_angles(batch_generator, n_batches):
    '''
    Concatenate the angle arrays of the first ``n_batches`` batches.

    ``batch_generator`` must yield ``(images, angles)`` pairs. Iteration stops
    after exactly ``n_batches`` batches, so never-ending generators are safe.
    '''
    collected = []
    if n_batches > 0:
        for batch_no, (_, batch_angles) in enumerate(batch_generator, start = 1):
            collected.append(batch_angles)
            if batch_no >= n_batches:
                break
    # A single concatenation avoids re-copying the growing array every batch.
    return np.concatenate(collected) if collected else np.array([])

# Angles only (image_load = False), from the transformed and the original generators
tr_angles = _collect_angles(
    transformed_data_generator(csv_file, use_lateral_cameras = False,
                               shear_prob = 0.2, image_load = False),
    sample_size)
or_angles = _collect_angles(original_data_generator(csv_file, image_load = False), sample_size)

buckets = np.arange(-1,1,0.05)

fig = plt.figure(figsize = (12, 3))

hstplt = fig.add_subplot(1, 2, 1)
hist = hstplt.hist(or_angles, bins = buckets)
hstplt.set_title('Angle distribution original data')

hstplt = fig.add_subplot(1, 2, 2)
hist = hstplt.hist(tr_angles, bins = buckets)
hstplt.set_title('Angle distribution transformed data')

fig.savefig('report-images/angle_distribution')

## Generators

In [None]:
# Take the first batch of the transformed generator and show it on a 4 x 8 grid
images, angles = next(iter(transformed_data_generator(csv_file, shear_prob = 0.1)))

plot_width, plot_height = 15, 8.3
grid_rows, grid_cols = 4, 8

fig = plt.figure(figsize = (plot_width, plot_height))
fig.subplots_adjust(left = 0, right = 1, bottom = 0, top = 1, hspace = 0.05, wspace = 0.05)
for counter, (image, angle) in enumerate(zip(images[:32], angles[:32]), start = 1):
    imgplt = fig.add_subplot(grid_rows, grid_cols, counter)
    imgplt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    imgplt.axis('off')
    imgplt.set_title('{:f}'.format(angle))
fig.savefig('report-images/image_generator')

In [None]:
# First batch of the lenet generator on a 4 x 8 grid, drawn with the 'gray_r'
# colormap. lenet_generator is already imported in the imports cell, so it is
# not imported again here.
for images, angles in lenet_generator(csv_file):
    break

plot_width, plot_height = 15, 8.3
grid_rows, grid_cols = 4, 8

fig = plt.figure(figsize = (plot_width, plot_height))
fig.subplots_adjust(left = 0, right = 1, bottom = 0, top = 1, hspace = 0.05, wspace = 0.05)
counter = 1
for image, angle in zip(images[:32], angles[:32]):
    imgplt = fig.add_subplot(grid_rows, grid_cols, counter)
    counter += 1
    imgplt.imshow(image.squeeze(), cmap='gray_r')
    imgplt.axis('off')
    imgplt.set_title('{:f}'.format(angle))
fig.savefig('report-images/lenet_image_generator')

## Models

### Very simple model

In [None]:
#\---> MODEL DEFINITION <----------------------------------------------------\#
def create_model_simple():
    '''
    Linear baseline: rescaled 64x64x3 input flattened into one output unit,
    compiled with Adam and mean squared error
    '''
    layers = [
        Lambda(lambda x: x/127.5 - 1.0,input_shape=(64,64,3)),
        Flatten(),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(optimizer=Adam(), loss='mse')
    return model

### Simple model with nonlinearity

In [None]:
#\---> MODEL DEFINITION <----------------------------------------------------\#
def create_model_nl_simple():
    '''
    Simple model with one non-linearity: rescaled 64x64x3 input, flattened,
    a 128-unit relu layer and one output unit, compiled with Adam and
    mean squared error
    '''
    layers = [
        Lambda(lambda x: x/127.5 - 1.0,input_shape=(64,64,3)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(optimizer=Adam(), loss='mse')
    return model

### C4F1 model

In [None]:
#\---> MODEL DEFINITION <----------------------------------------------------\#
def create_c4f1():
    '''
    Four 5x5 relu convolutions (16, 32, 48 and 64 filters), each followed by
    2x2 max pooling, then a 128-unit relu layer and one output unit,
    compiled with Adam and mean squared error
    '''
    model = Sequential()
    model.add(Lambda(lambda x: x/127.5 - 1.0,input_shape=(64,64,3)))
    for n_filters in (16, 32, 48, 64):
        model.add(Conv2D(n_filters, (5, 5), padding = 'same', strides = (1, 1), activation = 'relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer=Adam(), loss='mse')
    return model

### NVIDIA model

In [None]:
### Model definition
learning_rate = 0.0001
keep_prob = 0.5
def create_nv_model():
    '''
    Five strided relu convolutions followed by dense layers of 1164, 100, 50
    and 10 units and one output unit, with dropout after the first three
    dense layers. Compiled with Adam and mean squared error.
    '''
    # Keras Dropout takes the fraction of units to DROP, so the keep
    # probability has to be converted (same value while keep_prob is 0.5).
    drop_rate = 1.0 - keep_prob

    # Define nvidia model
    model = Sequential()

    # Lambda layer
    model.add(Lambda(lambda x: x / 255.0 - 0.5, input_shape = (64, 64, 3)))

    # Convolutional layers
    model.add(Conv2D(24, (5, 5), padding = 'valid', strides = (2, 2), activation = 'relu'))
    model.add(Conv2D(36, (5, 5), padding = 'valid', strides = (2, 2), activation = 'relu'))
    model.add(Conv2D(48, (5, 5), padding = 'valid', strides = (2, 2), activation = 'relu'))
    model.add(Conv2D(64, (3, 3), padding = 'same', strides = (2, 2), activation = 'relu'))
    model.add(Conv2D(64, (3, 3), padding = 'valid', strides = (2, 2), activation = 'relu'))

    model.add(Flatten())

    # Fully connected layers
    model.add(Dense(1164, activation = 'relu'))
    model.add(Dropout(drop_rate))
    model.add(Dense(100, activation = 'relu'))
    model.add(Dropout(drop_rate))
    model.add(Dense(50, activation = 'relu'))
    model.add(Dropout(drop_rate))
    model.add(Dense(10, activation = 'relu'))
    model.add(Dense(1))

    # NOTE(review): learning_rate above is not used here; Adam() runs with its
    # default rate. Confirm whether Adam(learning_rate) was intended.
    model.compile(loss = 'mse', optimizer = Adam())
    return model

### LeNet model

In [None]:
#\---> MODEL DEFINITION <----------------------------------------------------\#
def create_model_c4f3():
    '''
    Four strided convolutions, each followed by a relu activation layer, then
    dropout and dense layers of 128, 128 and 1 units. Compiled with Adam and
    mean squared error.

    Written against the Keras 2 layer API used by the other models in this
    notebook: Conv2D with a kernel tuple, padding= and strides=.
    '''
    model = Sequential()

    model.add(Lambda(lambda x: x/127.5 - 1.0,input_shape=(64,64,3)))

    model.add(Conv2D(32, (8, 8), padding='same', strides=(4, 4)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (8, 8), padding='same', strides=(4, 4)))
    model.add(Activation('relu',name='relu2'))
    model.add(Conv2D(128, (4, 4), padding='same', strides=(2, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(128, (2, 2), padding='same', strides=(1, 1)))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dropout(0.5))

    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128))
    model.add(Dense(1))

    model.compile(optimizer=Adam(), loss='mse')

    return model

### Simplified NVIDIA model

In [None]:
def create_simple_nv_model():
    '''
    Five strided relu convolutions followed by dense layers of 256, 64, 16,
    10 and 1 units with no activation, with dropout after the first three.
    Compiled with the 'adam' optimizer and mean squared error.
    '''
    # (filters, kernel size, padding) of each convolution; all use stride 2
    conv_specs = [
        (24, (5, 5), 'valid'),
        (36, (5, 5), 'valid'),
        (48, (5, 5), 'valid'),
        (64, (3, 3), 'same'),
        (64, (3, 3), 'valid'),
    ]

    model = Sequential()
    model.add(Lambda(lambda x: x / 255.0 - 0.5, input_shape = (64, 64, 3)))
    for n_filters, kernel, padding in conv_specs:
        model.add(Conv2D(n_filters, kernel, padding = padding, strides = (2, 2), activation = 'relu'))
    model.add(Flatten())
    for n_units in (256, 64, 16):
        model.add(Dense(n_units))
        model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Dense(1))
    model.compile(loss = 'mse', optimizer = 'adam')
    return model

### Modified NVIDIA model

In [None]:
# NOTE(review): this definition has the same name as the function in the
# "Simplified" section above and replaces it once this cell runs. The name is
# kept so existing references still resolve; consider renaming one of the two.
def create_simple_nv_model():
    '''
    Five relu convolutions, each followed by 2x2 max pooling with stride 1,
    then dense layers of 1164, 100, 50 and 10 units with relu activations and
    one output unit.

    Prints the model summary, compiles with Adam at the notebook-level
    ``learning_rate`` and mean squared error, and returns the model.
    '''
    model = Sequential()

    model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=(64, 64, 3)))

    # starts with five convolutional and maxpooling layers
    model.add(Conv2D(24, (5, 5), padding='same', strides=(2, 2), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))

    model.add(Conv2D(36, (5, 5), padding='same', strides=(2, 2), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))

    model.add(Conv2D(48, (5, 5), padding='same', strides=(2, 2), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))

    model.add(Conv2D(64, (3, 3), padding='same', strides=(1, 1), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))

    model.add(Conv2D(64, (3, 3), padding='same', strides=(1, 1), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))

    model.add(Flatten())

    # Next, five fully connected layers
    model.add(Dense(1164))
    model.add(Activation('relu'))

    model.add(Dense(100))
    model.add(Activation('relu'))

    model.add(Dense(50))
    model.add(Activation('relu'))

    model.add(Dense(10))
    model.add(Activation('relu'))

    model.add(Dense(1))

    model.summary()

    model.compile(optimizer=Adam(learning_rate), loss="mse", )

    # Return the model, like every other create_* function in this notebook.
    return model

### Train model

In [None]:
### Choose model
# What to train and how the run is labelled in saved files
model_fn = create_model_nl_simple
model_name = 'nl_simple'
data_name = 'original'
description = 'Non linear simple model. Non-transformed data'
flag_new = False   # False: load the saved model instead of building a new one

# Data sources for training and validation
train_generator = original_data_generator(csv_train)
valid_generator = original_data_generator(csv_valid)

# Schedule
initial_epochs = 3
train_factor = 1

# Loss history, extended by every training cell
train_loss, valid_loss = [], []

In [None]:
### Model creation / summary and graph
# Build a fresh model or reload the one saved for this model/data combination
model_path = 'models/'+model_name+'_'+data_name+'.h5'
model = model_fn() if flag_new else load_model(model_path)
model.summary()
plot_model(model, to_file='report-images/'+model_name+'.png',
           show_shapes = True, show_layer_names = True)

In [None]:
# Initial training
epochs = 1
# Whole batches per epoch. The validation steps come from the validation set
# (they were computed from the training set before), and both are integers
# because a step count is a number of batches.
steps_train = max(1, int(np.ceil(len(csv_train)/BATCH_SIZE/train_factor)))
steps_valid = max(1, int(np.ceil(len(csv_valid)/BATCH_SIZE/train_factor)))
model, partial_train_loss, partial_valid_loss = \
    train_model(model, train_generator, steps_train,
                valid_generator, steps_valid, epochs)
train_loss += partial_train_loss
valid_loss += partial_valid_loss
# save_model is never imported in this notebook; the model's own save() writes
# the same .h5 file that load_model reads back.
model.save('models/'+model_name+'_'+data_name+'.h5')

In [None]:
# Learning curves of the current run, saved for the report
run_name = model_name+'_'+data_name
fig_curves = print_learning_curves(train_loss, valid_loss, run_name)
fig_curves.savefig('report-images/learning_curves_'+run_name)

In [None]:
# Real vs. predicted angles over a random one-minute sequence.
# Save the prediction figure itself (the learning-curve figure used to be
# written under this file name).
fig_angles = visualize_prediction(model, model_name+'_'+data_name, 1)
fig_angles.savefig('report-images/angle_prediction_'+model_name+'_'+data_name)