In [9]:
import numpy as np
import os
import random
import scipy.misc
import time

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D

In [10]:
class DataHelper:
    def __init__(self, data_file):
        xs = []
        ys = []
        
        with open(data_file) as f:
            header = f.readline()
            dirname = os.path.dirname(data_file)
            for line in f:
                fields = line.split(", ")
                xs.append(os.path.join(dirname, fields[0]))
                ys.append(fields[3])
                
        c = list(zip(xs, ys))
        random.shuffle(c)
        xs, ys = zip(*c)
        
        self._batch_pointer = 0
        self._train_xs = xs[:(int)(len(xs) * 0.9)]
        self._train_ys = ys[:(int)(len(xs) * 0.9)]

        val_xs = []
        val_ys = []
        val_size = (int)(len(xs) * 0.1)
        for i in range(val_size):
            val_ys.append(ys[-i])
            val_xs.append(scipy.misc.imread(xs[-i]))
        
        self._val_xs = np.asarray(val_xs)
        self._val_ys = np.asarray(val_ys)
        
    def data_size(self):
        return len(self._train_ys) + len(self._val_ys)
    
    def val_data(self):
        return self._val_xs, self._val_ys

    def next_train_batch(self, batch_size):
        x_out = []
        y_out = []
        for i in range(batch_size):
            data_idx = (self._batch_pointer + i) % len(self._train_ys)
            y_out.append(self._train_ys[data_idx])
            x_out.append(scipy.misc.imread(self._train_xs[data_idx]))

        self._batch_pointer += batch_size
        return np.asarray(x_out), np.asarray(y_out)

In [11]:
dh = DataHelper('data/driving_log.csv')
print(dh.data_size())

8035


In [12]:
x, y = dh.next_train_batch(64)
print(x.shape, y.shape)

(64, 160, 320, 3) (64,)


In [13]:
def simple_conv():
    model = Sequential()
    
    model.add(Convolution2D(24, 5, 5, subsample=(2, 2), input_shape=(160, 320, 3),
                            activation='relu'))
    model.add(Convolution2D(36, 5, 5, subsample=(2, 2), activation='relu'))
    model.add(Convolution2D(48, 5, 5, subsample=(2, 2), activation='relu'))
    
    model.add(Convolution2D(64, 5, 5, activation='relu'))
    model.add(Convolution2D(64, 5, 5, activation='relu'))

    model.add(Flatten())
    model.add(Dense(1164, activation='relu'))    
    model.add(Dense(100, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='tanh'))

    model.compile(optimizer='adam', loss='mean_squared_error')

    return model

In [14]:
model = simple_conv()
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_6 (Convolution2D)  (None, 78, 158, 24)   1824        convolution2d_input_2[0][0]      
____________________________________________________________________________________________________
convolution2d_7 (Convolution2D)  (None, 37, 77, 36)    21636       convolution2d_6[0][0]            
____________________________________________________________________________________________________
convolution2d_8 (Convolution2D)  (None, 17, 37, 48)    43248       convolution2d_7[0][0]            
____________________________________________________________________________________________________
convolution2d_9 (Convolution2D)  (None, 13, 33, 64)    76864       convolution2d_8[0][0]            
___________________________________________________________________________________________

In [15]:
EPOCHS = 1
BATCH_SIZE = 64
DATA_SIZE = dh.data_size()

val_x, val_y = dh.val_data()

print("Starting training")
for steps in range(50):
    step_start = time.time()
    cur_epoch = steps / DATA_SIZE
    x, y = dh.next_train_batch(BATCH_SIZE)

    train_loss = model.train_on_batch(x, y)
    val_loss = model.evaluate(val_x, val_y, verbose=0)
    
    time_taken = time.time() - step_start
    print("Steps {} train loss {:0.6f} validation loss {:0.6f} time taken {:0.1f}s".format(
            steps, train_loss, val_loss, time_taken))
    
    model.save("model.h5")

Starting training
Steps 0 train loss 0.969635 validation loss 1.024164 time taken 3.2s
Steps 1 train loss 1.035814 validation loss 1.024164 time taken 1.5s
Steps 2 train loss 1.002649 validation loss 1.024164 time taken 1.4s
Steps 3 train loss 1.057691 validation loss 1.024164 time taken 1.4s
Steps 4 train loss 0.998042 validation loss 1.024164 time taken 1.3s
Steps 5 train loss 0.992412 validation loss 1.024164 time taken 1.4s
Steps 6 train loss 1.052887 validation loss 1.024164 time taken 1.3s
Steps 7 train loss 1.040333 validation loss 1.024164 time taken 1.4s
Steps 8 train loss 1.075663 validation loss 1.024164 time taken 1.3s
Steps 9 train loss 1.004695 validation loss 1.024164 time taken 1.3s
Steps 10 train loss 1.065208 validation loss 1.024164 time taken 1.3s
Steps 11 train loss 1.021016 validation loss 1.024164 time taken 1.3s
Steps 12 train loss 1.026931 validation loss 1.024164 time taken 1.3s
Steps 13 train loss 1.031902 validation loss 1.024164 time taken 1.4s
Steps 14 tra

In [16]:
y_pred = model.predict(val_x[:10])
print(y_pred.reshape(10))
print(val_y[:10])

[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
['0' '-0.3445879' '-0.2876218' '0' '0.3583844' '0.1765823' '0' '0' '0'
 '-0.05975719']
