# Beyond Grand Theft Auto V for Training, Testing, and Enhancing Deep Learning in Self Driving Cars

## Training CNN to Estimate Affordance Variables on GTA V Screenshot data

### General Information

Code Author: Chawin Sitawarin (chawins@princeton.edu), Princeton University

Please visit the main website, https://princetonautonomous.github.io/, for a complete description of the project. It contains useful information as well as a link to the paper.

- Format of the input images:  
All images are .bmp by default and are all in a single directory. The dataset labels must be rescaled to the range [-0.9, 0.9] before being fed to the network. The code for scaling is provided [here](#Rescale).  

- Format of the annotation file:  
Annotation must be a plain text file with each line being:  
`track_id, frame_id, angle, car_L, car_M, car_R, lane_LL, lane_L, lane_R, lane_RR\n`  

- Calculate the pixel-wise mean of the training set before starting. We save it as one .bmp file with the same dimensions as the images. Feel free to save it any way you like, but the code will then need to be modified slightly.

- A Jupyter notebook cannot keep running once the browser is closed. The file `run_alexnet.py` is a Python script that contains only the training portion and can be run with the `nohup` command so that it keeps running even after the user logs out.

### Import Required Packages

In [None]:
%matplotlib inline

import random
import threading
import time
from os import listdir

import numpy as np
import tensorflow as tf
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from scipy import misc
import cv2
import matplotlib.pyplot as plt

### Set Parameters

In [None]:
# Set path to directory containing images
TRAIN_IMAGES = '/D/GTA_data/train/data/'
# Set path to the annotation file
TRAIN_ANNOT = '/D/GTA_data/train/annotation_scale.txt'

TEST_IMAGES = '/D/GTA_data/test/data/'
TEST_ANNOT = '/D/GTA_data/test/annotation_scale.txt'

VALID_IMAGES = '/D/GTA_data/valid/data/'
VALID_ANNOT = '/D/GTA_data/valid/annotation_scale.txt'

# Set path to the mean of training set 
MEAN_IMAGE = '/D/GTA_data/train/mean.bmp'
# Set Number of affordance variables
NUM_LABELS = 8
# Threshold on active/inactive state of affordance
ACT_THRES = 0.99

batch_size = 32                # Set size of the batch
input_shape = (210, 280, 3)    # Set input shape of CNN
output_dim = NUM_LABELS        # Number of output dimension     
num_epoch = 100                # Number of epoch to train

# Determine number of batches in the dataset.
# Floor division keeps every step count an integer regardless of the
# interpreter's `/` semantics; a trailing partial batch is not counted.
train_files = listdir(TRAIN_IMAGES)
num_steps = len(train_files) // batch_size

test_files = listdir(TEST_IMAGES)
test_num_steps = len(test_files) // batch_size

valid_files = listdir(VALID_IMAGES)
val_num_steps = len(valid_files) // batch_size

In [None]:
# Setup parameters for scaling and cleaning data
UP_LIM = 100000     # Values at or above this are treated as "inactive" by the clean-up code
angle_lim = 30      # Max limit for angle
lane_dist_lim = 11  # Max limit for lane distance
car_dist_lim = 60   # Max limit for car distance

# Max values to scale [-0.9, 0.9]: dividing by (limit * 10 / 9) maps the
# limit itself to 0.9. Every limit is converted to float first so that the
# result does not depend on integer division.
angle_max = float(angle_lim) * 10 / 9
car_dist_max = float(car_dist_lim) * 10 / 9
car_dist_inac = car_dist_max * 1.25

# NOTE: without float() this evaluated to 12 (instead of 12.22...) under
# integer division. Annotation files scaled with the truncated value need to
# be regenerated to match.
lane_dist_max = float(lane_dist_lim) * 10 / 9
lane_dist_inac = lane_dist_max * 1.25

### Build the Model

We use a modified version of AlexNet. There is an extra fully-connected layer at the end of the network with `output_dim` neurons and tanh as the activation function. A new model can easily be built to replace the original.

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization

# AlexNet with batch normalization in Keras 

model = Sequential()

# Convolutional stages: (number of filters, kernel size). Each stage is
# convolution -> batch normalization -> ReLU -> 3x3 max pooling.
conv_stages = [(64, (11, 11)), (128, (7, 7)), (192, (3, 3)), (256, (3, 3))]
for stage, (n_filters, kernel) in enumerate(conv_stages):
    if stage == 0:
        # Only the first layer of a Sequential model declares the input shape
        model.add(Convolution2D(n_filters, kernel, padding='same', input_shape=input_shape))
    else:
        model.add(Convolution2D(n_filters, kernel, padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3)))

model.add(Flatten())

# Two fully-connected layers with batch normalization and ReLU
for n_units in (4096, 4096):
    model.add(Dense(n_units, kernel_initializer='normal'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

# 1000-unit layer: batch-normalized, with no activation layer after it
model.add(Dense(1000, kernel_initializer='normal'))
model.add(BatchNormalization())

# Output layer: one tanh unit per affordance variable
model.add(Dense(output_dim, kernel_initializer='normal', activation='tanh'))

adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(optimizer=adam, loss='mse')

In [None]:
# Print model summary (calls Keras' model.summary() on the compiled model)
model.summary()

### Setup a Generator

A generator is needed to feed samples to the network, since the dataset is too large to fit in RAM. The code below is adapted from https://github.com/fchollet/keras/issues/1638.

In [None]:
class threadsafe_iter(object):
    """Wrap an iterator/generator so that it can be shared between threads.

    Every call that advances the wrapped iterator is made while holding a
    lock, so only one thread at a time pulls the next item.

    Both iterator protocols are provided: `__next__` (Python 3, and the
    builtin `next()`) and `next` (Python 2).
    """
    def __init__(self, it):
        # `it` is the underlying iterator/generator to serialize access to
        self.it = it
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next item of the wrapped iterator under the lock.

        StopIteration raised by the wrapped iterator propagates unchanged.
        """
        with self.lock:
            # The builtin next() works with either iterator protocol
            return next(self.it)

    # Python 2 spelling of the iterator protocol
    next = __next__


def threadsafe_generator(f):
    """A decorator that takes a generator function and makes it thread-safe.

    The returned function forwards its arguments to `f` and wraps the
    resulting generator in `threadsafe_iter`. The name and docstring of `f`
    are preserved on the returned function.
    """
    # Local import: functools is not among the notebook's top-level imports
    from functools import wraps

    @wraps(f)
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g


@threadsafe_generator
def myGenerator(img_dir, ant_path, batch_size):  # write the definition of your data generator
    """Yield `(x_batch, y_batch)` pairs forever.

    Args:
        img_dir: directory containing the images; it is concatenated directly
            with each filename, so it must end with a path separator.
        ant_path: path to the annotation file, one comma-separated line per
            image. The first two fields are skipped and the remaining ones
            are used as labels.
        batch_size: number of samples per yielded batch.

    Yields:
        x_batch: float32 array of shape `(batch_size,) + input_shape` holding
            mean-subtracted images divided by 255.
        y_batch: float32 array of shape `(batch_size, output_dim)`.

    Raises:
        ValueError: if the number of images differs from the number of
            annotation lines, or if there are fewer samples than `batch_size`.
        IOError: if the mean image or a sample image cannot be read.

    Samples beyond the last full batch are never yielded.
    """
    # List all files in directory. Pairing relies on the sorted filename
    # order matching the line order of the annotation file.
    filenames = sorted(listdir(img_dir))
    # Read annotation into a list
    with open(ant_path) as ant:
        annotation = [line.strip() for line in ant.readlines()]

    # The two lists are shuffled independently with the same seed, which only
    # yields the same permutation when they have the same length.
    if len(filenames) != len(annotation):
        raise ValueError('Found %d images in %s but %d annotation lines in %s'
                         % (len(filenames), img_dir, len(annotation), ant_path))

    num_batches = len(filenames) // batch_size
    if num_batches == 0:
        # Without this check the loop below would spin forever without yielding
        raise ValueError('Need at least %d samples to form one batch, found %d'
                         % (batch_size, len(filenames)))

    # Shuffle image filenames and annotation with the same permutation
    random.seed(1234)
    random.shuffle(filenames)
    random.seed(1234)
    random.shuffle(annotation)

    # Load mean image for mean subtraction
    mean = cv2.imread(MEAN_IMAGE)
    if mean is None:
        raise IOError('Cannot read mean image: ' + MEAN_IMAGE)
    mean = cv2.cvtColor(mean, cv2.COLOR_BGR2RGB)

    batch_shape = (batch_size,) + tuple(input_shape)

    while 1:
        for batch in range(num_batches):
            x_batch = np.zeros(batch_shape, dtype=np.float32)
            y_batch = np.zeros((batch_size, output_dim), dtype=np.float32)
            j = batch * batch_size

            for i in range(batch_size):
                im = cv2.imread(img_dir + filenames[j + i])
                if im is None:
                    raise IOError('Cannot read image: ' + img_dir + filenames[j + i])
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
                # NOTE(review): `im` and `mean` are presumably uint8 arrays as
                # returned by cv2.imread, in which case this subtraction wraps
                # modulo 256 instead of going negative. Left as is so that it
                # stays consistent with predict() and with weights already
                # trained this way -- confirm before changing.
                x_batch[i] = im - mean
                y_batch[i] = annotation[j + i].split(',')[2:]

            x_batch /= 255.
            yield x_batch, y_batch

### Setup Callbacks

Keras allows any number of customizable callbacks.

In [None]:
# We can use some built-in callbacks provided by Keras
# Filename template passed to ModelCheckpoint below
best_weights_filepath = 'weights.{epoch:02d}-{val_loss:.5f}.hdf5'

# Settings shared by both callbacks: both monitor the validation loss
monitor_options = dict(monitor='val_loss', verbose=1, mode='auto')

# Early stopping on the monitored quantity, with patience=5
earlyStopping = keras.callbacks.EarlyStopping(patience=5, **monitor_options)
# Checkpointing with save_best_only=True
saveBestModel = keras.callbacks.ModelCheckpoint(best_weights_filepath,
                                                save_best_only=True,
                                                **monitor_options)

In [None]:
# Alternatively, we can write our own. This one saves the weights at the end of every epoch.

class SaveModel(keras.callbacks.Callback):
    """Callback that writes the model weights to disk after every epoch.

    Weights are saved to './epoch<N>.hdf5' in the current working directory,
    where <N> is the epoch index passed to `on_epoch_end` (presumably
    zero-based -- confirm against the Keras version in use). The other hooks
    are deliberate no-ops, kept as a template for further customization.

    `logs` defaults to None instead of a shared mutable `{}`; none of the
    hooks reads it.
    """

    def on_train_begin(self, logs=None):
        return

    def on_train_end(self, logs=None):
        return

    def on_epoch_begin(self, epoch, logs=None):
        return

    def on_epoch_end(self, epoch, logs=None):
        # One file per epoch, so earlier epochs are never overwritten
        self.model.save_weights('./epoch' + str(epoch) + '.hdf5')
        return

    def on_batch_begin(self, batch, logs=None):
        return

    def on_batch_end(self, batch, logs=None):
        return

saveModel = SaveModel()

### Training

In [None]:
# Load weights if you wish to resume training
# NOTE(review): this statement runs unconditionally and reads './epoch10.hdf5';
# presumably it fails when that file is absent, so skip this cell when
# training from scratch.
model.load_weights('./epoch10.hdf5')

In [None]:
# Start training. Please refer to Keras official website for the function arguments
# The training and validation generators are built first for readability.
train_generator = myGenerator(TRAIN_IMAGES, TRAIN_ANNOT, batch_size)
valid_generator = myGenerator(VALID_IMAGES, VALID_ANNOT, batch_size)

# Only the built-in callbacks are used here. The epoch counter starts at 0
# even if weights were loaded beforehand.
model.fit_generator(
    train_generator,
    steps_per_epoch=num_steps,
    epochs=num_epoch,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=val_num_steps,
    max_queue_size=100,
    workers=4,
    use_multiprocessing=False,
    initial_epoch=0,
    callbacks=[earlyStopping, saveBestModel])

### Evaluation

In [None]:
# Evaluate the model
# Runs the model over `val_num_steps` batches drawn from the validation generator
model.evaluate_generator(
    myGenerator(VALID_IMAGES, VALID_ANNOT, batch_size),
    steps=val_num_steps,
    max_queue_size=100,
    workers=4)

In [None]:
# Utility function that helps in evaluation

def load_data(img_dir, ant_path):
    """Load a list of filenames in <img_dir> and annotation in <ant_path>.

    Filenames are sorted, then both lists are shuffled with the same fixed
    seed (1234), so that entry k of one list still corresponds to entry k of
    the other. This relies on the sorted filename order matching the line
    order of the annotation file. Note that the module-level random generator
    is re-seeded as a side effect.

    Args:
        img_dir: directory whose entries are the image files.
        ant_path: path to the annotation text file (one line per image).

    Returns:
        (filenames, annotation): two lists of equal length; annotation lines
        have surrounding whitespace stripped.

    Raises:
        ValueError: if the number of files differs from the number of
            annotation lines. The two shuffles would then apply different
            permutations and silently misalign every label.
    """
    # List all files in directory
    filenames = sorted(listdir(img_dir))
    # Read annotation into a list
    with open(ant_path) as ant:
        annotation = [line.strip() for line in ant.readlines()]

    if len(filenames) != len(annotation):
        raise ValueError('Found %d images in %s but %d annotation lines in %s'
                         % (len(filenames), img_dir, len(annotation), ant_path))

    # Shuffle both lists with the same permutation
    random.seed(1234)
    random.shuffle(filenames)
    random.seed(1234)
    random.shuffle(annotation)

    return filenames, annotation


def predict(model, filepath, mean=None):
    """Predict one image given <filepath>.

    Args:
        model: object exposing `predict(x, batch_size=..., verbose=...)`
            (the Keras model in this notebook).
        filepath: path of the image to load.
        mean: optional mean image in RGB channel order, with the same shape
            as the input image. When None (the default) it is read from
            MEAN_IMAGE on every call; pass a preloaded array to avoid
            re-reading it when predicting many images.

    Returns:
        The first (and only) row of the model output for this image.

    Raises:
        IOError: if the mean image or the input image cannot be read.
    """
    if mean is None:
        mean = cv2.imread(MEAN_IMAGE)
        if mean is None:
            raise IOError('Cannot read mean image: ' + MEAN_IMAGE)
        mean = cv2.cvtColor(mean, cv2.COLOR_BGR2RGB)

    im = cv2.imread(filepath)
    if im is None:
        raise IOError('Cannot read image: ' + filepath)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # NOTE(review): `im` and `mean` are presumably uint8 arrays as returned by
    # cv2.imread, in which case this subtraction wraps modulo 256 instead of
    # going negative. Left as is so that it stays consistent with the
    # training generator -- confirm before changing.
    im = (im - mean) / 255.
    # Add the leading batch axis; this also fails loudly on unexpected sizes
    im = im.reshape((1,) + tuple(input_shape))

    return model.predict(im, batch_size=1, verbose=0)[0]


def invert_scale(y):
    """Map a vector of 8 scaled affordance values back to original units.

    Entry 0 (angle) is multiplied by `angle_max`. Entries 1-3 (car distances)
    and 4-7 (lane distances) undo the mapping `(2 * d - lim) / max`, i.e.
    `d = (scaled * max + lim) / 2`.

    Args:
        y: indexable sequence with at least 8 scaled values.

    Returns:
        A new float array of shape (8,) with the unscaled values.
    """
    y_invert = np.zeros((8, ))

    # Angle
    y_invert[0] = y[0] * angle_max

    # Car distances
    for k in (1, 2, 3):
        y_invert[k] = (y[k] * car_dist_max + car_dist_lim) / 2

    # Lane distances
    for k in (4, 5, 6, 7):
        y_invert[k] = (y[k] * lane_dist_max + lane_dist_lim) / 2

    return y_invert

In [None]:
# Show estimated labels vs ground truth as well as show the image

# Load the validation set here so that this cell can run on its own instead
# of relying on `filenames` and `annot` being defined by a later cell.
filenames, annotation = load_data(VALID_IMAGES, VALID_ANNOT)

annot = np.zeros((len(annotation), NUM_LABELS))
for i, line in enumerate(annotation):
    annot[i] = line.split(',')[2:]

for i in range(100):

    # Pick a random validation sample
    index = random.randrange(len(filenames))
    print(index)
    filepath = VALID_IMAGES + filenames[index]

    # Run the network once and reuse the result
    y_pred = predict(model, filepath)
    y_true = annot[index]

    # Prediction first, then ground truth, both in original units
    print(invert_scale(y_pred))
    print(invert_scale(y_true))

    # Read with OpenCV (as the rest of the notebook does) and convert
    # BGR -> RGB for matplotlib; scipy.misc.imread is no longer available in
    # recent SciPy releases.
    im = cv2.imread(filepath)
    if im is None:
        raise IOError('Cannot read image: ' + filepath)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    plt.imshow(im)
    plt.axis('off')
    plt.show()

In [None]:
# Calculate error for each variable separately.
# Also exclude error coming from inactive affordance

filenames, annotation = load_data(VALID_IMAGES, VALID_ANNOT)

# Parse the label fields (everything after the first two fields) of each line
annot = np.zeros((len(annotation), NUM_LABELS))
for i, line in enumerate(annotation):
    annot[i] = line.split(',')[2:]

# Accumulated squared error, one slot per affordance variable
error = np.zeros(NUM_LABELS)

for index in range(len(filenames)):

    filepath = VALID_IMAGES + filenames[index]

    y_pred = predict(model, filepath)
    y_true = annot[index]

    for j in range(NUM_LABELS):

        if j == 0:
            # Angle: plain squared error
            error[j] += (y_pred[j] - y_true[j]) ** 2
        else:
            # Distances: any value at or above ACT_THRES is treated as
            # "inactive" and replaced by 1, so that two inactive values
            # contribute no error.
            dist_pred = y_pred[j] if y_pred[j] < ACT_THRES else 1
            dist_true = y_true[j] if y_true[j] < ACT_THRES else 1
            error[j] += (dist_pred - dist_true) ** 2

# Mean squared error per variable
print(error / len(filenames))

### Clean Up Data

This section of code cleans up bad data.

In [None]:
# Directory of raw images (files are renamed in place here) and the directory
# that receives copies of the images that survive cleaning
IMAGES_RAW = '/D/GTA_data/valid/raw/'
IMAGES_CLEAN = '/D/GTA_data/valid/data/'

# Raw annotation (input), cleaned annotation (intermediate file, opened in
# append mode by the cleaning code) and rescaled annotation (final output,
# also opened in append mode)
ANNOT_RAW = '/D/GTA_data/valid/annotation_raw.txt'
ANNOT_CLEAN = '/D/GTA_data/valid/temp.txt'
ANNOT_SCALE = '/D/GTA_data/valid/annotation_scale.txt'

In [None]:
import os
from shutil import copyfile

# Rename files to zero-leading: "<track>_<frame>.<ext>" becomes
# "<track padded to 5>_<frame padded to 4>.bmp"
filenames = listdir(IMAGES_RAW)
for fn in filenames:
    name_parts = fn.split('_')
    track_nb = name_parts[0].zfill(5)
    frame_nb = name_parts[1].split('.')[0].zfill(4)
    padded_name = track_nb + '_' + frame_nb + '.bmp'
    os.rename(IMAGES_RAW + fn, IMAGES_RAW + padded_name)

# Read annotation to list (one stripped line per entry)
with open(ANNOT_RAW) as ant:
    annotation = [line.strip() for line in ant.readlines()]

# Parse the label fields (everything after the first two fields) into an array
annot = np.zeros((len(annotation), NUM_LABELS))
for i, line in enumerate(annotation):
    annot[i] = line.split(',')[2:]

# Row indices (into `annotation` / `annot`) that will be dropped
del_id = []
# For each of the three car-distance columns: number of consecutive rows seen
# so far with a value below UP_LIM. The counter is reset to 0 when a value at
# or above UP_LIM is seen.
frames_with_car = np.zeros((3, ), dtype=np.int32)

# Iterate through data to find indices of bad data
# NOTE: as soon as one column marks row i as bad, `break` skips the remaining
# columns of that row, so `frames_with_car` is not updated for them.
for i in range(len(annotation)):
    for j in range(NUM_LABELS):
        
        # Clean up too large angle
        # A row is dropped when the angle magnitude lies strictly between
        # angle_lim and 360 - angle_lim; values within angle_lim of +/-360
        # are kept.
        if j == 0:
            if annot[i, j] < 0:
                if annot[i, j] < -angle_lim and annot[i, j] > -360 + angle_lim:
                    del_id.append(i)
                    break
            else:
                if annot[i, j] > angle_lim and annot[i, j] < 360 - angle_lim:
                    del_id.append(i)
                    break
        
        # Clean up bad car dist
        if j >= 1 and j <= 3:
            if annot[i, j] < UP_LIM:
                # Value below UP_LIM: extend the current run for this column
                frames_with_car[j - 1] += 1
                # Drop rows whose car distance exceeds the limit
                if annot[i, j] > car_dist_lim:
                    del_id.append(i)
                    break
            else:
                # The run just ended. If it lasted between 1 and 5 rows, drop
                # every row of that run (the rows immediately before row i).
                if (frames_with_car[j - 1] > 0) and (frames_with_car[j - 1] <= 5):
                    for k in range(frames_with_car[j - 1]):
                        del_id.append(i - k - 1)
                frames_with_car[j - 1] = 0
        
        # Clean up lane distance
        # Drop rows whose lane distance is negative, or above the limit while
        # still below UP_LIM
        if j >= 4 and j <= 7:
            if (annot[i, j] > lane_dist_lim and annot[i, j] < UP_LIM) or (annot[i, j] < 0):
                del_id.append(i)
                break

# A row may have been recorded more than once; keep unique indices only
del_id = list(set(del_id))
print 'Number of bad data: ', len(del_id)
print 'Number of clean data: ', (len(annotation) - len(del_id))

# Remove bad data
filenames = sorted(listdir(IMAGES_RAW))

# The mask is built from the annotation and applied to the sorted filenames,
# so both must describe the same number of samples.
if len(filenames) != len(annotation):
    raise ValueError('Found %d images in %s but %d annotation lines'
                     % (len(filenames), IMAGES_RAW, len(annotation)))

# Boolean mask: True for rows to keep
mask = np.ones(len(annotation), dtype=bool)
if del_id:
    mask[del_id] = False
cleaned_annotation = np.array(annotation)[mask]
cleaned_filenames = np.array(filenames)[mask]

# Copy cleaned data to new directory
for fn in cleaned_filenames:
    copyfile(IMAGES_RAW + fn, IMAGES_CLEAN + fn)
# Write cleaned annotation to new file
# NOTE: the file is opened in append mode, so running this cell again adds
# the same lines a second time; delete the file first when re-running.
with open(ANNOT_CLEAN, 'a') as f:
    for ant in cleaned_annotation:
        f.write(ant + '\n')

### Rescale

Rescale all labels to range [-1, 1] ([-0.9, 0.9] in practice to leave a slight margin)

In [None]:
# Read annotation to list
with open(ANNOT_CLEAN) as ant:
    annotation = ant.readlines()
    annotation = [x.strip() for x in annotation] 

annot = np.zeros((len(annotation), NUM_LABELS))
for i in range(len(annotation)):
    annot[i] = annotation[i].split(',')[2:] 
    
max_annot = np.zeros((NUM_LABELS, ))
for i in range(len(annotation)):
    for j in range(NUM_LABELS):
        if annot[i, j] <= UP_LIM and annot[i, j] > max_annot[j]:
            max_annot[j] = annot[i, j]

min_annot = np.min(annot, axis=0)

print 'Max: ', max_annot
print 'Min: ', min_annot

In [None]:
# Write the rescaled annotation, one line per sample:
# "<track padded to 5>, <frame padded to 4>, <angle>, <3 car dists>, <4 lane dists>"
#
# NOTE: the file is opened in append mode, so running this cell again adds
# the same lines a second time; delete the file first when re-running.
# NOTE(review): values are treated as inactive only when strictly greater
# than UP_LIM here, whereas the clean-up code treats values >= UP_LIM as
# inactive -- confirm which boundary is intended.
with open(ANNOT_SCALE, 'a') as f:

    for i in range(len(annotation)):

        fields = annotation[i].split(',')
        out = fields[0].zfill(5)
        out += ', ' + fields[1].strip().zfill(4)

        # Angles within angle_lim of +/-360 are wrapped back around zero
        if annot[i, 0] > 360 - angle_lim:
            angle = annot[i, 0] - 360
        elif annot[i, 0] < -360 + angle_lim:
            angle = annot[i, 0] + 360
        else:
            angle = annot[i, 0]
        # repr(float(...)) replaces the backtick syntax: it is valid on every
        # Python version and writes a plain decimal literal that parses back
        # to the same value.
        out += ', ' + repr(float(angle / angle_max))

        # Rescale car dist from [0, 60] to [-0.9, 0.9]
        for j in range(1, 4):
            if annot[i, j] > UP_LIM:
                dist = car_dist_inac
            else:
                dist = annot[i, j]
            out += ', ' + repr(float((2 * dist - car_dist_lim) / car_dist_max))

        # Rescale lane dist from [0, 11] to [-0.9, 0.9]
        for j in range(4, 8):
            if annot[i, j] > UP_LIM:
                dist = lane_dist_inac
            else:
                dist = annot[i, j]
            out += ', ' + repr(float((2 * dist - lane_dist_lim) / lane_dist_max))

        f.write(out + '\n')

In [None]:
# Read annotation to list
with open(ANNOT_SCALE) as ant:
    annotation = ant.readlines()
    annotation = [x.strip() for x in annotation] 

annot = np.zeros((len(annotation), NUM_LABELS))
for i in range(len(annotation)):
    annot[i] = annotation[i].split(',')[2:] 
    
max_annot = np.zeros((NUM_LABELS, ))
for i in range(len(annotation)):
    for j in range(NUM_LABELS):
        if annot[i, j] <= UP_LIM and annot[i, j] > max_annot[j]:
            max_annot[j] = annot[i, j]

min_annot = np.min(annot, axis=0)

print 'Max: ', max_annot
print 'Min: ', min_annot