In [1]:
import tensorflow as tf
import numpy as np
import os
from utils import BatchGenerator
import utils
# from SamplingRNN import SamplingRNNCell
# Short alias for the TF-Slim layer API; apply_vision_simple builds its layers through it.
slim = tf.contrib.slim

  from ._conv import register_converters as _register_converters


# Configuration

These are some basic parameters for constructing and training the model. In the future, it would be very helpful to test different parameters.

In [2]:
# Sequence / batch geometry.
SEQ_LEN = 10  # this parameter can be changed. TODO: try longer sequences if memory is available.
BATCH_SIZE = 4  # this parameter can also be changed
# Extra leading frames fed in front of every sequence. apply_vision_simple uses
# VALID temporal convolutions with kernel depths 3, 2, 2, 2, which together
# consume exactly 5 frames, so this value is tied to that architecture.
LEFT_CONTEXT = 5

# Input image geometry.
HEIGHT = 480
WIDTH = 640
CHANNELS = 3

# LSTM size and projection size.
RNN_SIZE = 32
RNN_PROJ = 32

# Column names of the dataset CSV. The literal separates names with ", ", so each
# name is stripped; otherwise every name after the first keeps a leading space.
CSV_HEADER = [
    column.strip()
    for column in "index, timestamp, width, height, frame_id, filename, angle, torque, speed, lat, long, alt".split(",")
]
OUTPUTS = CSV_HEADER[-6:-3]  # angle, torque, speed
OUTPUT_DIM = len(OUTPUTS)  # predict all features

# Loading Data

In [3]:
# NOTE(review): absolute, machine-specific paths; consider making them configurable.
TRAIN_DS_PATH = "/home/neil/dataset/udacity/main.csv"
TEST_DS_PATH = "/home/neil/dataset/udacity/test.csv"

# Training / validation sequences plus the normalisation statistics used for the targets.
(train_seq, valid_seq), (mean, std) = utils.process_csv(filename=TRAIN_DS_PATH, val=5)  # concatenated interpolated.csv from rosbags

# interpolated.csv for testset filled with dummy values
# Materialise the result once: if read_csv hands back a one-shot iterator
# (e.g. a Python 3 `map`), counting it with len(list(...)) would exhaust it and
# leave nothing for the later test pass.
test_seq = list(utils.read_csv(TEST_DS_PATH))

print(len(test_seq))

IOError: [Errno 2] No such file or directory: '/Users/YongyangNie/Desktop/csvs/main.csv'

In [None]:
def layer_norm(x):
    """Layer-normalise `x` with learned centre and scale and no activation."""
    return tf.contrib.layers.layer_norm(
        inputs=x,
        center=True,
        scale=True,
        activation_fn=None,
        trainable=True,
    )

def apply_vision_simple(image, keep_prob, batch_size, seq_len, scope=None, reuse=None):
    """Build the 3D-convolutional vision network and return per-step features.

    Args:
        image: tensor that can be reshaped to
            [batch_size, LEFT_CONTEXT + seq_len, HEIGHT, WIDTH, CHANNELS].
        keep_prob: keep probability passed to every tf.nn.dropout in the stack.
        batch_size: number of sequences in the batch.
        seq_len: number of output time steps per sequence.
        scope: optional variable-scope name (the default name is 'Vision').
        reuse: forwarded to tf.variable_scope.

    Returns:
        Tensor of shape [batch_size, seq_len, 128]: layer-normalised ELU of the
        sum of the main branch and the four auxiliary projections.

    Raises:
        ValueError: if the statically known time dimension after the last
            convolution differs from `seq_len`, in which case the final reshape
            would silently mix time steps.
    """
    def last_steps_flat(tensor):
        # Keep only the final `seq_len` time steps and flatten each step's feature map.
        return tf.reshape(tensor[:, -seq_len:, :, :, :], [batch_size, seq_len, -1])

    video = tf.reshape(image, shape=[batch_size, LEFT_CONTEXT + seq_len, HEIGHT, WIDTH, CHANNELS])
    with tf.variable_scope(scope, 'Vision', [image], reuse=reuse):
        # Each VALID convolution shortens the time axis by (temporal kernel depth - 1).
        net = slim.convolution(video, num_outputs=64, kernel_size=[3,12,12], stride=[1,6,6], padding="VALID")
        net = tf.nn.dropout(x=net, keep_prob=keep_prob)
        aux1 = slim.fully_connected(last_steps_flat(net), 128, activation_fn=None)
        net = slim.convolution(net, num_outputs=64, kernel_size=[2,5,5], stride=[1,2,2], padding="VALID")
        net = tf.nn.dropout(x=net, keep_prob=keep_prob)
        aux2 = slim.fully_connected(last_steps_flat(net), 128, activation_fn=None)
        net = slim.convolution(net, num_outputs=64, kernel_size=[2,5,5], stride=[1,1,1], padding="VALID")
        net = tf.nn.dropout(x=net, keep_prob=keep_prob)
        aux3 = slim.fully_connected(last_steps_flat(net), 128, activation_fn=None)
        net = slim.convolution(net, num_outputs=64, kernel_size=[2,5,5], stride=[1,1,1], padding="VALID")
        net = tf.nn.dropout(x=net, keep_prob=keep_prob)

        # The reshape below is only meaningful when exactly `seq_len` time steps
        # remain; check that explicitly instead of printing the tensor.
        time_steps = net.get_shape().as_list()[1]
        if isinstance(seq_len, int) and time_steps is not None and time_steps != seq_len:
            raise ValueError(
                "vision stack leaves %d time steps but seq_len is %d; "
                "LEFT_CONTEXT must match the temporal kernel sizes" % (time_steps, seq_len))

        flat = tf.reshape(net, [batch_size, seq_len, -1])
        aux4 = slim.fully_connected(flat, 128, activation_fn=None)
        net = slim.fully_connected(flat, 1024, activation_fn=tf.nn.relu)
        net = tf.nn.dropout(x=net, keep_prob=keep_prob)
        net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu)
        net = tf.nn.dropout(x=net, keep_prob=keep_prob)
        net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu)
        net = tf.nn.dropout(x=net, keep_prob=keep_prob)
        net = slim.fully_connected(net, 128, activation_fn=None)
        # Residual-style sum of the main branch and the auxiliary projections.
        return layer_norm(tf.nn.elu(net + aux1 + aux2 + aux3 + aux4))
        
        
class SamplingRNNCell(tf.nn.rnn_cell.RNNCell):
    """Simple sampling RNN cell.

    Wraps an internal RNN cell and feeds the previous output back in as part of
    the next step's input. The value fed back is either the ground truth for the
    current step or the cell's own prediction.
    """

    def __init__(self, num_outputs, use_ground_truth, internal_cell, keep_prob):
        """
        Args:
            num_outputs: size of the output vector produced at every step.
            use_ground_truth: if true, don't sample; the ground truth of the
                current step is stored as the "previous output" for the next step.
            internal_cell: the wrapped RNN cell.
            keep_prob: stored on the instance; not used by this class itself.
        """
        self._num_outputs = num_outputs
        self._use_ground_truth = use_ground_truth
        self._internal_cell = internal_cell
        self._keep_prob = keep_prob

    @property
    def state_size(self):
        # previous output and bottleneck state
        return self._num_outputs, self._internal_cell.state_size

    @property
    def output_size(self):
        return self._num_outputs

    def __call__(self, inputs, state, scope=None):
        """Run one step.

        Args:
            inputs: pair (visual_feats, current_ground_truth).
            state: pair (prev_output, prev_state_internal).
            scope: unused.

        Returns:
            (new_output, (fed_back_output, new_state_internal)), where
            fed_back_output is the ground truth when use_ground_truth is set and
            new_output otherwise.
        """
        (visual_feats, current_ground_truth) = inputs
        prev_output, prev_state_internal = state
        # the following is just for a baseline
        context = tf.concat([prev_output, visual_feats], 1)
        # Use the cell given to the constructor rather than a module-level
        # global of the same name.
        new_output_internal, new_state_internal = self._internal_cell(context, prev_state_internal)
        new_output = tf.contrib.layers.fully_connected(
            inputs=tf.concat([new_output_internal, prev_output, visual_feats], 1),
            num_outputs=self._num_outputs, activation_fn=None, scope="OutputProjection")
        return new_output, (current_ground_truth if self._use_ground_truth else new_output, new_state_internal)
    
    

# Building the Graph

In [None]:
# Build the whole model inside a dedicated graph: image decoding, the vision
# network, two weight-sharing RNNs (teacher-forced and autoregressive), the
# losses, the optimizer, the summaries and the saver.
graph = tf.Graph()

with graph.as_default():
    # inputs
    # Both default to inference-friendly values unless overridden through feed_dict.
    learning_rate = tf.placeholder_with_default(input=1e-4, shape=())
    keep_prob = tf.placeholder_with_default(input=1.0, shape=())

    inputs = tf.placeholder(shape=(BATCH_SIZE, LEFT_CONTEXT + SEQ_LEN),
                                    dtype=tf.string)  # paths to png files from the central camera
    targets = tf.placeholder(shape=(BATCH_SIZE, SEQ_LEN, OUTPUT_DIM),
                                     dtype=tf.float32)  # batch_size x seq_len x OUTPUT_DIM
    # Normalise targets with the statistics returned by utils.process_csv.
    targets_normalized = (targets - mean) / std

    # Read and decode every path in the batch, one image per (sequence, frame) pair.
    input_images = tf.stack([tf.image.decode_png(tf.read_file(x)) for x in 
                             tf.unstack(tf.reshape(inputs, shape=[(LEFT_CONTEXT + SEQ_LEN) * BATCH_SIZE]))])
    # Rescale pixel values from [0, 255] to [-1, 1].
    input_images = -1.0 + 2.0 * tf.cast(input_images, tf.float32) / 255.0
    # decode_png leaves the static shape unknown; pin it to the configured geometry.
    input_images.set_shape([(LEFT_CONTEXT + SEQ_LEN) * BATCH_SIZE, HEIGHT, WIDTH, CHANNELS])
    visual_conditions_reshaped = apply_vision_simple(image=input_images, keep_prob=keep_prob, 
                                                     batch_size=BATCH_SIZE, seq_len=SEQ_LEN)
    visual_conditions = tf.reshape(visual_conditions_reshaped, [BATCH_SIZE, SEQ_LEN, -1])
    visual_conditions = tf.nn.dropout(x=visual_conditions, keep_prob=keep_prob)

    # Each RNN input is a (visual features, ground truth) pair; the autoregressive
    # variant receives zeros in place of the ground truth.
    rnn_inputs_with_ground_truth = (visual_conditions, targets_normalized)
    rnn_inputs_autoregressive = (visual_conditions, tf.zeros(shape=(BATCH_SIZE, SEQ_LEN, OUTPUT_DIM), dtype=tf.float32))

    # One LSTM is shared by both sampling cells.
    internal_cell = tf.nn.rnn_cell.LSTMCell(num_units=RNN_SIZE, num_proj=RNN_PROJ)
    cell_with_ground_truth = SamplingRNNCell(num_outputs=OUTPUT_DIM, use_ground_truth=True,
                                             internal_cell=internal_cell, keep_prob=keep_prob)
    cell_autoregressive = SamplingRNNCell(num_outputs=OUTPUT_DIM, use_ground_truth=False,
                                          internal_cell=internal_cell, keep_prob=keep_prob)

    # Initial states for the two RNNs, both derived from the same state variables.
    # NOTE(review): the exact semantics live in utils (not visible here) — confirm.
    controller_initial_state_variables = utils.get_initial_state(cell_autoregressive.state_size)
    controller_initial_state_autoregressive = utils.deep_copy_initial_state(controller_initial_state_variables)
    controller_initial_state_gt = utils.deep_copy_initial_state(controller_initial_state_variables)

    # Teacher-forced RNN: creates the variables in the "predictor" scope.
    with tf.variable_scope("predictor"):
        out_gt, controller_final_state_gt = tf.nn.dynamic_rnn(cell=cell_with_ground_truth,
                                                                      inputs=rnn_inputs_with_ground_truth,
                                                                      sequence_length=[SEQ_LEN] * BATCH_SIZE,
                                                                      initial_state=controller_initial_state_gt,
                                                                      dtype=tf.float32,
                                                                      swap_memory=True, time_major=False)
    # Autoregressive RNN: reuses the variables created above.
    with tf.variable_scope("predictor", reuse=True):
        out_autoregressive, controller_final_state_autoregressive = tf.nn.dynamic_rnn(cell=cell_autoregressive, 
                                                                                      inputs=rnn_inputs_autoregressive,
                                                                                      sequence_length=[SEQ_LEN] * BATCH_SIZE,
                                                                                      initial_state=controller_initial_state_autoregressive, 
                                                                                      dtype=tf.float32, 
                                                                                      swap_memory=True,
                                                                                      time_major=False)

    # Losses are computed in normalised target space.
    mse_gt = tf.reduce_mean(tf.squared_difference(out_gt, targets_normalized))
    mse_autoregressive = tf.reduce_mean(tf.squared_difference(out_autoregressive, targets_normalized))
    # Output index 0 is the steering angle (first entry of OUTPUTS).
    mse_autoregressive_steering = tf.reduce_mean(tf.squared_difference(out_autoregressive[:, :, 0], targets_normalized[:, :, 0]))
    # Undo the normalisation to obtain steering predictions in the original units.
    steering_predictions = (out_autoregressive[:, :, 0] * std[0]) + mean[0]

    # Only the autoregressive steering error is optimised; the other two MSEs
    # are logged as summaries only.
    total_loss = mse_autoregressive_steering  # + 0.1 * (mse_gt + mse_autoregressive)

    # NOTE(review): utils.get_optimizer presumably returns the training op (it is
    # passed to session.run in do_epoch) — confirm against utils.
    optimizer = utils.get_optimizer(total_loss, learning_rate)

    tf.summary.scalar("MAIN TRAIN METRIC: rmse_autoregressive_steering", tf.sqrt(mse_autoregressive_steering))
    tf.summary.scalar("rmse_gt", tf.sqrt(mse_gt))
    tf.summary.scalar("rmse_autoregressive", tf.sqrt(mse_autoregressive))

    summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('v3/train_summary', graph=graph)
    valid_writer = tf.summary.FileWriter('v3/valid_summary', graph=graph)
    saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
    

# Pre-training

In [None]:
# Dropout keep probability fed to the graph while training.
KEEP_PROB_TRAIN = 0.25

# Step counters for the summary writers, and a cache of per-image validation
# statistics; all three are updated by do_epoch.
global_train_step, global_valid_step = 0, 0
global_valid_predictions = dict()

# Directory searched for an existing checkpoint when training starts.
checkpoint_dir = os.getcwd() + "/v3"

# Let the process claim the whole GPU memory.
gpu_options = tf.GPUOptions(
    per_process_gpu_memory_fraction=1.0,
)

def do_epoch(session, sequences, mode):
    """Run one pass over `sequences` in the given mode.

    Args:
        session: tf.Session bound to the model graph.
        sequences: data handed to BatchGenerator.
        mode: "train" (run the optimizer and log training summaries),
            "valid" (log validation summaries and record per-image statistics
            in global_valid_predictions) or "test" (collect predictions only).

    Returns:
        For "train" and "valid": the square root of the mean per-step
        mse_autoregressive_steering (RMSE in normalised target space).
        For "test": dict mapping each input image path to its predicted
        steering value.

    Raises:
        ValueError: if `mode` is not one of the three supported values.
    """
    global global_train_step, global_valid_step, global_valid_predictions

    if mode not in ("train", "valid", "test"):
        raise ValueError("unknown mode: %r" % (mode,))

    test_predictions = {}
    batch_generator = BatchGenerator(sequence=sequences, seq_len=SEQ_LEN, batch_size=BATCH_SIZE)
    total_num_steps = int(1 + (batch_generator.indices[1] - 1) / SEQ_LEN)
    # RNN states carried from one batch to the next; None until the first step has run.
    controller_final_state_gt_cur, controller_final_state_autoregressive_cur = None, None
    # np.longdouble is the portable spelling of extended precision
    # (np.float128 does not exist on every platform).
    acc_loss = np.longdouble(0.0)

    for step in range(total_num_steps):

        feed_inputs, feed_targets = batch_generator.next()
        feed_dict = {inputs : feed_inputs, targets : feed_targets}
        if controller_final_state_autoregressive_cur is not None:
            feed_dict.update({controller_initial_state_autoregressive : controller_final_state_autoregressive_cur})
        if controller_final_state_gt_cur is not None:
            # Feed the *initial* state of the teacher-forced RNN (mirroring the
            # autoregressive branch) so it continues from the previous batch;
            # feeding the final-state fetch would only override its output.
            feed_dict.update({controller_initial_state_gt : controller_final_state_gt_cur})

        if mode == "train":
            feed_dict.update({keep_prob : KEEP_PROB_TRAIN})
            summary, _, loss, controller_final_state_gt_cur, controller_final_state_autoregressive_cur = \
                session.run([summaries, optimizer, mse_autoregressive_steering, controller_final_state_gt, controller_final_state_autoregressive],
                           feed_dict = feed_dict)
            train_writer.add_summary(summary, global_train_step)
            global_train_step += 1

        elif mode == "valid":
            model_predictions, summary, loss, controller_final_state_autoregressive_cur = \
                session.run([steering_predictions, summaries, mse_autoregressive_steering, controller_final_state_autoregressive],
                           feed_dict = feed_dict)
            valid_writer.add_summary(summary, global_valid_step)
            global_valid_step += 1

            # Drop the left-context frames so paths line up with the predictions.
            feed_inputs = feed_inputs[:, LEFT_CONTEXT:].flatten()
            steering_targets = feed_targets[:, :, 0].flatten()
            model_predictions = model_predictions.flatten()
            # Rows: target, prediction, squared error.
            stats = np.stack([steering_targets, model_predictions, (steering_targets - model_predictions)**2])
            for i, img in enumerate(feed_inputs):
                global_valid_predictions[img] = stats[:, i]

        else:  # mode == "test"
            model_predictions, controller_final_state_autoregressive_cur = \
                session.run([steering_predictions, controller_final_state_autoregressive],
                           feed_dict = feed_dict)
            feed_inputs = feed_inputs[:, LEFT_CONTEXT:].flatten()
            model_predictions = model_predictions.flatten()
            for i, img in enumerate(feed_inputs):
                test_predictions[img] = model_predictions[i]

        if mode != "test":
            acc_loss += loss
            print('\r', step + 1, "/", total_num_steps, np.sqrt(acc_loss / (step+1)))

    return np.sqrt(acc_loss / total_num_steps) if mode != "test" else test_predictions
            

# Training

In [None]:
NUM_EPOCHS = 10

best_validation_score = None


def _write_test_predictions(session, epoch):
    """Run the test set and write "frame_id,steering_angle" rows for this epoch.

    Returns the dict of predictions produced by do_epoch in "test" mode.
    """
    predictions = do_epoch(session=session, sequences=test_seq, mode="test")
    with open("v3/test-predictions-epoch%d" % epoch, "w") as out:
        out.write("frame_id,steering_angle\n")
        for img, pred in predictions.items():
            img = img.replace("challenge_2/Test-final/center/", "")
            out.write("%s,%f\n" % (img, pred))
    return predictions


with tf.Session(graph=graph, config=tf.ConfigProto(gpu_options=gpu_options)) as session:
    # tf.initialize_all_variables is deprecated in favour of this initializer.
    session.run(tf.global_variables_initializer())
    print('Initialized')
    ckpt = tf.train.latest_checkpoint(checkpoint_dir)

    if ckpt:
        print("Restoring from", ckpt)
        saver.restore(sess=session, save_path=ckpt)

    for epoch in range(NUM_EPOCHS):
        print("Starting epoch %d" % epoch)
        print("Validation:")
        valid_score = do_epoch(session=session, sequences=valid_seq, mode="valid")

        # First evaluation: record the baseline score and dump test predictions.
        if best_validation_score is None:
            best_validation_score = valid_score
            test_predictions = _write_test_predictions(session, epoch)

        # Improvement: checkpoint the model and dump validation and test predictions.
        if valid_score < best_validation_score:
            saver.save(session, 'v3/checkpoint-sdc-ch2')
            best_validation_score = valid_score
            print("SAVED at epoch %d" % epoch)
            result = np.longdouble(0.0)
            with open("v3/valid-predictions-epoch%d" % epoch, "w") as out:
                for img, stats in global_valid_predictions.items():
                    out.write("%s %s\n" % (img, stats))
                    # The last entry of stats is the squared error in original units.
                    result += stats[-1]
            print("Validation unnormalized RMSE:", np.sqrt(result / len(global_valid_predictions)))
            test_predictions = _write_test_predictions(session, epoch)

        # The last epoch only evaluates; every other epoch ends with a training pass.
        if epoch != NUM_EPOCHS - 1:
            print("Training")
            do_epoch(session=session, sequences=train_seq, mode="train")
            