## Fully Convolutional Network with Dilated Convolutions

### Final Submission for Lyft's Perception Challenge

#### Results from final submitted run (after retraining):
    MODEL_NAME: dcn_v5
    MODEL_SAVE_VER: 00_r2
    TRAIN_DIR: /home/ow/Documents/udacity/lyft/datasets/combined_v05/train_v4
    TEST_DIR: /home/ow/Documents/udacity/lyft/datasets/combined_v05/test_v6
    train_images.shape: (6400, 408, 800, 3)
    train_labels.shape: (6400, 408, 800, 10)
    test_images.shape: (500, 408, 800, 3)
    test_labels.shape: (500, 600, 800, 10)

    Training epoch: 12/200
    Training time: 767.930s, loss: 0.01681
    Prediction session time: 16.900s
    F1 scores: Back   Vehi   Road   Fence  Ped    Poles  Side   Veg    BW     OT      
               0.9549 0.7994 0.9924 0.7919 0.7320 0.7993 0.9660 0.8505 0.9090 0.7948
    prec_v: 0.71346, recall_v: 0.90887
    prec_r: 0.99116, recall_r: 0.99367
    fscore_avg: 0.92666, fscore_v: 0.86167, fscore_r: 0.99166
    Total time: 833.581s
    *************** MODEL SAVED ON SCORE ***************

In [None]:
import tensorflow as tf
import helper_functions as hf
import numpy as np
import warnings
import os
import time
import pickle
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import shuffle
from functools import reduce
import random

### Hyperparameters and Global Variables

In [None]:
# ---------------------------------------------------------------------------
# Run identity / checkpointing
# ---------------------------------------------------------------------------
MODEL_NAME = 'dcn_v5'
MODEL_SAVE_VER = '00'
# Minimum improvement (in score or in loss) required before a checkpoint is written
SAVE_EPSILON = 1e-4

# ---------------------------------------------------------------------------
# Training hyperparameters
# ---------------------------------------------------------------------------
EPOCHS = 200
BATCH_SIZE = 12
SHUFFLE_PER_EPOCH = True
LEARNING_RATE = 1e-4
ADAM_EPSILON = 1e-6   # epsilon handed to the Adam optimizer
L2_REG = 1e-5         # scale of the L2 kernel regularizer on the new layers
STD_DEV = 1e-2        # stddev of the truncated-normal kernel initializer
KEEP_PROB = 0.5       # dropout keep probability used while training
EPSILON = 1e-6        # added to the softmax before the log in the weighted loss

# ---------------------------------------------------------------------------
# Input pipeline switches
# ---------------------------------------------------------------------------
TRIM_IND = (121, 497)  # Trimming is always applied
FLIP = True            # Images randomly flipped (horizontal) during training
RESHAPE = False        # If images should be reshaped
PREPROCESS = True      # If images should be preprocessed

# ---------------------------------------------------------------------------
# Classes
# ---------------------------------------------------------------------------
# Consolidated labels used to improve inference speed and reduce memory footprint
NEW_LABELS = True  # New labels of 20 (Building + Wall) and 30 (Other + Traffic Sign)
LABEL_CHANNELS = [10, 7, 2, 4, 5, 8, 9, 20, 30]

# One row per output channel: (display name, loss weight).
# Keeping both in a single table prevents the two lists from drifting apart.
_CLASS_TABLE = (
    ('Back', 0.3),
    ('Vehi', 1.2),
    ('Road', 0.4),
    ('Fence', 0.3),
    ('Ped', 1.0),
    ('Poles', 0.5),
    ('Side', 0.3),
    ('Veg', 0.3),
    ('BW', 0.3),
    ('OT', 0.5),
)
CHANNEL_NAMES = [row[0] for row in _CLASS_TABLE]
LOSS_WEIGHTS = [row[1] for row in _CLASS_TABLE]

# The listed label channels plus one extra channel ('Back' in CHANNEL_NAMES)
NUM_CLASSES = len(LABEL_CHANNELS) + 1

# ---------------------------------------------------------------------------
# Filesystem layout (everything is resolved against the current working directory)
# ---------------------------------------------------------------------------
_ROOT = os.getcwd()
DATA_DIR = os.path.join(_ROOT, 'datasets', 'combined_v05')
TRAIN_DIR = os.path.join(DATA_DIR, 'train_v4')
TEST_DIR = os.path.join(DATA_DIR, 'test_v6')
SAVE_DIR = os.path.join(_ROOT, 'saved_models', MODEL_NAME, MODEL_SAVE_VER)
WEIGHTS_FILE = os.path.join(_ROOT, 'weights', 'pretrained_weights.p')

### Network

In [None]:
def network(image_input, keep_prob, weights_file, num_classes):   
    """
    Builds custom network based on VGG:
    1) First 13 layers (10 conv, 3 dilated conv) use pre-trained weights
       loaded from ``weights_file``
    2) Remaining layers are new, resized with reduced depth than original VGG layers
    3) Final layer is added with scaled (0.01) output from conv3 layer

    The input is max-pooled three times (2x2, stride 2), the class scores are
    upsampled by a stride-2 transposed convolution, summed with the scaled
    conv3 scores, and upsampled again by a stride-4 transposed convolution.

    NOTE: the order in which ops are created here should not be changed
    casually; layer/scope names are what checkpoints are keyed on.

    :param image_input: image input tensor
    :param keep_prob: placeholder for drop out (keep probability)
    :param weights_file: path to pre-trained weights file (a pickle)
    :param num_classes: number of classes for final output
    :return: logits, prediction, one_hot
    """    
    def conv_layer(name, input_layer, weights):
        """
        Builds conv layers from pre-trained weights
        Adapted from: 
            1) https://github.com/fyu/dilation
            2) https://github.com/ndrplz/dilation-tensorflow
            
        :param name: layer name; its first 7 characters (e.g. 'conv1_1' from
                     'conv1_1_64') select the entries in ``weights``
        :param input_layer: input tensor
        :param weights: pre-trained weights dictionary with
                        '<prefix>/kernel:0' and '<prefix>/bias:0' keys
        :return: conv layer tensor (after bias add and ReLU)
        """
        with tf.name_scope(name):
            kernel = tf.Variable(initial_value=weights[name[:7] + '/kernel:0'], name='kernel')
            bias = tf.Variable(initial_value=weights[name[:7] + '/bias:0'], name='bias')
            conv = tf.nn.conv2d(input_layer, kernel, strides=[1,1,1,1], padding='SAME', name='conv')
            out = tf.nn.bias_add(conv, bias, name='bias_add')
            out = tf.nn.relu(out, name='relu')
            return out

    def aconv_layer(name, input_layer, weights, rate):
        """
        Builds atrous/dilated conv layers from pre-trained weights
        Adapted from: 
            1) https://github.com/fyu/dilation
            2) https://github.com/ndrplz/dilation-tensorflow
            
        :param name: layer name; characters 1..7 (e.g. 'conv5_1' from
                     'dconv5_1_512', i.e. without the leading 'd') select the
                     entries in ``weights``
        :param input_layer: input tensor
        :param weights: pre-trained weights dictionary with
                        '<prefix>/kernel:0' and '<prefix>/bias:0' keys
        :param rate: rate of dilation
        :return: atrous/dilated conv layer tensor (after bias add and ReLU)
        """
        with tf.name_scope(name):
            kernel = tf.Variable(initial_value=weights[name[1:8] + '/kernel:0'], name='kernel')
            bias = tf.Variable(initial_value=weights[name[1:8] + '/bias:0'], name='bias')
            aconv = tf.nn.atrous_conv2d(input_layer, kernel, rate, padding='SAME', name='aconv')
            out = tf.nn.bias_add(aconv, bias, name='bias_add')
            out = tf.nn.relu(out, name='relu')
            return out

    def max_pool(name, input_layer):
        """
        Builds maxpooling layer with VGG default values (2x2 window, stride 2)
        
        :param name: layer name
        :param input_layer: input tensor
        :return: maxpooling layer tensor
        """
        return tf.layers.max_pooling2d(input_layer, pool_size=(2,2), strides=(2,2), padding='SAME', name=name)

    # NOTE(security): pickle.load can execute arbitrary code; only point
    # weights_file at a file from a trusted source.
    with open(weights_file, 'rb') as f:
        pre_w = pickle.load(f)
    
    # Block 1 (pre-trained)
    conv1_1 = conv_layer('conv1_1_64', image_input, pre_w)
    conv1_2 = conv_layer('conv1_2_64', conv1_1, pre_w)
    pool1 = max_pool('pool1', conv1_2)
    
    # Block 2 (pre-trained)
    conv2_1 = conv_layer('conv2_1_128', pool1, pre_w)
    conv2_2 = conv_layer('conv2_2_128', conv2_1, pre_w)
    pool2 = max_pool('pool2', conv2_2)
    
    # Block 3 (pre-trained); conv3_3 is also tapped for the skip connection below
    conv3_1 = conv_layer('conv3_1_256', pool2, pre_w)
    conv3_2 = conv_layer('conv3_2_256', conv3_1, pre_w)
    conv3_3 = conv_layer('conv3_3_256', conv3_2, pre_w)
    pool3 = max_pool('pool3', conv3_3)
    
    # Block 4 (pre-trained); no pooling follows it
    conv4_1 = conv_layer('conv4_1_512', pool3, pre_w)
    conv4_2 = conv_layer('conv4_2_512', conv4_1, pre_w)
    conv4_3 = conv_layer('conv4_3_512', conv4_2, pre_w)
    
    # Dilated Convolutions, rate = 2 (pre-trained)
    conv5_1 = aconv_layer('dconv5_1_512', conv4_3, pre_w, 2)
    conv5_2 = aconv_layer('dconv5_2_512', conv5_1, pre_w, 2)
    conv5_3 = aconv_layer('dconv5_3_512', conv5_2, pre_w, 2)
    
    # Dilated Convolution, rate = 4 (new layer: randomly initialised, L2-regularised)
    dconv6_1 = tf.layers.conv2d(conv5_3, 512, kernel_size=7, strides=1, padding='SAME', 
                           name='dconv6_1_512',
                           kernel_initializer=tf.truncated_normal_initializer(stddev=STD_DEV),
                           kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG),
                           dilation_rate=4, activation=tf.nn.relu)
    
    drop1 = tf.nn.dropout(dconv6_1, keep_prob, name='drop1') 
    
    # 1x1 convolution over the 512 feature maps (new layer)
    conv7_1_512 = tf.layers.conv2d(drop1, 512, kernel_size=1, strides=1, padding='SAME', 
                           name='conv7_1_512',
                           kernel_initializer=tf.truncated_normal_initializer(stddev=STD_DEV),
                           kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG),
                           activation=tf.nn.relu)
    
    drop2 = tf.nn.dropout(conv7_1_512, keep_prob, name='drop2')

    
    # 1x1 convolution down to per-class scores (no activation)
    conv8_1_10 = tf.layers.conv2d(drop2, num_classes, kernel_size=1, strides=1, padding='SAME', 
                               name='conv8_1_10',
                               kernel_initializer=tf.truncated_normal_initializer(stddev=STD_DEV),
                               kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG))   
    
    # Stride-2 transposed convolution: undoes pool3 so the scores match conv3_3 spatially
    conv8_upsample = tf.layers.conv2d_transpose(conv8_1_10, num_classes, 4, 2, padding='SAME', 
                                        name='conv8_up_10',
                                        kernel_initializer=tf.truncated_normal_initializer(stddev=STD_DEV),
                                        kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG)) 
    
    # Skip connection: 1x1 convolution of conv3_3 down to per-class scores
    conv3_4_10 = tf.layers.conv2d(conv3_3, num_classes, kernel_size=1, strides=1, padding='SAME', 
                                 name='conv3_4_10',
                                 kernel_initializer=tf.truncated_normal_initializer(stddev=STD_DEV),
                                 kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG))
    
    # Scale the skip branch by 0.01 before it is added to the main branch
    conv3_scale = tf.multiply(conv3_4_10, 0.01, name='conv3_scale')
    
    # Combining final output with output from conv3 layer
    conv3_conv8_add = tf.add(conv8_upsample, conv3_scale, name='conv3_conv8_add')
    
    with tf.name_scope('output'):
        # Stride-4 transposed convolution: undoes pool1 and pool2
        logits = tf.layers.conv2d_transpose(conv3_conv8_add, num_classes, 8, 4, 
                                    padding='SAME', name='logits',
                                    kernel_initializer=tf.truncated_normal_initializer(stddev=STD_DEV),
                                    kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG)) 
        softmax = tf.nn.softmax(logits, name='softmax')
        # Class index per pixel (argmax over the channel axis)
        prediction = tf.argmax(softmax, axis=3, name='prediction')
        one_hot = tf.one_hot(prediction, depth=num_classes, dtype=tf.uint8, name='one_hot')
        
    return logits, prediction, one_hot

### Loss and Optimization function

In [None]:
def optimize(logits, labels, l_rate, adam_eps, weights=None):
    """
    Creates optimization and loss functions:
        1) Uses Adam optimizer
        2) Loss based on weighted cross entropy + regularization loss

    Logits and labels are flattened to (-1, NUM_CLASSES) using the
    module-level NUM_CLASSES, so every row is one pixel.

    :param logits: logits tensor from network()
    :param labels: placeholder for training labels (one-hot over the last axis)
    :param l_rate: placeholder for learning rate value
    :param adam_eps: placeholder for Adam epsilon values
    :param weights: placeholder for per-class weights, if None no weighting is applied
    :return: optimizer, total_loss
    """
    with tf.name_scope('optimize'):
        logits = tf.reshape(logits, (-1, NUM_CLASSES))
        labels = tf.to_float(tf.reshape(labels, (-1, NUM_CLASSES)))

        if weights is not None:
            # Hand-written cross entropy so each class term can be scaled by
            # its weight. EPSILON keeps log() away from zero.
            softmax = tf.nn.softmax(logits) + EPSILON
            weighted_log_probs = tf.multiply(labels * tf.log(softmax), weights)
            # `axis` replaces the deprecated `reduction_indices` argument.
            cross_entropy = -tf.reduce_sum(weighted_log_probs, axis=1)
        else:
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)

        cross_entropy_loss = tf.reduce_mean(cross_entropy,
                                            name='xent_mean_loss')

        # tf.add_n raises ValueError on an empty list, which happens when no
        # layer registered a kernel regularizer; fall back to a zero constant.
        reg_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if reg_terms:
            reg_loss = tf.add_n(reg_terms, name='reg_loss')
        else:
            reg_loss = tf.constant(0.0, dtype=tf.float32, name='reg_loss')
        total_loss = tf.add_n([cross_entropy_loss, reg_loss], name='total_loss')

        optimizer = tf.train.AdamOptimizer(learning_rate=l_rate, epsilon=adam_eps).minimize(total_loss)

    return optimizer, total_loss

### Model Training

In [None]:
# Training / evaluation driver.
# Loads the full train and test sets into memory, builds the graph, then for
# every epoch: trains over all batches, predicts the whole test set, prints
# per-class F1 plus the vehicle/road F-beta scores, and checkpoints the model
# when either the averaged score or the training loss improves.
with tf.Session() as sess:
    tf.set_random_seed(42)

    print(f'MODEL_NAME: {MODEL_NAME}')
    print(f'MODEL_SAVE_VER: {MODEL_SAVE_VER}')
    print(f'TRAIN_DIR: {TRAIN_DIR}')
    print(f'TEST_DIR: {TEST_DIR}')

    get_train_batch = hf.train_batch_gen(TRAIN_DIR, LABEL_CHANNELS, reshape=RESHAPE,
                                         preprocess=PREPROCESS, new_labels=NEW_LABELS,
                                         trim_ind=TRIM_IND)
    # NOTE(review): revert_trim_reshape presumably maps predictions back to
    # the untrimmed label resolution -- confirm in helper_functions.
    get_test_batch, revert_trim_reshape = hf.test_batch_gen(TEST_DIR, LABEL_CHANNELS,
                                          reshape=RESHAPE, preprocess=PREPROCESS, new_labels=NEW_LABELS,
                                          trim_ind=TRIM_IND)

    # Images loaded into memory for faster training
    # Created swapfiles to increase memory
    train_images = []
    train_labels = []
    train_names = []
    for images, labels, names in get_train_batch(1):
        train_images.append(images)
        train_labels.append(labels)
        train_names += names

    # Each generator item is a batch of one; merge the two leading axes.
    train_images = np.array(train_images, dtype=np.uint8)
    train_images = train_images.reshape(-1, *train_images.shape[2:])
    train_labels = np.array(train_labels, dtype=np.uint8)
    train_labels = train_labels.reshape(-1, *train_labels.shape[2:])
    print(f'train_images.shape: {train_images.shape}')
    print(f'train_labels.shape: {train_labels.shape}')

    test_images = []
    test_labels = []
    test_names = []
    for images, labels, names in get_test_batch(1):
        test_images.append(images)
        test_labels.append(labels)
        test_names += names

    test_images = np.array(test_images, dtype=np.uint8)
    test_images = test_images.reshape(-1, *test_images.shape[2:])
    test_labels = np.array(test_labels, dtype=np.uint8)
    test_labels = test_labels.reshape(-1, *test_labels.shape[2:])
    print(f'test_images.shape: {test_images.shape}')
    print(f'test_labels.shape: {test_labels.shape}')

    # Total number of test pixels (all label axes except the class axis)
    flat_labels_size = reduce(lambda x, y: x*y, test_labels.shape[:-1])
    image_org_shape = (test_labels.shape[1], test_labels.shape[2])
    # Number of pixels covered by one full test batch
    flat_offset = BATCH_SIZE*image_org_shape[0]*image_org_shape[1]

    image_input = tf.placeholder(tf.float32, (None, None, None, 3), name='image_input')
    label_input = tf.placeholder(tf.int32, [None, None, None, NUM_CLASSES], name='label_input')
    loss_weights = tf.placeholder(tf.float32, (None), name='loss_weights')
    # Defaults to 1.0 so prediction runs do not need to feed it
    keep_prob = tf.placeholder_with_default(tf.constant(1.0, dtype=tf.float32), shape=(), name='keep_prob')
    l_rate = tf.placeholder(tf.float32, name='l_rate')
    adam_eps = tf.placeholder(tf.float32, name='adam_eps')

    logits, prediction, one_hot = network(image_input, keep_prob, WEIGHTS_FILE, NUM_CLASSES)
    opt, total_loss = optimize(logits, label_input, l_rate, adam_eps, loss_weights)

    # Create the checkpoint directories up front so a missing folder cannot
    # make the first save fail after a full epoch of training.
    score_ckpt_path = os.path.join(SAVE_DIR, 'score', MODEL_NAME + '.ckpt')
    loss_ckpt_path = os.path.join(SAVE_DIR, 'loss', MODEL_NAME + '.ckpt')
    for ckpt_path in (score_ckpt_path, loss_ckpt_path):
        os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    # Fixed label set for the metrics so index 1 / 2 always refer to the same
    # classes, even if a class is missing from both labels and predictions.
    class_ids = np.arange(NUM_CLASSES)

    fscore_avg = 0.0
    best_fscore = 0.0 # used to save model when fscore_avg increases
    best_loss = 9999
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    for epoch in range(EPOCHS):
        start_time = time.time()
        print(f'\nTraining epoch: {epoch+1}/{EPOCHS}')

        # For datasets that do not fit in memory, batches can instead be
        # streamed from disk with get_train_batch(BATCH_SIZE) (slower).
        if SHUFFLE_PER_EPOCH:
            train_images, train_labels, train_names = shuffle(train_images, train_labels, train_names)

        for offset in range(0, len(train_images), BATCH_SIZE):
            train_image_batch = train_images[offset:offset+BATCH_SIZE]
            train_label_batch = train_labels[offset:offset+BATCH_SIZE]

            # One coin toss per batch: the whole batch is flipped or not
            if FLIP and random.randint(0, 1) == 0:
                # horizontal flip (axis 2 of the batch)
                train_image_batch = np.flip(train_image_batch, axis=2)
                train_label_batch = np.flip(train_label_batch, axis=2)

            _, loss = sess.run([opt, total_loss],
                               feed_dict = {image_input: train_image_batch,
                                            label_input: train_label_batch,
                                            loss_weights: LOSS_WEIGHTS,
                                            keep_prob: KEEP_PROB,
                                            l_rate: LEARNING_RATE,
                                            adam_eps: ADAM_EPSILON})
        # `loss` is the value from the last batch of the epoch only
        print(f'Training time: {(time.time() - start_time):#0.3f}s, loss: {loss:#0.5f}')

        sess_time = 0
        total_preds = np.empty((flat_labels_size,), dtype=np.uint8)
        total_labels = np.empty((flat_labels_size,), dtype=np.uint8)
        for offset in range(0, len(test_images), BATCH_SIZE):
            pred_time = time.time()
            test_image_batch = test_images[offset:offset+BATCH_SIZE]
            test_label_batch = test_labels[offset:offset+BATCH_SIZE]
            preds = sess.run(prediction, feed_dict = {image_input: test_image_batch})

            preds = revert_trim_reshape(preds)
            sess_time += time.time() - pred_time

            preds_result = np.array(preds, dtype=np.uint8).reshape(-1)
            labels_result = test_label_batch.argmax(axis=3).reshape(-1)

            # The last batch may be short, so its length is computed separately
            batch_offset = len(test_label_batch)*image_org_shape[0]*image_org_shape[1]
            start = (offset // BATCH_SIZE) * flat_offset
            total_preds[start:start+batch_offset] = preds_result
            total_labels[start:start+batch_offset] = labels_result

        print(f'Prediction session time: {sess_time:#0.3f}s')
        metrics = precision_recall_fscore_support(total_labels, total_preds, labels=class_ids)
        # Free the large flat buffers before the next training epoch
        del total_preds
        del total_labels

        f1_str_1 = 'F1 scores: '
        f1_str_2 = '         '
        for name, val in zip(CHANNEL_NAMES, metrics[2]):
            f1_str_1 += f'{name:8}'
            f1_str_2 += f'{val:#8.4f}'
        print(f1_str_1)
        print(f1_str_2)

        # Index 1 is 'Vehi' and index 2 is 'Road' in CHANNEL_NAMES
        prec_v = metrics[0][1]
        prec_r = metrics[0][2]
        recall_v = metrics[1][1]
        recall_r = metrics[1][2]
        if (prec_v==0 and recall_v==0) or (prec_r==0 and recall_r==0):
            # Either F-beta denominator would be zero
            fscore_avg = 1e-6
            print('NaN: division by zero in fscore_avg')
        else:
            # F-beta with beta=2 for vehicles (recall-weighted) ...
            fscore_v = 5 * (prec_v * recall_v) / (4 * prec_v + recall_v)
            # ... and beta=0.5 for road (precision-weighted)
            fscore_r = 1.25 * (prec_r * recall_r) / (0.25 * prec_r + recall_r)
            fscore_avg = (fscore_v + fscore_r) / 2
            print(f'prec_v: {prec_v:#0.5f}, recall_v: {recall_v:#0.5f}')
            print(f'prec_r: {prec_r:#0.5f}, recall_r: {recall_r:#0.5f}')
            print(f'fscore_avg: {fscore_avg:#0.5f}, fscore_v: {fscore_v:#0.5f}, fscore_r: {fscore_r:#0.5f}')
        print(f'Total time: {time.time()-start_time:#0.3f}s')

        # Score improvements take priority; the loss checkpoint is only
        # considered in epochs where the score did not improve.
        if fscore_avg - best_fscore > SAVE_EPSILON:
            best_fscore = fscore_avg
            saver.save(sess, score_ckpt_path)
            print('*************** MODEL SAVED ON SCORE ***************')
        elif best_loss - loss > SAVE_EPSILON:
            best_loss = loss
            saver.save(sess, loss_ckpt_path)
            print('*** model saved on loss ***')