## Network Training for Single Domain

#### Setup notes: 
    1) Data and weight file paths must be configured locally
    2) Configuration for image preprocessing must be changed based on which dataset is used (CARLA or BDD)
    3) CARLA test and train directory's expected structure: 
        - images in 'CameraRGB' directory 
        - labels in 'CameraSeg' directory
    4) BDD test and train directory's expected structure:
        - images in 'images' directory 
        - labels in 'labels' directory

In [None]:
import tensorflow as tf
import helper_functions_carla as hf
import numpy as np
import warnings
import os
import time
import pickle
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import shuffle
from functools import reduce
import random

### Model Name and Paths
#### Note: parameter values below must be re-assigned based on local directories

In [None]:
# Model identity: the name and version pick the sub-directory checkpoints go to.
MODEL_NAME = 'network_single_domain'
MODEL_SAVE_VER = '00'

# Every path is rooted at the current working directory; re-point these
# to match the local layout before running.
DATA_DIR = os.path.join(os.getcwd(), 'datasets', 'carla')
TRAIN_DIR, TEST_DIR = (os.path.join(DATA_DIR, split) for split in ('train', 'test'))
SAVE_DIR = os.path.join(os.getcwd(), 'saved_models', MODEL_NAME, MODEL_SAVE_VER)
WEIGHTS_FILE = os.path.join(os.getcwd(), 'weights', 'pretrained_weights.p')

### Image Preprocessing Configuration
#### Note: comment/uncomment based on which dataset is used

In [None]:
# Active configuration: CARLA images.
# TRIM, TRIM_IND, RESHAPE and NEW_SHAPE are handed to the batch generators in
# helper_functions_carla; their exact meaning is defined there.
# NOTE(review): TRIM_IND looks like the (start, stop) row range kept when
# trimming (523 - 115 = 408, matching NEW_SHAPE[1]) - confirm in the helper.
TRIM = True
TRIM_IND = (115, 523)
FLIP = True  # enables the random horizontal flip applied per training batch
RESHAPE = False
NEW_SHAPE = (800, 408)

# Raw label ids handed to the batch generators. There is one id fewer than
# there are class names: the class count is len(LABEL_CHANNELS) + 1.
LABEL_CHANNELS = [
    10, 7, 2, 4, 5, 8, 9, 20, 30,
]

# Display name for each output class, indexed by class id.
CHANNEL_NAMES = [
    'Back', 'Vehi', 'Road', 'Fence', 'Ped',
    'Poles', 'Side', 'Veg', 'BW', 'OT',
]

# Per-class multiplier applied to the cross-entropy terms, indexed by class id.
LOSS_WEIGHTS = [
    0.6,  # Back
    1.2,  # Vehi
    0.6,  # Road
    1.2,  # Fence
    1.2,  # Ped
    1.2,  # Poles
    0.7,  # Side
    0.7,  # Veg
    0.6,  # BW
    0.8,  # OT
]

'''
# Configuration for BDD Images
TRIM = False
TRIM_IND = (0, 720)
FLIP = True
RESHAPE = True
NEW_SHAPE = (640, 360)
LABEL_CHANNELS = [13, 0, 4, 11, 5, 1, 8, 20, 30]
CHANNEL_NAMES = ['Back', 'Vehi', 'Road', 'Fence', 'Ped', 'Poles', 'Side', 'Veg', 'BW', 'OT']
LOSS_WEIGHTS = [0.6, 0.8, 0.6, 1.2, 1.2, 1.2, 0.8, 0.7, 0.7, 0.7]
'''

### Hyperparameters and Global Variables

In [None]:
# --- Training schedule ---
EPOCHS = 200                # number of passes of the outer training loop
BATCH_SIZE = 12             # batch size for both training and evaluation
# NOTE(review): in the visible code this flag is only echoed to the log -
# confirm whether the batch generator actually consumes it.
SHUFFLE_PER_EPOCH = True

# --- Optimizer ---
LEARNING_RATE = 5e-4        # fed to the Adam optimizer
ADAM_EPSILON = 1e-5         # fed to the Adam optimizer as its epsilon
MOMENTUM = 0.9              # not consumed by the Adam optimizer used in optimize()

# --- Initialisation and regularisation of the newly added layers ---
STD_DEV = 1e-2              # stddev of the truncated-normal kernel initializer
L2_REG = 1e-5               # scale of the L2 kernel regularizer
KEEP_PROB = 0.5             # dropout keep probability fed while training

# --- Numerical tolerances ---
EPSILON = 1e-6              # added to the softmax before the log in the weighted loss
SAVE_EPSILON = 1e-4         # minimum improvement required to write a checkpoint

# --- Data pipeline flags (forwarded to the batch generators) ---
PREPROCESS = True
NEW_LABELS = True

# One output class per configured label channel, plus one extra class.
NUM_CLASSES = 1 + len(LABEL_CHANNELS)

### Network Definition

In [None]:
def network(image_input, keep_prob, weights_file, num_classes):
    """
    Assembles the VGG-based segmentation graph.

    Layout:
    1) 13 layers initialised from pre-trained weights: 10 regular conv
       layers (conv1_1 .. conv4_3) followed by 3 dilated conv layers
       (dconv5_1 .. dconv5_3, rate 2)
    2) Newly initialised layers: a dilated 7x7 conv (rate 4), a 1x1 conv and
       a 1x1 classifier conv, with dropout after the first two
    3) A x2 transposed conv whose output is summed with a 1x1 projection of
       conv3_3 scaled by 0.01, then a x4 transposed conv producing the logits

    The new layers read the module-level STD_DEV (initializer stddev) and
    L2_REG (regularizer scale).

    :param image_input: image input tensor
    :param keep_prob: placeholder for drop out keep probability
    :param weights_file: path to the pickled pre-trained weights file
    :param num_classes: number of classes for final output
    :return: logits, prediction, one_hot
    """
    def conv_layer(name, input_layer, weights):
        """
        Builds a stride-1 conv + bias + ReLU layer from pre-trained weights
        Adopted from:
            1) https://github.com/fyu/dilation
            2) https://github.com/ndrplz/dilation-tensorflow

        :param name: layer name; its first 7 characters are the weights key
        :param input_layer: input tensor
        :param weights: pre-trained weights dictionary
        :return: conv layer tensor
        """
        key = name[:7]  # e.g. 'conv1_1_64' -> 'conv1_1'
        with tf.name_scope(name):
            kernel = tf.Variable(initial_value=weights[key + '/kernel:0'], name='kernel')
            bias = tf.Variable(initial_value=weights[key + '/bias:0'], name='bias')
            response = tf.nn.conv2d(input_layer, kernel, strides=[1, 1, 1, 1],
                                    padding='SAME', name='conv')
            biased = tf.nn.bias_add(response, bias, name='bias_add')
            return tf.nn.relu(biased, name='relu')

    def aconv_layer(name, input_layer, weights, rate):
        """
        Builds an atrous/dilated conv + bias + ReLU layer from pre-trained weights
        Adopted from:
            1) https://github.com/fyu/dilation
            2) https://github.com/ndrplz/dilation-tensorflow

        :param name: layer name; characters 1..7 (skipping the leading 'd')
                     are the weights key
        :param input_layer: input tensor
        :param weights: pre-trained weights dictionary
        :param rate: rate of dilation
        :return: atrous/dilated conv layer tensor
        """
        key = name[1:8]  # e.g. 'dconv5_1_512' -> 'conv5_1'
        with tf.name_scope(name):
            kernel = tf.Variable(initial_value=weights[key + '/kernel:0'], name='kernel')
            bias = tf.Variable(initial_value=weights[key + '/bias:0'], name='bias')
            response = tf.nn.atrous_conv2d(input_layer, kernel, rate,
                                           padding='SAME', name='aconv')
            biased = tf.nn.bias_add(response, bias, name='bias_add')
            return tf.nn.relu(biased, name='relu')

    def max_pool(name, input_layer):
        """
        Builds a 2x2, stride-2 max pooling layer (VGG default values)

        :param name: layer name
        :param input_layer: input tensor
        :return: maxpooling layer tensor
        """
        return tf.layers.max_pooling2d(input_layer, pool_size=(2, 2), strides=(2, 2),
                                       padding='SAME', name=name)

    def new_layer_params():
        """
        Returns a fresh initializer/regularizer pair for a newly added layer

        :return: keyword arguments for tf.layers.conv2d / conv2d_transpose
        """
        return {
            'kernel_initializer': tf.truncated_normal_initializer(stddev=STD_DEV),
            'kernel_regularizer': tf.contrib.layers.l2_regularizer(L2_REG),
        }

    # NOTE: pickle.load can execute arbitrary code embedded in the file -
    # only point weights_file at a trusted source.
    with open(weights_file, 'rb') as f:
        pre_w = pickle.load(f)

    # Pre-trained VGG stages; a max pool follows every stage except the last.
    vgg_stages = (
        ('conv1_1_64', 'conv1_2_64'),
        ('conv2_1_128', 'conv2_2_128'),
        ('conv3_1_256', 'conv3_2_256', 'conv3_3_256'),
        ('conv4_1_512', 'conv4_2_512', 'conv4_3_512'),
    )
    net = image_input
    conv3_3 = None
    for stage, layer_names in enumerate(vgg_stages, start=1):
        for layer_name in layer_names:
            net = conv_layer(layer_name, net, pre_w)
        if stage == 3:
            # Kept (before pool3) as the source of the skip connection below
            conv3_3 = net
        if stage < len(vgg_stages):
            net = max_pool(f'pool{stage}', net)

    # Dilated convolutions, rate = 2
    for layer_name in ('dconv5_1_512', 'dconv5_2_512', 'dconv5_3_512'):
        net = aconv_layer(layer_name, net, pre_w, 2)

    # Dilated convolution, rate = 4
    dconv6_1 = tf.layers.conv2d(net, 512, kernel_size=7, strides=1, padding='SAME',
                                name='dconv6_1_512', dilation_rate=4,
                                activation=tf.nn.relu, **new_layer_params())
    drop1 = tf.nn.dropout(dconv6_1, keep_prob, name='drop1')

    conv7_1 = tf.layers.conv2d(drop1, 512, kernel_size=1, strides=1, padding='SAME',
                               name='conv7_1_512', activation=tf.nn.relu,
                               **new_layer_params())
    drop2 = tf.nn.dropout(conv7_1, keep_prob, name='drop2')

    # 1x1 classifier: one channel per class
    conv8_1 = tf.layers.conv2d(drop2, num_classes, kernel_size=1, strides=1,
                               padding='SAME', name='conv8_1_nc', **new_layer_params())

    # Transposed convolution with a factor of 2
    tconv9_1 = tf.layers.conv2d_transpose(conv8_1, num_classes, 4, 2, padding='SAME',
                                          name='tconv9_1_nc', **new_layer_params())

    # Skip connection: project conv3_3 to num_classes channels and damp it
    # before adding it to the upsampled class scores
    conv3_4 = tf.layers.conv2d(conv3_3, num_classes, kernel_size=1, strides=1,
                               padding='SAME', name='conv3_4_nc', **new_layer_params())
    conv3_4_scale = tf.multiply(conv3_4, 0.01)
    add_conv3_conv9 = tf.add(conv3_4_scale, tconv9_1, name='add_conv3_conv9')

    # Transposed convolution with a factor of 4
    tconv10_1 = tf.layers.conv2d_transpose(add_conv3_conv9, num_classes, 8, 4,
                                           padding='SAME', name='tconv10_1_nc',
                                           **new_layer_params())

    with tf.name_scope('output'):
        softmax = tf.nn.softmax(tconv10_1, name='softmax')
        prediction = tf.argmax(softmax, axis=3, name='prediction')
        one_hot = tf.one_hot(prediction, depth=num_classes, dtype=tf.uint8, name='one_hot')

    return tconv10_1, prediction, one_hot

### Network Optimization

In [None]:
def optimize(logits, labels, l_rate, adam_eps, weights=None):
    """
    Creates optimization and loss functions:
        1) Uses Adam optimizer
        2) Loss based on (optionally weighted) cross entropy + regularization loss

    Logits and labels are flattened to (-1, NUM_CLASSES) using the
    module-level NUM_CLASSES before the loss is computed.

    :param logits: logits tensor from network()
    :param labels: placeholder for training labels (one channel per class)
    :param l_rate: placeholder for learning rate value
    :param adam_eps: placeholder for Adam epsilon values
    :param weights: placeholder for per-class weights, if None no weighting is
                    applied. Expected to broadcast against the class axis.
    :return: optimizer, total_loss
    """
    with tf.name_scope('optimize'):
        logits = tf.reshape(logits, (-1, NUM_CLASSES))
        labels = tf.to_float(tf.reshape(labels, (-1, NUM_CLASSES)))

        if weights is not None:
            # Hand-written weighted cross entropy; EPSILON keeps log() finite
            # when a class probability underflows to zero.
            softmax = tf.nn.softmax(logits) + EPSILON
            cross_entropy = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax), weights),
                                           axis=1)
        else:
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)

        cross_entropy_loss = tf.reduce_mean(cross_entropy,
                                            name='xent_mean_loss')

        # tf.add_n rejects an empty list, and the collection is empty whenever
        # no layer registered a regularization term (for instance when
        # regularization is switched off), so fall back to a zero constant.
        reg_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if reg_terms:
            reg_loss = tf.add_n(reg_terms, name='reg_loss')
        else:
            reg_loss = tf.constant(0.0, dtype=cross_entropy_loss.dtype, name='reg_loss')
        total_loss = tf.add_n([cross_entropy_loss, reg_loss], name='total_loss')

        optimizer = tf.train.AdamOptimizer(learning_rate=l_rate, epsilon=adam_eps).minimize(total_loss)

    return optimizer, total_loss

### Train Model

In [None]:
# Echo the complete run configuration so the training log records how the
# run was set up.
print(f'MODEL_NAME: {MODEL_NAME}')
print(f'MODEL_SAVE_VER: {MODEL_SAVE_VER}')
print(f'TRAIN_DIR: {TRAIN_DIR}')
print(f'TEST_DIR: {TEST_DIR}')
print(f'SAVE_DIR: {SAVE_DIR}')
print(f'WEIGHTS_FILE: {WEIGHTS_FILE}')

print(f'SHUFFLE_PER_EPOCH: {SHUFFLE_PER_EPOCH}')
print(f'BATCH_SIZE: {BATCH_SIZE}')
print(f'L2_REG: {L2_REG}')
print(f'STD_DEV: {STD_DEV}')
print(f'LEARNING_RATE: {LEARNING_RATE}')
print(f'MOMENTUM: {MOMENTUM}')
print(f'KEEP_PROB: {KEEP_PROB}')
print(f'EPSILON: {EPSILON}')
print(f'ADAM_EPSILON: {ADAM_EPSILON}')

print(f'TRIM: {TRIM}')
print(f'TRIM_IND: {TRIM_IND}')
print(f'FLIP: {FLIP}')
print(f'RESHAPE: {RESHAPE}')
print(f'NEW_SHAPE: {NEW_SHAPE}')
print(f'PREPROCESS: {PREPROCESS}')

print(f'NEW_LABELS: {NEW_LABELS}')
print(f'LABEL_CHANNELS: {LABEL_CHANNELS}')
print(f'CHANNEL_NAMES: {CHANNEL_NAMES}')
print(f'LOSS_WEIGHTS: {LOSS_WEIGHTS}')

# Two checkpoints are kept: the best mean F-score and the best training loss.
# Create their (nested) directories up front so the first save cannot fail
# on a missing parent directory after a full epoch has already been spent.
score_ckpt_path = os.path.join(SAVE_DIR, 'score', MODEL_NAME + '.ckpt')
loss_ckpt_path = os.path.join(SAVE_DIR, 'loss', MODEL_NAME + '.ckpt')
for ckpt_path in (score_ckpt_path, loss_ckpt_path):
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

get_train_batch = hf.train_batch_gen(TRAIN_DIR, LABEL_CHANNELS, reshape=RESHAPE, new_shape=NEW_SHAPE,
                                     preprocess=PREPROCESS, new_labels=NEW_LABELS,
                                     trim=TRIM, trim_ind=TRIM_IND)
get_test_batch, revert_trim_reshape = hf.test_batch_gen(TEST_DIR, LABEL_CHANNELS, reshape=RESHAPE,
                                      new_shape=NEW_SHAPE, preprocess=PREPROCESS, new_labels=NEW_LABELS,
                                      trim=TRIM, trim_ind=TRIM_IND)

# Test images are loaded into memory to reduce training time
test_images = []
test_labels = []
test_names = []
for images, labels, names in get_test_batch(1):
    test_images.append(images)
    test_labels.append(labels)
    test_names += names

# Each list entry is a batch of one; fold the list and batch axes together.
test_images = np.array(test_images, dtype=np.uint8)
test_images = test_images.reshape(-1, *test_images.shape[2:])
test_labels = np.array(test_labels, dtype=np.uint8)
test_labels = test_labels.reshape(-1, *test_labels.shape[2:])
print(f'test_images.shape: {test_images.shape}')
print(f'test_labels.shape: {test_labels.shape}')

# Total number of labelled pixels in the test set (all axes but the class axis)
flat_labels_size = reduce(lambda x, y: x*y, test_labels.shape[:-1])
image_org_shape = (test_labels.shape[1], test_labels.shape[2])
# Number of pixels contributed by one full evaluation batch
flat_offset = BATCH_SIZE*image_org_shape[0]*image_org_shape[1]

with tf.Session() as sess:
    tf.set_random_seed(42)

    image_input = tf.placeholder(tf.float32, (None, None, None, 3), name='image_input')
    label_input = tf.placeholder(tf.int32, [None, None, None, NUM_CLASSES], name='label_input')
    loss_weights = tf.placeholder(tf.float32, (None), name='loss_weights')
    # Defaults to 1.0 so evaluation runs without dropout unless a value is fed
    keep_prob = tf.placeholder_with_default(tf.constant(1.0, dtype=tf.float32), shape=(), name='keep_prob')
    l_rate = tf.placeholder(tf.float32, name='l_rate')
    adam_eps = tf.placeholder(tf.float32, name='adam_eps')

    logits, prediction, one_hot = network(image_input, keep_prob, WEIGHTS_FILE, NUM_CLASSES)
    opt, total_loss = optimize(logits, label_input, l_rate, adam_eps, loss_weights)

    fscore_avg = 0.0
    best_fscore = 0.0
    best_loss = 9999
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    for epoch in range(EPOCHS):
        start_time = time.time()
        print(f'\nTraining epoch: {epoch+1}/{EPOCHS}')

        for train_image_batch, train_label_batch, _ in get_train_batch(BATCH_SIZE):

            # Augmentation: flip roughly half of the batches horizontally
            # (axis 2 of the batch), images and labels together.
            if FLIP and random.randint(0, 1) == 0:
                train_image_batch = np.flip(train_image_batch, axis=2)
                train_label_batch = np.flip(train_label_batch, axis=2)

            _, loss = sess.run([opt, total_loss],
                               feed_dict={image_input: train_image_batch,
                                          label_input: train_label_batch,
                                          keep_prob: KEEP_PROB,
                                          l_rate: LEARNING_RATE,
                                          adam_eps: ADAM_EPSILON,
                                          loss_weights: LOSS_WEIGHTS})
        # `loss` is the loss of the last training batch of the epoch
        print(f'Training time: {(time.time() - start_time):#0.3f}s, loss: {loss:#0.5f}')

        # Evaluate on the whole in-memory test set, collecting one flat class
        # id per pixel for both predictions and ground truth.
        sess_time = 0
        total_preds = np.empty((flat_labels_size,), dtype=np.uint8)
        total_labels = np.empty((flat_labels_size,), dtype=np.uint8)
        for offset in range(0, len(test_images), BATCH_SIZE):
            pred_time = time.time()
            test_image_batch = test_images[offset:offset+BATCH_SIZE]
            test_label_batch = test_labels[offset:offset+BATCH_SIZE]
            preds = sess.run(prediction, feed_dict={image_input: test_image_batch})

            preds = revert_trim_reshape(preds)
            sess_time += time.time() - pred_time

            preds_result = np.array(preds, dtype=np.uint8).reshape(-1)
            labels_result = test_label_batch.argmax(axis=3).reshape(-1)

            # The last batch may be short, so its length is measured separately
            batch_offset = len(test_label_batch)*image_org_shape[0]*image_org_shape[1]
            start = (offset // BATCH_SIZE) * flat_offset
            total_preds[start:start+batch_offset] = preds_result
            total_labels[start:start+batch_offset] = labels_result

        print(f'Prediction session time: {sess_time:#0.3f}s')

        # Called without `labels=`, sklearn reports one entry per class id
        # present in the labels or predictions, in sorted order. Recover those
        # ids so each column is headed by the name of the class it describes,
        # even when some class never occurs.
        present_classes = np.union1d(np.unique(total_labels), np.unique(total_preds))

        eval_start_time = time.time()
        metrics = precision_recall_fscore_support(total_labels, total_preds)
        print(f'Evaluation time: {(time.time() - eval_start_time):#0.3f}s')
        del total_preds
        del total_labels

        # sklearn's return order is (precision, recall, f-score, support)
        precision, recall, f1, support = metrics

        str_title     = '             '
        str_recall    = 'Recall:    '
        str_precision = 'Precision: '
        str_f1        = 'F1 score:  '
        str_support   = 'Support:   '
        for i, class_id in enumerate(present_classes):
            str_title += f'{CHANNEL_NAMES[class_id]:10}'
            str_recall += f'{recall[i]:#10.6f}'
            str_precision += f'{precision[i]:#10.6f}'
            str_f1 += f'{f1[i]:#10.6f}'
            str_support += f'{support[i]:10}'
        print(str_title)
        print(str_recall)
        print(str_precision)
        print(str_f1)
        print(str_support)

        fscore_avg = np.mean(np.array(f1))
        print(f'fscore_avg: {fscore_avg:#0.5f}')
        print(f'Total time: {time.time()-start_time:#0.3f}s')

        # Save on a meaningful F-score improvement; otherwise fall back to
        # saving on a meaningful improvement of the last-batch training loss.
        if fscore_avg - best_fscore > SAVE_EPSILON:
            best_fscore = fscore_avg
            saver.save(sess, score_ckpt_path)
            print('*************** MODEL SAVED ON FSCORE ***************')
        elif best_loss - loss > SAVE_EPSILON:
            best_loss = loss
            saver.save(sess, loss_ckpt_path)
            print('*** model saved on loss ***')