## Retraining Saved Models

#### With slight modifications:
    1) Lower learning rates
    2) Increased Adam epsilon values
    
#### Results from final submitted run (after retraining):
    MODEL_NAME: dcn_v5
    MODEL_SAVE_VER: 00_r2
    TRAIN_DIR: /home/ow/Documents/udacity/lyft/datasets/combined_v05/train_v4
    TEST_DIR: /home/ow/Documents/udacity/lyft/datasets/combined_v05/test_v6
    train_images.shape: (6400, 408, 800, 3)
    train_labels.shape: (6400, 408, 800, 10)
    test_images.shape: (500, 408, 800, 3)
    test_labels.shape: (500, 600, 800, 10)

    Training epoch: 12/200
    Training time: 767.930s, loss: 0.01681
    Prediction session time: 16.900s
    F1 scores: Back   Vehi   Road   Fence  Ped    Poles  Side   Veg    BW     OT      
               0.9549 0.7994 0.9924 0.7919 0.7320 0.7993 0.9660 0.8505 0.9090 0.7948
    prec_v: 0.71346, recall_v: 0.90887
    prec_r: 0.99116, recall_r: 0.99367
    fscore_avg: 0.92666, fscore_v: 0.86167, fscore_r: 0.99166
    Total time: 833.581s
    *************** MODEL SAVED ON SCORE ***************

In [None]:
import tensorflow as tf
import helper_functions as hf
import numpy as np
import warnings
import os
import time
import pickle
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import shuffle
from functools import reduce
import random

In [None]:
# ---------------------------------------------------------------------------
# Run configuration for retraining a previously saved model.
# ---------------------------------------------------------------------------

# Model identity and checkpoint versions.
MODEL_NAME = 'dcn_v5'
MODEL_RESTORE_VER = '00_r1'  # version whose 'score' checkpoint is restored
MODEL_SAVE_VER = '00_r2'     # version directory that receives new checkpoints

# Score a new epoch has to beat before a 'score' checkpoint is written.
# NOTE(review): presumably the best score of the restored run -- confirm.
best_fscore = 0.90862
# Minimum improvement (in score or loss) that triggers a save.
SAVE_EPSILON = 1e-4

# Training schedule.
EPOCHS = 200
SHUFFLE_PER_EPOCH = True
BATCH_SIZE = 12

# Optimisation hyper-parameters. L2_REG, STD_DEV and EPSILON are not referenced
# by the visible retraining code; presumably they only matter when the graph is
# first built -- TODO confirm.
L2_REG = 1e-5
STD_DEV = 1e-2
LEARNING_RATE = 5e-6
KEEP_PROB = 0.5
EPSILON = 1e-6
ADAM_EPSILON = 1e-3

# Options handed to the helper batch generators / used for augmentation.
TRIM_IND = (115, 523)
FLIP = True
RESHAPE = False
PREPROCESS = True

# Label layout. CHANNEL_NAMES and LOSS_WEIGHTS carry one entry per class.
NEW_LABELS = True
LABEL_CHANNELS = [10, 7, 2, 4, 5, 8, 9, 20, 30]
CHANNEL_NAMES = ['Back', 'Vehi', 'Road', 'Fence', 'Ped', 'Poles', 'Side', 'Veg', 'BW', 'OT']
LOSS_WEIGHTS = [0.3, 1.2, 0.4, 0.3, 1.0, 0.5, 0.3, 0.3, 0.3, 0.5]

# One class more than the listed label channels.
NUM_CLASSES = 1 + len(LABEL_CHANNELS)

# Filesystem layout, all relative to the current working directory.
_PROJECT_ROOT = os.getcwd()

DATA_DIR = os.path.join(_PROJECT_ROOT, 'datasets', 'combined_v05')
TRAIN_DIR = os.path.join(DATA_DIR, 'train_v4')
TEST_DIR = os.path.join(DATA_DIR, 'test_v6')

_MODEL_ROOT = os.path.join(_PROJECT_ROOT, 'saved_models', MODEL_NAME)
SAVE_DIR = os.path.join(_MODEL_ROOT, MODEL_SAVE_VER)
RESTORE_DIR = os.path.join(_MODEL_ROOT, MODEL_RESTORE_VER, 'score')

In [None]:
print('Loading datasets ...')

# Batch generator factories from the project helper module. The test factory
# also returns `revert_trim_reshape`, which is applied to predictions later on.
get_train_batch = hf.train_batch_gen(TRAIN_DIR, LABEL_CHANNELS, reshape=RESHAPE,
                                     preprocess=PREPROCESS, new_labels=NEW_LABELS,
                                     trim_ind=TRIM_IND)
get_test_batch, revert_trim_reshape = hf.test_batch_gen(TEST_DIR, LABEL_CHANNELS,
                                      reshape=RESHAPE, preprocess=PREPROCESS, new_labels=NEW_LABELS,
                                      trim_ind=TRIM_IND)
data_load_start = time.time()


def _load_all(batch_gen, chunk_size=100):
    """Drain a batch generator into two contiguous uint8 arrays.

    Args:
        batch_gen: callable taking a chunk size and yielding
            ``(images, labels, extra)`` triples; ``extra`` is ignored.
        chunk_size: number of samples requested per generator step.

    Returns:
        ``(images, labels)`` with all chunks joined along the first axis.

    Raises:
        ValueError: if the generator yields nothing.
    """
    image_chunks = []
    label_chunks = []
    for images, labels, _ in batch_gen(chunk_size):
        # Cast per chunk so the temporary list is already stored as uint8.
        image_chunks.append(np.asarray(images, dtype=np.uint8))
        label_chunks.append(np.asarray(labels, dtype=np.uint8))

    if not image_chunks:
        raise ValueError('Batch generator yielded no data')

    # Concatenating along axis 0 (rather than stacking and reshaping) also
    # works when the final chunk holds fewer than `chunk_size` samples.
    return (np.concatenate(image_chunks, axis=0),
            np.concatenate(label_chunks, axis=0))


train_images, train_labels = _load_all(get_train_batch)
test_images, test_labels = _load_all(get_test_batch)


# Evaluation bookkeeping derived from the test labels:
#   flat_labels_size - product of every label dimension except the last
#                      (the class channel), i.e. one entry per labelled pixel
#   image_org_shape  - (height, width) of a single test label map
#   flat_offset      - number of label pixels in one full evaluation batch
flat_labels_size = reduce(lambda x, y: x*y, test_labels.shape[:-1])
image_org_shape = (test_labels.shape[1], test_labels.shape[2])
flat_offset = BATCH_SIZE*image_org_shape[0]*image_org_shape[1]

In [None]:
# Echo the run configuration and the loaded dataset shapes so that the notebook
# output documents exactly what this retraining run used.
run_summary = (
    ('MODEL_NAME', MODEL_NAME),
    ('MODEL_RESTORE_VER', MODEL_RESTORE_VER),
    ('MODEL_SAVE_VER', MODEL_SAVE_VER),
    ('TRAIN_DIR', TRAIN_DIR),
    ('TEST_DIR', TEST_DIR),
    ('train_images.shape', train_images.shape),
    ('train_labels.shape', train_labels.shape),
    ('test_images.shape', test_images.shape),
    ('test_labels.shape', test_labels.shape),
)
for summary_key, summary_value in run_summary:
    print(f'{summary_key}: {summary_value}')

# Time elapsed since `data_load_start` was taken in the loading cell.
load_seconds = time.time() - data_load_start
print(f'Data load time: {load_seconds:#0.1f}s')

# Rebuild the saved graph from its MetaGraph file. The returned saver is used
# both to restore the previous weights and to write the retrained checkpoints.
saver = tf.train.import_meta_graph(os.path.join(RESTORE_DIR, MODEL_NAME + '.ckpt.meta'))

# latest_checkpoint() returns None when no checkpoint state is found; fail
# early with a clear message instead of passing None to saver.restore().
restore_ckpt = tf.train.latest_checkpoint(RESTORE_DIR)
if restore_ckpt is None:
    raise FileNotFoundError(f'No checkpoint found in {RESTORE_DIR}')

score_ckpt = os.path.join(SAVE_DIR, 'score', MODEL_NAME + '.ckpt')
loss_ckpt = os.path.join(SAVE_DIR, 'loss', MODEL_NAME + '.ckpt')

# Fixed class index list for the metrics call. Without it sklearn only reports
# classes that actually occur, which would shift the per-class indices used
# below (1 -> 'Vehi', 2 -> 'Road') whenever a class is absent.
class_ids = list(range(len(CHANNEL_NAMES)))
pixels_per_image = image_org_shape[0] * image_org_shape[1]

with tf.Session() as sess:

    saver.restore(sess, restore_ckpt)
    graph = tf.get_default_graph()

    # Look up the placeholders, outputs and train op by the names stored in
    # the restored graph.
    image_input = graph.get_tensor_by_name('image_input:0')
    label_input = graph.get_tensor_by_name('label_input:0')
    loss_weights = graph.get_tensor_by_name('loss_weights:0')
    keep_prob = graph.get_tensor_by_name('keep_prob:0')
    l_rate = graph.get_tensor_by_name('l_rate:0')
    adam_eps = graph.get_tensor_by_name('adam_eps:0')
    prediction = graph.get_tensor_by_name('output/prediction:0')
    total_loss = graph.get_tensor_by_name('optimize/total_loss:0')
    opt = graph.get_operation_by_name('optimize/Adam')

    fscore_avg = 0.0
    best_loss = 9999

    for epoch in range(EPOCHS):
        start_time = time.time()
        print(f'\nTraining epoch: {epoch+1}/{EPOCHS}')

        if SHUFFLE_PER_EPOCH:
            train_images, train_labels = shuffle(train_images, train_labels)

        # ----------------------------- training -----------------------------
        for offset in range(0, len(train_images), BATCH_SIZE):
            train_image_batch = train_images[offset:offset+BATCH_SIZE]
            train_label_batch = train_labels[offset:offset+BATCH_SIZE]

            # Augmentation: with probability 1/2 mirror the whole batch
            # (images and labels alike) along axis 2.
            if FLIP and random.randint(0, 1) == 0:
                train_image_batch = np.flip(train_image_batch, axis=2)
                train_label_batch = np.flip(train_label_batch, axis=2)

            _, loss = sess.run([opt, total_loss],
                               feed_dict={image_input: train_image_batch,
                                          label_input: train_label_batch,
                                          loss_weights: LOSS_WEIGHTS,
                                          keep_prob: KEEP_PROB,
                                          l_rate: LEARNING_RATE,
                                          adam_eps: ADAM_EPSILON})

        # NOTE: `loss` is the loss of the last mini-batch of the epoch, not an
        # epoch average; it is also what the loss-based save criterion uses.
        print(f'Training time: {(time.time() - start_time):#0.1f}s, loss: {loss:#0.5f}')

        # ---------------------------- evaluation ----------------------------
        sess_time = 0
        total_preds = np.empty((flat_labels_size,), dtype=np.uint8)
        total_labels = np.empty((flat_labels_size,), dtype=np.uint8)
        for offset in range(0, len(test_images), BATCH_SIZE):
            pred_time = time.time()
            test_image_batch = test_images[offset:offset+BATCH_SIZE]
            test_label_batch = test_labels[offset:offset+BATCH_SIZE]
            preds = sess.run(prediction, feed_dict={image_input: test_image_batch,
                                                    keep_prob: 1.0})

            # Helper returned by hf.test_batch_gen; presumably maps the
            # predictions back onto the label resolution -- TODO confirm.
            preds = revert_trim_reshape(preds)
            sess_time += time.time() - pred_time

            preds_result = np.array(preds, dtype=np.uint8).reshape(-1)
            labels_result = test_label_batch.argmax(axis=3).reshape(-1)

            # Position of this batch inside the flat per-pixel buffers; the
            # last batch may hold fewer than BATCH_SIZE images.
            start = offset * pixels_per_image
            stop = start + len(test_label_batch) * pixels_per_image
            total_preds[start:stop] = preds_result
            total_labels[start:stop] = labels_result

        print(f'Prediction session time: {sess_time:#0.1f}s')
        metrics = precision_recall_fscore_support(total_labels, total_preds,
                                                  labels=class_ids)
        # Release the large per-pixel buffers before the next epoch.
        del total_preds
        del total_labels

        # Per-class F1 table: names on the first row, scores on the second.
        f1_str_1 = 'F1 scores: ' + ''.join(f'{name:8}' for name in CHANNEL_NAMES)
        f1_str_2 = '         ' + ''.join(f'{val:#8.4f}' for val in metrics[2])
        print(f1_str_1)
        print(f1_str_2)

        # metrics = (precision, recall, f1, support); class index 1 is 'Vehi'
        # and index 2 is 'Road' in CHANNEL_NAMES.
        prec_v = metrics[0][1]
        prec_r = metrics[0][2]
        recall_v = metrics[1][1]
        recall_r = metrics[1][2]
        if (prec_v == 0 and recall_v == 0) or (prec_r == 0 and recall_r == 0):
            # The F-beta denominators below would be zero.
            fscore_avg = 1e-6
            print('NaN: division by zero in fscore_avg')
        else:
            # F-beta = (1 + b^2) * P * R / (b^2 * P + R):
            # b = 2 for vehicles (recall-weighted), b = 0.5 for road
            # (precision-weighted).
            fscore_v = 5 * (prec_v * recall_v) / (4 * prec_v + recall_v)
            fscore_r = 1.25 * (prec_r * recall_r) / (0.25 * prec_r + recall_r)
            fscore_avg = (fscore_v + fscore_r) / 2
            print(f'prec_v: {prec_v:#0.5f}, recall_v: {recall_v:#0.5f}')
            print(f'prec_r: {prec_r:#0.5f}, recall_r: {recall_r:#0.5f}')
            print(f'fscore_avg: {fscore_avg:#0.5f}, fscore_v: {fscore_v:#0.5f}, fscore_r: {fscore_r:#0.5f}')
        print(f'Total time: {time.time()-start_time:#0.1f}s')

        # --------------------------- checkpointing ---------------------------
        # A score improvement takes priority; the loss checkpoint (and
        # best_loss) is only updated on epochs that did not improve the score.
        if fscore_avg - best_fscore > SAVE_EPSILON:
            best_fscore = fscore_avg
            os.makedirs(os.path.dirname(score_ckpt), exist_ok=True)
            saver.save(sess, score_ckpt)
            print('*************** MODEL SAVED ON SCORE ***************')
        elif best_loss - loss > SAVE_EPSILON:
            best_loss = loss
            os.makedirs(os.path.dirname(loss_ckpt), exist_ok=True)
            saver.save(sess, loss_ckpt)
            print('*** model saved on loss ***')