# Load Tensorflow and check version. Check GPU exists or not

In [1]:
#!/usr/bin/env python3
import os.path
import tensorflow as tf
import helper
import warnings
from distutils.version import LooseVersion
import project_tests as tests


# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.4.0
Default GPU Device: /device:GPU:0


# Load Pre-trained VGG Network - Encoder part

In [2]:
def load_vgg(sess, vgg_path):
    """
    Load Pretrained VGG Model into TensorFlow.
    :param sess: TensorFlow Session
    :param vgg_path: Path to vgg folder, containing "variables/" and "saved_model.pb"
    :return: Tuple of Tensors from VGG model (image_input, keep_prob, layer3_out, layer4_out, layer7_out)
    """
    # TODO: Implement function
    #   Use tf.saved_model.loader.load to load the model and weights
    vgg_tag = 'vgg16'
    vgg_input_tensor_name = 'image_input:0'
    vgg_keep_prob_tensor_name = 'keep_prob:0'
    vgg_layer3_out_tensor_name = 'layer3_out:0'
    vgg_layer4_out_tensor_name = 'layer4_out:0'
    vgg_layer7_out_tensor_name = 'layer7_out:0'
    
    tf.saved_model.loader.load(sess, [vgg_tag], vgg_path)
    img_input = tf.get_default_graph().get_tensor_by_name(vgg_input_tensor_name)
    keep_prob = tf.get_default_graph().get_tensor_by_name(vgg_keep_prob_tensor_name)
    print("keep_prob tensor shape = " + str())
    layer3_out = tf.get_default_graph().get_tensor_by_name(vgg_layer3_out_tensor_name)
    print("layer3 shape = " + str(layer3_out.get_shape()))
    layer4_out = tf.get_default_graph().get_tensor_by_name(vgg_layer4_out_tensor_name)
    layer7_out = tf.get_default_graph().get_tensor_by_name(vgg_layer7_out_tensor_name)
    
    return img_input, keep_prob, layer3_out, layer4_out, layer7_out

In [3]:
tests.test_load_vgg(load_vgg, tf)

keep_prob tensor shape = 
layer3 shape = <unknown>
Tests Passed


## Create Layers - Decoder part

In [3]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.
    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify
    :return: The Tensor for the last layer of output
    """
    # TODO: Implement function
    # Step # 1: 1x1 Convolution of vgg_layer7_out
    layer7_out_1x1 = tf.layers.conv2d(vgg_layer7_out, num_classes, 1, padding = 'same', 
                                      kernel_initializer = tf.random_normal_initializer(stddev=0.01),
                                      kernel_regularizer= tf.contrib.layers.l2_regularizer(1e-3))
    
    # Step # 2: Upsampling
    layer7_out_upsampled = tf.layers.conv2d_transpose(layer7_out_1x1, num_classes, 4, 
                                                        strides = (2,2),
                                                        padding = 'same',
                                                        kernel_initializer = tf.random_normal_initializer(stddev=0.01),
                                                        kernel_regularizer= tf.contrib.layers.l2_regularizer(1e-3))
    
    # Step # 3: Match vgg_layer 4 dimensions to layers 7 upsampled output by using 1x1 convolution
    layer4_out_1x1 = tf.layers.conv2d(vgg_layer4_out, num_classes, 1, padding = 'same', 
                                      kernel_initializer = tf.random_normal_initializer(stddev=0.01),
                                      kernel_regularizer= tf.contrib.layers.l2_regularizer(1e-3))
    
    
    # Step # 4: Skip connection by element-wise addition
    layer4_7_skip_conn = tf.add(layer7_out_upsampled, layer4_out_1x1)
    
    # Step # 5: Upsampling
    layer4_7_upsampled = tf.layers.conv2d_transpose(layer4_7_skip_conn, num_classes, 4, 
                                                        strides = (2,2),
                                                        padding = 'same',
                                                        kernel_initializer = tf.random_normal_initializer(stddev=0.01),
                                                        kernel_regularizer= tf.contrib.layers.l2_regularizer(1e-3))
    
    # Step 6: Repeat process for VGG Layer 3
    layer3_out_1x1 = tf.layers.conv2d(vgg_layer3_out, num_classes, 1, padding = 'same', 
                                      kernel_initializer = tf.random_normal_initializer(stddev=0.01),
                                      kernel_regularizer= tf.contrib.layers.l2_regularizer(1e-3))
    
    # Step 6: Skip connection by element-wise addition
    layer4_7_3_skip_conn = tf.add(layer4_7_upsampled, layer3_out_1x1)
    
    # Step 7: Upsampling
    final_layer = tf.layers.conv2d_transpose(layer4_7_3_skip_conn, num_classes, 16, 
                                                        strides = (8,8),
                                                        padding = 'same',
                                                        kernel_initializer = tf.random_normal_initializer(stddev=0.01),
                                                        kernel_regularizer= tf.contrib.layers.l2_regularizer(1e-3))
                                                        
    return final_layer

tests.test_layers(layers)

Tests Passed


## Loss Function

In [4]:
def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """
    Build the TensorFLow loss and optimizer operations.
    :param nn_last_layer: TF Tensor of the last layer in the neural network
    :param correct_label: TF Placeholder for the correct label image
    :param learning_rate: TF Placeholder for the learning rate
    :param num_classes: Number of classes to classify
    :return: Tuple of (logits, train_op, cross_entropy_loss)
    """
    # TODO: Implement function
    
    # Reshape last layer as 2-D output with row representing pixel and column - binary for road/non-road
    logits = tf.reshape(nn_last_layer, (-1, num_classes), name="vsk_logits")
    correct_labels = tf.reshape(correct_label, (-1, num_classes))
    
    # Define the loss function - which a softmax cross entropy
    cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, 
                                                                                labels = correct_labels))
    
    # We will use Adam optimizer for finding the weights
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    train_op = optimizer.minimize(cross_entropy_loss)
    
    return logits, train_op, cross_entropy_loss

tests.test_optimize(optimize)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Tests Passed


## Define Training pipeline

In [5]:
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate):
    """
    Train neural network and print out the loss during training.
    :param sess: TF Session
    :param epochs: Number of epochs
    :param batch_size: Batch size
    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
    :param train_op: TF Operation to train the neural network
    :param cross_entropy_loss: TF Tensor for the amount of loss
    :param input_image: TF Placeholder for input images
    :param correct_label: TF Placeholder for label images
    :param keep_prob: TF Placeholder for dropout keep probability
    :param learning_rate: TF Placeholder for learning rate
    """
    # TODO: Implement function
    
    sess.run(tf.global_variables_initializer())
    print("Starting training ... ")
    
    log_loss = []
    
    for i in range(epochs):
        epoch_loss = 0
        batch_count = 0
        for img, label in get_batches_fn(batch_size):
            _, loss = sess.run([train_op, cross_entropy_loss],
                              feed_dict = {input_image: img, 
                                           correct_label: label, 
                                           keep_prob: 0.5,
                                           learning_rate: 1e-3})
            print("Loss at epoch # " + str(i+1)  + " = " + str(loss))
            
            epoch_loss += loss
            batch_count += 1
            
        log_loss.append(epoch_loss / batch_count)
    
    return log_loss
            
tests.test_train_nn(train_nn)

In [9]:
import scipy
import numpy as np

def gen_test_output(sess, logits, keep_prob, image_pl, image_shape, image):
    """ 
    Generate test output using the test images
    :param sess: TF session
    :param logits: TF Tensor for the logits
    :param keep_prob: TF Placeholder for the dropout keep robability
    :param image_pl: TF Placeholder for the image placeholder
    :param data_folder: Path to the folder that contains the datasets
    :param image_shape: Tuple - Shape of image
    :return: Output for for each test image
    """
    image = scipy.misc.imresize(image, image_shape)
    
    im_softmax = sess.run(
          [tf.nn.softmax(logits)],
          {keep_prob: 1.0, image_pl: [image]})
    im_softmax = im_softmax[0][:, 1].reshape(image_shape[0], image_shape[1])
    segmentation = (im_softmax > 0.5).reshape(image_shape[0], image_shape[1], 1)
    mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
    mask = scipy.misc.toimage(mask, mode="RGBA")
    street_im = scipy.misc.toimage(image)
    street_im.paste(mask, box=None, mask=mask)

    #yield os.path.basename(image_file), np.array(street_im)
    return street_im

## Training

In [6]:
def run():
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    
    model_runs_dir = './runs/final'
    
    print("Starting training process")

    tests.test_for_kitti_dataset(data_dir)
    
    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)
    
    print("DOwnload of VGG model done")

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the Kitti dataset.
    # You'll need a GPU with at least 10 teraFLOPS to train on.
    #  https://www.cityscapes-dataset.com/
    
    new_graph = tf.Graph()
    
    with tf.Session(graph=new_graph) as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        #  https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # TODO: Build NN using load_vgg, layers, and optimize function
        epochs = 65
        batch_size = 4

        correct_label = tf.placeholder(tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        
        input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(sess, vgg_path)
        nn_last_layer = layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(nn_last_layer, correct_label, learning_rate, num_classes)
        
        sess.run(tf.global_variables_initializer())

        # TODO: Train NN using the train_nn function
        print("Before starting the training")
        log_loss = train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate)
        
        print("Training completed")
        
        saver = tf.train.Saver(tf.trainable_variables())
        
        saver_def = saver.as_saver_def()
        save_path = os.path.join(model_runs_dir, 'semantic_model')
        save_path_pb = os.path.join(model_runs_dir, 'semantic_model.pb')
        
        saver.save(sess, save_path)
        tf.train.write_graph(sess.graph_def,  '.', save_path_pb, as_text=False)
        
        # Save inference data using helper.save_inference_samples
        folder_name = helper.save_inference_samples(model_runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image)


In [9]:
run()

## Load Graph

In [7]:
#!/usr/bin/env python3
import os.path
import tensorflow as tf
import helper
import warnings
from distutils.version import LooseVersion
import project_tests as tests

def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    
    graph = tf.Graph()
    # Then, we import the graph_def into a new Graph and returns it 
    with graph.as_default() as g:
        # The name var will prefix every op/nodes in your graph
        # Since we load everything in a new graph, this is not needed
        tf.import_graph_def(graph_def)
    return graph

In [None]:
def process_frame(frame, frozen_graph):
    img_shape = (160, 576)
    graph = load_graph(frozen_graph)
    logits = graph.get_tensor_by_name('prefix/softmax_cross_entropy_with_logits_sg:1')
    keep_prob = graph.get_tensor_by_name('prefix/keep_prob:0')
    input_image = graph.get_tensor_by_name('prefix/image_input:0')
    
    with tf.Session(graph=graph) as sess:
        semantic_img = gen_test_output(sess, logits, keep_prob, input_image, img_shape, frame)
        return semantic_img

## Inference - Process Test Images

In [None]:
frozen_graph = '/home/ubuntu/CarND-Semantic-Segmentation/runs/final/semantic_frozen_graph.pb'
graph = load_graph(frozen_graph)
data_dir = './data'
num_classes = 2
model_runs_dir = './runs/final'
image_shape = (160, 576)

with tf.Session(graph=graph) as sess:
    
    input_image = graph.get_tensor_by_name('import/image_input:0')
    keep_prob = graph.get_tensor_by_name('import/keep_prob:0')
    logits = graph.get_tensor_by_name('import/vsk_logits:0')
    
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    folder_name = helper.save_inference_samples(model_runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image)

## Inference - Process Video

In [None]:
frozen_graph = '/home/ubuntu/CarND-Semantic-Segmentation/runs/final/semantic_frozen_graph.pb'
graph = load_graph(frozen_graph)
data_dir = './data'
num_classes = 2

import cv2
import datetime
import numpy as np
import cv2
import matplotlib.pyplot as plt

cap = cv2.VideoCapture('./1.avi')

frame_width = int(cap.get(3))
frame_height = int(cap.get(4))

img_shape = (160, 576)

out = cv2.VideoWriter('semantic_output_India.avi', cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))

frame_count = 0

with tf.Session(graph=graph) as sess:
    
    input_image = graph.get_tensor_by_name('import/image_input:0')
    keep_prob = graph.get_tensor_by_name('import/keep_prob:0')
    logits = graph.get_tensor_by_name('import/vsk_logits:0')
    
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    while(cap.isOpened()):
        ret, frame = cap.read()
        frame = frame.astype(float)
        
        frame_count += 1
        
        # Skip initial frames
        if (frame_count <= 500):
            continue
            
        print("Read frame number  = " + str(frame_count))
        inference_start_time = datetime.datetime.now()
        semantic_img = gen_test_output(sess, logits, keep_prob, input_image, img_shape, frame)
        inference_end_time = datetime.datetime.now()
        inference_time = inference_end_time - inference_start_time
        print("Total inference time in ms = " + str(inference_time.total_seconds() * 1000))
        print("Inference completed for frame number = " + str(frame_count))

        semantic_resized = scipy.misc.imresize(semantic_img, (frame_height,frame_width))

        open_cv_image = np.array(semantic_resized) 
        # Convert RGB to BGR 
        open_cv_image = open_cv_image[:, :, ::-1].copy()
        out.write(open_cv_image)
        print("Type of image = " + str(type(open_cv_image)))
        print("Original Image shape = " + str(frame.shape))
        print("Semantic Image shape = " + str(open_cv_image.shape))

        #out.write(frame)

        plt.imshow(open_cv_image)
        plt.show()
        
    cap.release()
    out.release()