In [1]:
# %load main.py
import os.path
import tensorflow as tf
import helper
import warnings
from tqdm import tqdm
from distutils.version import LooseVersion
import project_tests as tests


# Refuse to continue on a TensorFlow release older than 1.0
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), (
    'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
)
print('TensorFlow Version: {}'.format(tf.__version__))

# Report the default GPU device when TensorFlow sees one, otherwise warn
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')

TensorFlow Version: 1.1.0




In [2]:
# https://github.com/ooleksyuk/CarND-Semantic-Segmentation/blob/master/main.py

In [3]:
# Hyperparameters for the network built in the cells below.
# Number of output classes; passed to layers() and optimize() by the training cell.
NUM_CLASSES = 2
# Intended dropout keep probability.
# NOTE(review): not referenced by name in the visible cells — confirm the value fed during training matches.
KEEP_PROB = 1.0
# Intended learning rate for the Adam optimizer created in optimize().
# NOTE(review): not referenced by name in the visible cells — confirm the value fed during training matches.
LEARNING_RATE = 1e-3
# Number of epochs handed to train_nn() by the training cell.
EPOCHS = 10
# Batch size handed to train_nn() by the training cell.
BATCH_SIZE = 32

In [4]:
def load_vgg(sess, vgg_path):
    """
    Load Pretrained VGG Model into TensorFlow.
    :param sess: TensorFlow Session
    :param vgg_path: Path to vgg folder, containing "variables/" and "saved_model.pb"
    :return: Tuple of Tensors from VGG model (image_input, keep_prob, layer3_out, layer4_out, layer7_out)
    """
    vgg_tag = 'vgg16'
    tensor_names = ('image_input:0', 'keep_prob:0', 'layer3_out:0', 'layer4_out:0', 'layer7_out:0')

    # Restore the SavedModel tagged 'vgg16' through the given session.
    tf.saved_model.loader.load(sess, [vgg_tag], vgg_path)
    # Tensors are looked up on the default graph — assumes `sess` is bound to it.
    graph = tf.get_default_graph()
    # Return a real tuple (as documented) instead of a lazy, single-use generator,
    # so a missing tensor fails here and the result can be indexed or reused.
    return tuple(graph.get_tensor_by_name(name) for name in tensor_names)

# Run the project-supplied check (project_tests) against load_vgg.
tests.test_load_vgg(load_vgg, tf)

Tests Passed


In [19]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.

    The decoder upsamples the layer 7 scores by 2, adds the layer 4 scores, upsamples by 2 again,
    adds the layer 3 scores and finally upsamples by 8 (2 * 2 * 8 = 32 overall, the same total
    factor as a single 32x transposed convolution).

    NOTE(review): the element-wise additions assume layer 4 has exactly twice the spatial size of
    layer 7 and layer 3 exactly twice that of layer 4 (the usual VGG16 layout for inputs whose
    height and width are multiples of 32) — confirm for the image shape in use.

    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify
    :return: The Tensor for the last layer of output
    """
    # 1x1 convolutions reduce every encoder output to num_classes channels so they can be summed.
    score7 = tf.layers.conv2d(vgg_layer7_out, num_classes, 1, padding='same')
    score4 = tf.layers.conv2d(vgg_layer4_out, num_classes, 1, padding='same')
    score3 = tf.layers.conv2d(vgg_layer3_out, num_classes, 1, padding='same')

    # Upsample the deepest scores x2 and fuse with layer 4 (first skip connection).
    up7 = tf.layers.conv2d_transpose(score7, num_classes, 4, strides=(2, 2), padding='same')
    fuse4 = tf.add(up7, score4)

    # Upsample x2 again and fuse with layer 3 (second skip connection).
    up4 = tf.layers.conv2d_transpose(fuse4, num_classes, 4, strides=(2, 2), padding='same')
    fuse3 = tf.add(up4, score3)

    # Final x8 upsampling completes the x32 total upsampling.
    return tf.layers.conv2d_transpose(fuse3, num_classes, 16, strides=(8, 8), padding='same')

# Run the project-supplied check (project_tests) against layers.
tests.test_layers(layers)

In [20]:
def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """
    Build the TensorFlow loss and optimizer operations.
    :param nn_last_layer: TF Tensor of the last layer in the neural network
    :param correct_label: TF Placeholder for the correct label image
    :param learning_rate: TF Placeholder for the learning rate
    :param num_classes: Number of classes to classify
    :return: Tuple of (logits, train_op, cross_entropy_loss)
    """
    # Flatten predictions AND labels to (rows, num_classes) so both arguments of the
    # cross-entropy op have the same explicit shape instead of relying on the op to
    # reconcile a 2-D logits tensor with a higher-rank label tensor.
    logits = tf.reshape(nn_last_layer, (-1, num_classes))
    labels = tf.reshape(correct_label, (-1, num_classes))

    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    cross_entropy_loss = tf.reduce_mean(per_row_loss)

    # minimize() yields the training operation, not an optimizer object.
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy_loss)
    return logits, train_op, cross_entropy_loss

# Run the project-supplied check (project_tests) against optimize.
tests.test_optimize(optimize)

In [23]:
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate, keep_prob_value=1.0, learning_rate_value=1e-3):
    """
    Train neural network and print out the loss during training.
    :param sess: TF Session
    :param epochs: Number of epochs
    :param batch_size: Batch size
    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
    :param train_op: TF Operation to train the neural network
    :param cross_entropy_loss: TF Tensor for the amount of loss
    :param input_image: TF Placeholder for input images
    :param correct_label: TF Placeholder for label images
    :param keep_prob: TF Placeholder for dropout keep probability
    :param learning_rate: TF Placeholder for learning rate
    :param keep_prob_value: Value fed into the keep_prob placeholder (default 1.0)
    :param learning_rate_value: Value fed into the learning_rate placeholder (default 1e-3)
    :return: List with the loss fetched for every training batch, in training order
    """
    losses = []
    for epoch in range(epochs):
        # Show 1-based epoch numbers so the last epoch reads "Epoch N/N".
        desc = "Epoch {0}/{1}".format(epoch + 1, epochs)
        epoch_losses = []
        for images, labels in tqdm(get_batches_fn(batch_size), desc=desc):
            feed_dict = {
                input_image: images,
                correct_label: labels,
                keep_prob: keep_prob_value,
                learning_rate: learning_rate_value,
            }
            _, loss = sess.run([train_op, cross_entropy_loss], feed_dict=feed_dict)
            epoch_losses.append(loss)

        # Report the loss once per epoch; skip when the batch generator yielded nothing
        # so the mean never divides by zero.
        if epoch_losses:
            mean_loss = sum(epoch_losses) / len(epoch_losses)
            print("{0} - mean loss: {1:.4f}".format(desc, mean_loss))
        losses.extend(epoch_losses)
    return losses
            
    
# Run the project-supplied check (project_tests) against train_nn.
tests.test_train_nn(train_nn)


Epoch 0/1:   0%|          | 0/2 [00:00<?, ?it/s][A
Epoch 0/1: 100%|██████████| 2/2 [00:00<00:00, 280.19it/s][A

In [22]:
import ssl

# NOTE(review): this swaps the default HTTPS context for an unverified one, i.e. TLS
# certificates are no longer checked for downloads that rely on that default. Presumably
# a workaround for certificate errors while fetching the VGG weights — prefer fixing the
# local CA bundle and removing this line.
ssl._create_default_https_context = ssl._create_unverified_context

image_shape = (160, 576)
data_dir = './data'
runs_dir = './runs'
tests.test_for_kitti_dataset(data_dir)

# Download pretrained vgg model
helper.maybe_download_pretrained_vgg(data_dir)

# OPTIONAL: Train and Inference on the cityscapes dataset instead of the Kitti dataset.
# You'll need a GPU with at least 10 teraFLOPS to train on.
#  https://www.cityscapes-dataset.com/

# Start from an empty default graph so re-running this cell does not pile up nodes.
tf.reset_default_graph()
with tf.Session() as sess:
    # Path to vgg model
    vgg_path = os.path.join(data_dir, 'vgg')
    # Create function to get batches
    get_batches_fn = helper.gen_batch_function(os.path.join(data_dir, 'data_road/training'), image_shape)

    # OPTIONAL: Augment Images for better results
    #  https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

    # Build the network from load_vgg, layers and optimize.
    # load_vgg is called exactly once: every call loads the SavedModel through the
    # session again, so a second call would only add a redundant copy of VGG.
    input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
    output_layer = layers(layer3, layer4, layer7, NUM_CLASSES)

    correct_label = tf.placeholder(dtype=tf.float32, shape=(None, None, None, NUM_CLASSES), name='correct_label')
    learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')

    logits, train_op, cross_entropy_loss = optimize(output_layer, correct_label, learning_rate, NUM_CLASSES)

    print("Initialize variables")
    sess.run(tf.global_variables_initializer())

    print("Training network")
    losses = train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op, cross_entropy_loss, input_image, correct_label, keep_prob, learning_rate)

    # Inference export is currently disabled; re-enable to write sample predictions to runs_dir.
    # helper.save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image)
    # OPTIONAL: Apply the trained model to a video
    print("Done")


Epoch 0/10: 0it [00:00, ?it/s][A
Epoch 0/10: 4it [07:24, 112.61s/it]

KeyboardInterrupt: 