In [1]:
%matplotlib inline
import tensorflow as tf
import numpy as np
import os
import scipy.io
import glob
import scipy.misc as misc
import warnings
from distutils.version import LooseVersion
import helper
import project_tests as tests
import skimage
from matplotlib import pyplot as plt

In [2]:
# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

TensorFlow Version: 1.8.0


In [3]:
# Check for a GPU
if not tf.test.gpu_device_name():
  warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
  print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

Default GPU Device: /device:GPU:0


In [4]:
DATA_DIRECTORY = './data'
RUNS_DIRECTORY = './runs'
TRAINING_DATA_DIRECTORY ='./data/data_liquid/training'
dirs = [x[0] for x in os.walk(TRAINING_DATA_DIRECTORY)]
TRAINING_DIRECTORIES = dirs[1:]
NUMBER_OF_IMAGES = 11192 #len(glob.glob('./data/data_liquid/testing/image_2/*.*'))
VGG_PATH = './data/vgg'

In [5]:
NUMBER_OF_CLASSES = 2
IMAGE_SHAPE = (480, 640)

EPOCHS = 4
BATCH_SIZE = 4

LEARNING_RATE = 0.0001
DROPOUT = 0.75

In [6]:
correct_label = tf.placeholder(tf.float32, [None, IMAGE_SHAPE[0], IMAGE_SHAPE[1], NUMBER_OF_CLASSES])
learning_rate = tf.placeholder(tf.float32)
keep_prob = tf.placeholder(tf.float32)

In [7]:
# Used for plotting to visualize if our training is going well given parameters
all_training_losses = []

In [8]:
def load_vgg(sess, vgg_path):
  """
  Load Pretrained VGG Model into TensorFlow.
  sess: TensorFlow Session
  vgg_path: Path to vgg folder, containing "variables/" and "saved_model.pb"
  return: Tuple of Tensors from VGG model (image_input, keep_prob, layer3, layer4, layer7)
  """
  # load the model and weights
  model = tf.saved_model.loader.load(sess, ['vgg16'], vgg_path)

  # Get Tensors to be returned from graph
  graph = tf.get_default_graph()
  image_input = graph.get_tensor_by_name('image_input:0')
  keep_prob = graph.get_tensor_by_name('keep_prob:0')
  layer3 = graph.get_tensor_by_name('layer3_out:0')
  layer4 = graph.get_tensor_by_name('layer4_out:0')
  layer7 = graph.get_tensor_by_name('layer7_out:0')

  return image_input, keep_prob, layer3, layer4, layer7

In [9]:
def conv_1x1(layer, layer_name):
  """ Return the output of a 1x1 convolution of a layer """
  return tf.layers.conv2d(inputs = layer,
                          filters =  NUMBER_OF_CLASSES,
                          kernel_size = (1, 1),
                          strides = (1, 1),
                          name = layer_name)

In [10]:
def upsample(layer, k, s, layer_name):
  """ Return the output of transpose convolution given kernel_size k and strides s """
  # See: http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#transposed-convolution-arithmetic
  return tf.layers.conv2d_transpose(inputs = layer,
                                    filters = NUMBER_OF_CLASSES,
                                    kernel_size = (k, k),
                                    strides = (s, s),
                                    padding = 'same',
                                    name = layer_name)

In [11]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes = NUMBER_OF_CLASSES):
  """
  Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.
  vgg_layerX_out: TF Tensor for VGG Layer X output
  num_classes: Number of classes to classify
  return: The Tensor for the last layer of output
  """

  # Use a shorter variable name for simplicity
  layer3, layer4, layer7 = vgg_layer3_out, vgg_layer4_out, vgg_layer7_out

  # Apply a 1x1 convolution to encoder layers
  layer3x = conv_1x1(layer = layer3, layer_name = "layer3conv1x1")
  layer4x = conv_1x1(layer = layer4, layer_name = "layer4conv1x1")
  layer7x = conv_1x1(layer = layer7, layer_name = "layer7conv1x1")
 
  # Add decoder layers to the network with skip connections and upsampling
  # Note: the kernel size and strides are the same as the example in Udacity Lectures
  #       Semantic Segmentation Scene Understanding Lesson 10-9: FCN-8 - Decoder
  decoderlayer1 = upsample(layer = layer7x, k = 4, s = 2, layer_name = "decoderlayer1")
  decoderlayer2 = tf.add(decoderlayer1, layer4x, name = "decoderlayer2")
  decoderlayer3 = upsample(layer = decoderlayer2, k = 4, s = 2, layer_name = "decoderlayer3")
  decoderlayer4 = tf.add(decoderlayer3, layer3x, name = "decoderlayer4")
  decoderlayer_output = upsample(layer = decoderlayer4, k = 16, s = 8, layer_name = "decoderlayer_output")

  return decoderlayer_output

In [12]:
def layers_verbose(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes = NUMBER_OF_CLASSES):

  # Use a shorter variable name for simplicity
  layer3, layer4, layer7 = vgg_layer3_out, vgg_layer4_out, vgg_layer7_out

  # Apply a 1x1 convolution to encoder layers
  layer3x = conv_1x1(layer = layer3, layer_name = "layer3conv1x1")
  layer4x = conv_1x1(layer = layer4, layer_name = "layer4conv1x1")
  layer7x = conv_1x1(layer = layer7, layer_name = "layer7conv1x1")
 
  decoderlayer1 = upsample(layer = layer7x, k = 4, s = 2, layer_name = "decoderlayer1")
  decoderlayer2 = tf.add(decoderlayer1, layer4x, name = "decoderlayer2")
  decoderlayer3 = upsample(layer = decoderlayer2, k = 4, s = 2, layer_name = "decoderlayer3")
  decoderlayer4 = tf.add(decoderlayer3, layer3x, name = "decoderlayer4")
  decoderlayer_output = upsample(layer = decoderlayer4, k = 16, s = 8, layer_name = "decoderlayer_output")

  return layer3, layer4, layer7, layer3x, layer4x, layer7x, \
         decoderlayer1, decoderlayer2, decoderlayer3, decoderlayer4, decoderlayer_output


In [13]:
def optimize(nn_last_layer, correct_label, learning_rate, num_classes = NUMBER_OF_CLASSES):
  """
  Build the TensorFLow loss and optimizer operations.
  nn_last_layer: TF Tensor of the last layer in the neural network
  correct_label: TF Placeholder for the correct label image
  learning_rate: TF Placeholder for the learning rate
  num_classes: Number of classes to classify
  return: Tuple of (logits, train_op, cross_entropy_loss)
  """
  # reshape 4D tensors to 2D
  # Each row represents a pixel, each column a class
  logits = tf.reshape(nn_last_layer, (-1, num_classes))
  class_labels = tf.reshape(correct_label, (-1, num_classes))

  # The cross_entropy_loss is the cost which we are trying to minimize to yield higher accuracy
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = class_labels)
  cross_entropy_loss = tf.reduce_mean(cross_entropy)

  # The model implements this operation to find the weights/parameters that would yield correct pixel labels
  train_op = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy_loss)

  return logits, train_op, cross_entropy_loss

In [14]:
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
             cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate, saver):
  """
  Train neural network and print out the loss during training.
  sess: TF Session
  epochs: Number of epochs
  batch_size: Batch size
  get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
  train_op: TF Operation to train the neural network
  cross_entropy_loss: TF Tensor for the amount of loss
  input_image: TF Placeholder for input images
  correct_label: TF Placeholder for label images
  keep_prob: TF Placeholder for dropout keep probability
  learning_rate: TF Placeholder for learning rate
  """

  for epoch in range(EPOCHS):
    
    losses, i = [], 0
    
    for images, labels in get_batches_fn(BATCH_SIZE):
        
      i += 1
    
      feed = { input_image: images,
               correct_label: labels,
               keep_prob: DROPOUT,
               learning_rate: LEARNING_RATE }
        
      _, partial_loss = sess.run([train_op, cross_entropy_loss], feed_dict = feed)
      
      print("---> iteration: ", i, " partial loss:", partial_loss)
      losses.append(partial_loss)
      
      if i % 100 == 0:
        # Save model
        saver.save(sess, "./model.ckpt")  
          
    training_loss = sum(losses) / len(losses)
    all_training_losses.append(training_loss)
    
    print("------------------")
    print("epoch: ", epoch + 1, " of ", EPOCHS, "training loss: ", training_loss)
    print("------------------")

In [15]:
def run_tests():
  tests.test_layers(layers)
  tests.test_optimize(optimize)
  tests.test_for_kitti_dataset(DATA_DIRECTORY)
  tests.test_train_nn(train_nn)

In [16]:
def run():
  print("NUMBER OF IMAGES:", NUMBER_OF_IMAGES)

  # download vgg model
  helper.maybe_download_pretrained_vgg(DATA_DIRECTORY)

  # A function to get batches
  get_batches_fn = helper.gen_batch_function(TRAINING_DIRECTORIES, IMAGE_SHAPE)

 
  
  with tf.Session() as session:
        
    # Returns the three layers, keep probability and input layer from the vgg architecture
    image_input, keep_prob, layer3, layer4, layer7 = load_vgg(session, VGG_PATH)

    # The resulting network architecture, adding a decoder on top of the given vgg model
    model_output = layers(layer3, layer4, layer7, NUMBER_OF_CLASSES)

    # Returns the output logits, training operation and cost operation to be used
    # For the logits: each row represents a pixel, each column a class
    # training operation is what is used to get the right parameters to the model to correctly label the pixels
    # the cross entropy loss is the cost which we are minimizing, lower cost should yield higher accuracy
    logits, train_op, cross_entropy_loss = optimize(model_output, correct_label, learning_rate, NUMBER_OF_CLASSES)
    
    # Initilize all variables
    session.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    
    # Create saver
    saver = tf.train.Saver()

    # train the neural network
    train_nn(session, EPOCHS, BATCH_SIZE, get_batches_fn, 
             train_op, cross_entropy_loss, image_input,
             correct_label, keep_prob, learning_rate, saver)
    
    # Save inference data
    helper.save_inference_samples(RUNS_DIRECTORY, DATA_DIRECTORY, session, IMAGE_SHAPE, logits, keep_prob, image_input)    

In [17]:
def network_shapes():
  with tf.Session() as sess:
    x = np.random.randn(1, 160, 576, 3)
    
    image_input, keep_prob, layer3, layer4, layer7 = load_vgg(sess, VGG_PATH)
 
    op = layers_verbose(layer3, layer4, layer7, NUMBER_OF_CLASSES)
  
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

    l3, l4, l7, l3x, l4x, l7x, d1, s2, d3, s4, d5 = sess.run(op, feed_dict = {image_input: x, keep_prob: 1.0})

    print("------------------")
    print("shapes of layers:") 
    print("------------------")

    print("layer3 -->", l3.shape)
    print("layer4 -->", l4.shape)
    print("layer7 -->", l7.shape)
    print("layer3 conv1x1 -->", l3x.shape)
    print("layer4 conv1x1 -->", l4x.shape)
    print("layer7 conv1x1-->", l7x.shape)
    print("decoderlayer1 transpose: layer7 k = 4 s = 2 -->", d1.shape)
    print("decoderlayer2 skip: decoderlayer1 and layer4conv1x1 -->", s2.shape)
    print("decoderlayer3 transpose: decoderlayer2 k = 4 s = 2 -->", d3.shape)
    print("decoderlayer4 skip: decoderlayer3 and layer3conv1x1 -->", s4.shape)
    print("decoderlayer5 transpose: decoderlayer4 k = 16 s = 8 -->", d5.shape)

In [None]:
# Train the network
run()

NUMBER OF IMAGES: 11192
INFO:tensorflow:Restoring parameters from b'./data/vgg/variables/variables'
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

---> iteration:  1  partial loss: 48.45235
---> iteration:  2  partial loss: 43.40933


In [None]:
all_training_losses

In [None]:
tf.reset_default_graph()
    
with tf.Session() as session:
    # Restore variables and model
    saver = tf.train.import_meta_graph("./model.ckpt.meta")
    print("Model restored.")
    
   
    # Get the three layers, keep probability and input layer
    graph = tf.get_default_graph()
    image_input = graph.get_tensor_by_name('image_input:0')
    keep_prob = graph.get_tensor_by_name('keep_prob:0')
    layer3 = graph.get_tensor_by_name('layer3_out:0')
    layer4 = graph.get_tensor_by_name('layer4_out:0')
    layer7 = graph.get_tensor_by_name('layer7_out:0')

    # The resulting network architecture, adding a decoder on top of the given vgg model
    model_output = layers(layer3, layer4, layer7, NUMBER_OF_CLASSES)

    logits = tf.reshape(model_output, (-1, NUMBER_OF_CLASSES))
    print('Graph constructed')
    
    # Initilize all variables
    session.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    print('Variables initialized')
    
    # Restore variables that were trained from previous session
    saver.restore(session, tf.train.latest_checkpoint("./"))
    
    # TEST IMAGE
    test_image = scipy.misc.imresize(scipy.misc.imread("./data/data_liquid/testing/image_2/data0257.png"), IMAGE_SHAPE)
    
    # Make predictions
    feed = {keep_prob: 1.0, image_input: [test_image]}
    im_softmax = session.run( [tf.nn.softmax(logits)], feed)
    im_softmax = im_softmax[0][:, 1].reshape(IMAGE_SHAPE[0], IMAGE_SHAPE[1])
    segmentation = (im_softmax > 0.5).reshape(IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1)
    mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
    mask = scipy.misc.toimage(mask, mode="RGBA")
    result = scipy.misc.toimage(test_image)
    result.paste(mask, box=None, mask=mask)
    plt.imshow(result)
    plt.show()  
   
    
    #helper.save_inference_samples(RUNS_DIRECTORY, DATA_DIRECTORY, session, IMAGE_SHAPE, logits, keep_prob, image_input)

In [None]:
def run_tests():
  print("NUMBER OF IMAGES:", NUMBER_OF_IMAGES)

  # download vgg model
  helper.maybe_download_pretrained_vgg(DATA_DIRECTORY)

  # A function to get batches
  get_batches_fn = helper.gen_batch_function(TRAINING_DATA_DIRECTORY, IMAGE_SHAPE)
  tf.reset_default_graph()
   
  with tf.Session() as session:
    
    # Restore variables and model
    saver = tf.train.import_meta_graph("./fcn_liquid_model-500.meta")
    saver.restore(session, tf.train.latest_checkpoint("./"))
    print("Model restored.")
    
     # Initilize all variables
    session.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        
    # load the model and weights
    model = tf.saved_model.loader.load(session, ['vgg16'], VGG_PATH)

    # Get Tensors to be returned from graph
    graph = tf.get_default_graph()
    image_input = graph.get_tensor_by_name('image_input:0')
    keep_prob = graph.get_tensor_by_name('keep_prob:0')
    layer3 = graph.get_tensor_by_name('layer3_out:0')
    layer4 = graph.get_tensor_by_name('layer4_out:0')
    layer7 = graph.get_tensor_by_name('layer7_out:0')
    class_labels = tf.reshape(correct_label, (-1, NUMBER_OF_CLASSES))

    # The resulting network architecture, adding a decoder on top of the given vgg model
    model_output = layers(layer3, layer4, layer7, NUMBER_OF_CLASSES)
    
    # reshape 4D tensors to 2D
    # Each row represents a pixel, each column a class
    logits = tf.reshape(model_output, (-1, NUMBER_OF_CLASSES))
   

    # The cross_entropy_loss is the cost which we are trying to minimize to yield higher accuracy
    #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = class_labels)
    #cross_entropy_loss = tf.reduce_mean(cross_entropy)

    # The model implements this operation to find the weights/parameters that would yield correct pixel labels
    #train_op = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy_loss)    
    
    # TEST IMAGE
    test_image = scipy.misc.imresize(scipy.misc.imread("./data/data_liquid/testing/image_2/data0257.png"), IMAGE_SHAPE)
    
    im_softmax = session.run(
            [tf.nn.softmax(logits)],
            {keep_prob: 1.0, image_input: [test_image]})
    """
    im_softmax = im_softmax[0][:, 1].reshape(IMAGE_SHAPE[0], IMAGE_SHAPE[1])
    segmentation = (im_softmax > 0.5).reshape(IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1)
    mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
    mask = scipy.misc.toimage(mask, mode="RGBA")
    result = scipy.misc.toimage(test_image)
    result.paste(mask, box=None, mask=mask)
    plt.imshow(result)
    plt.show() """
   
    # Save inference data
    #helper.save_inference_samples(RUNS_DIRECTORY, DATA_DIRECTORY, session, IMAGE_SHAPE, logits, keep_prob, image_input)    

In [None]:
[n.name for n in tf.get_default_graph().as_graph_def().node]

In [None]:
def run_traintest():
  print("NUMBER OF IMAGES:", NUMBER_OF_IMAGES)

  # download vgg model
  helper.maybe_download_pretrained_vgg(DATA_DIRECTORY)

  # A function to get batches
  get_batches_fn = helper.gen_batch_function(TRAINING_DIRECTORIES, IMAGE_SHAPE)

 
  
  with tf.Session() as session:
        
    # Returns the three layers, keep probability and input layer from the vgg architecture
    image_input, keep_prob, layer3, layer4, layer7 = load_vgg(session, VGG_PATH)

    # The resulting network architecture, adding a decoder on top of the given vgg model
    model_output = layers(layer3, layer4, layer7, NUMBER_OF_CLASSES)

    # Returns the output logits, training operation and cost operation to be used
    # For the logits: each row represents a pixel, each column a class
    # training operation is what is used to get the right parameters to the model to correctly label the pixels
    # the cross entropy loss is the cost which we are minimizing, lower cost should yield higher accuracy
    logits, train_op, cross_entropy_loss = optimize(model_output, correct_label, learning_rate, NUMBER_OF_CLASSES)
    
    # Create saver
    saver = tf.train.Saver()
    
    # Initilize all variables
    session.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

    # train the neural network
    train_nn(session, EPOCHS, BATCH_SIZE, get_batches_fn, 
             train_op, cross_entropy_loss, image_input,
             correct_label, keep_prob, learning_rate)
    
    # Save inference data
    helper.save_inference_samples(RUNS_DIRECTORY, DATA_DIRECTORY, session, IMAGE_SHAPE, logits, keep_prob, image_input)    
    
    # Save model
    saver.save(session, "./fcn_liquid_model", global_step = 500)

In [None]:
tf.reset_default_graph()

with tf.Session() as session:
    # Restore model graph
    saver = tf.train.import_meta_graph("./model.ckpt.meta")

    # Get the three layers, keep probability and input layer
    graph = tf.get_default_graph()
    image_input = graph.get_tensor_by_name('image_input:0')
    keep_prob = graph.get_tensor_by_name('keep_prob:0')
    layer3 = graph.get_tensor_by_name('layer3_out:0')
    layer4 = graph.get_tensor_by_name('layer4_out:0')
    layer7 = graph.get_tensor_by_name('layer7_out:0')

    # The resulting network architecture, adding a decoder on top of the given vgg model
    model_output = layers(layer3, layer4, layer7, NUMBER_OF_CLASSES)
    logits = tf.reshape(model_output, (-1, NUMBER_OF_CLASSES))
    print('Graph constructed')

	# Initialize all variables
    session.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    print('Variables initialized')

    # Restore variables that were trained from previous session (a subset of all global & local variables).
    saver.restore(session, tf.train.latest_checkpoint("./"))
    print("Trained Variables restored from previous session.")
    
    print([n.name for n in tf.get_default_graph().as_graph_def().node])

    # load image
    test_image = scipy.misc.imresize(scipy.misc.imread("./data/data_liquid/testing/image_2/data0257.png"), IMAGE_SHAPE)

    # Make predictions
    feed = {keep_prob: 1.0, image_input: [test_image]}
    im_softmax = session.run( [tf.nn.softmax(logits)], feed)
    im_softmax = im_softmax[0][:, 1].reshape(IMAGE_SHAPE[0], IMAGE_SHAPE[1])
    segmentation = (im_softmax > 0.5).reshape(IMAGE_SHAPE[0], IMAGE_SHAPE[1], 1)
    mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
    mask = scipy.misc.toimage(mask, mode="RGBA")
    result = scipy.misc.toimage(test_image)
    result.paste(mask, box=None, mask=mask)
    plt.imshow(result)
    plt.show()

In [None]:
from glob import glob
import re

directory = "./data/data_liquid/training"
dirs = [x[0] for x in os.walk(directory)]
#print(dirs[1:])
dirs = dirs[1:]

image_paths = glob(os.path.join(TRAINING_DATA_DIRECTORY, 'image_2', '*.png'))
#print(image_paths)

x = 0
for d in dirs:
    ip = glob(os.path.join(d, 'data*.png'))
    x += len(ip)

print(x)
#print(ip)

if image_paths == ip:
    print("Yes")
else:
    print("No")

label_paths = {
                re.sub(r'ground_truth', 'data', os.path.basename(path)): path
                for path in glob(os.path.join(TRAINING_DATA_DIRECTORY, 'gt_image_2', '*.png'))}

#print(label_paths)


lp = {
       re.sub(r'ground_truth', 'data', os.path.basename(path)): path
       for path in glob(os.path.join(dirs[1], 'ground_truth*.png'))}

print(lp)



if label_paths == lp:
    print("Yes")
else:
    print("No")