In [1]:
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import os, sys
import numpy as np
import math
import skimage
import skimage.io

# Height, width and channel count that decoded images are reshaped to by the
# readers below (labels use the same height/width with a single channel).
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
IMAGE_DEPTH = 3

# Presumably the number of segmentation classes in the dataset -- TODO confirm.
NUM_CLASSES = 11
# Split sizes. Only the TRAIN value is read in this file (CamVidInputs uses it
# to size the minimum queue fill).
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 367
NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 101
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 1

def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
  """Batch single (image, label) examples through a TensorFlow input queue.

  Args:
    image: tensor holding one image example.
    label: tensor holding the matching label example.
    min_queue_examples: int, used both as the shuffle queue's
      `min_after_dequeue` and as the base of the queue capacity.
    batch_size: number of examples per emitted batch.
    shuffle: if truthy, use `tf.train.shuffle_batch`; otherwise use the
      order-preserving `tf.train.batch`.

  Returns:
    (images, labels): the batched image tensor and the batched label tensor,
    each with a leading `batch_size` dimension.
  """
  # Settings shared by both queue flavours; a single enqueue thread is used.
  queue_kwargs = dict(
      batch_size=batch_size,
      num_threads=1,
      capacity=min_queue_examples + 3 * batch_size)

  if not shuffle:
    batched = tf.train.batch([image, label], **queue_kwargs)
  else:
    batched = tf.train.shuffle_batch(
        [image, label],
        min_after_dequeue=min_queue_examples,
        **queue_kwargs)

  image_batch, label_batch = batched
  return image_batch, label_batch

def CamVid_reader_seq(filename_queue, seq_length):
  """Decode a sequence of image/label PNG pairs.

  Args:
    filename_queue: indexable pair; element 0 holds the image filenames and
      element 1 the label filenames, each split into `seq_length` pieces
      along axis 0.
    seq_length: number of pieces to split each filename tensor into.

  Returns:
    (image_seq, label_seq): two lists of tensors. Images are reshaped to
    (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH) and cast to float32; labels are
    reshaped to (IMAGE_HEIGHT, IMAGE_WIDTH, 1) and cast to int64.
  """
  def _load_png(path_piece, target_shape, target_dtype):
    # squeeze drops the size-1 axis left over from tf.split.
    encoded = tf.read_file(tf.squeeze(path_piece))
    decoded = tf.image.decode_png(encoded)
    return tf.cast(tf.reshape(decoded, target_shape), target_dtype)

  image_paths = tf.split(axis=0, num_or_size_splits=seq_length, value=filename_queue[0])
  label_paths = tf.split(axis=0, num_or_size_splits=seq_length, value=filename_queue[1])

  image_seq, label_seq = [], []
  for image_path, label_path in zip(image_paths, label_paths):
    frame = _load_png(image_path, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH), tf.float32)
    annotation = _load_png(label_path, (IMAGE_HEIGHT, IMAGE_WIDTH, 1), tf.int64)
    image_seq.append(frame)
    label_seq.append(annotation)
  return image_seq, label_seq

def CamVid_reader(filename_queue):
  """Read and decode one image/label PNG pair.

  Args:
    filename_queue: indexable pair whose element 0 is the image filename
      tensor and element 1 the label filename tensor.

  Returns:
    (image, label): the decoded image reshaped to
    (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH) and the decoded label reshaped
    to (IMAGE_HEIGHT, IMAGE_WIDTH, 1). No dtype cast is applied here.
  """
  image_path = filename_queue[0]
  label_path = filename_queue[1]

  # Raw file contents (still PNG-encoded).
  image_png = tf.read_file(image_path)
  label_png = tf.read_file(label_path)

  image = tf.reshape(tf.image.decode_png(image_png),
                     (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
  label = tf.reshape(tf.image.decode_png(label_png),
                     (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
  return image, label
#This is the first part that we need to run to get the names
def get_filename_list(path):
  """Read an image/label list file and return its two path columns.

  Each non-blank line is expected to contain an image path followed by a
  label path, separated by whitespace.

  Args:
    path: path of the text file to read.

  Returns:
    (image_filenames, label_filenames): two equally long lists of strings,
    in file order.

  Raises:
    IndexError: if a non-blank line contains fewer than two fields.
  """
  image_filenames = []
  label_filenames = []
  # `with` closes the handle even when a malformed line raises.
  with open(path) as list_file:
    for line in list_file:
      fields = line.split()
      if not fields:
        # Tolerate blank lines, e.g. an extra newline at the end of the file.
        continue
      image_filenames.append(fields[0])
      label_filenames.append(fields[1])
  return image_filenames, label_filenames

#This is the second part that we need to run to get filename_queue which is a tensor
def CamVidInputs(image_filenames, label_filenames, batch_size):
  """Build the shuffled, batched training input pipeline.

  Args:
    image_filenames: list of image file paths.
    label_filenames: list of label file paths, aligned with `image_filenames`.
    batch_size: number of examples per batch.

  Returns:
    (images, labels): batched tensors produced by
    `_generate_image_and_label_batch` with shuffling enabled. Images are
    cast to float32; labels keep the dtype produced by `CamVid_reader`.
  """
  image_paths = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
  label_paths = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)

  # Emits one (image path, label path) pair at a time, in shuffled order.
  path_pair = tf.train.slice_input_producer([image_paths, label_paths], shuffle=True)

  raw_image, label = CamVid_reader(path_pair)
  float_image = tf.cast(raw_image, tf.float32)

  # Keep at least 40% of one training epoch queued before dequeuing starts.
  queue_fill_fraction = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * queue_fill_fraction)
  print('Filling queue with %d CamVid images before starting to train. '
        'This will take a few minutes.' % min_queue_examples)

  return _generate_image_and_label_batch(
      float_image, label, min_queue_examples, batch_size, shuffle=True)
def get_all_test_data(im_list, la_list):
  """Eagerly load image/label files into lists of NumPy arrays.

  Args:
    im_list: iterable of image file paths.
    la_list: iterable of label file paths, paired positionally with `im_list`.

  Returns:
    (images, labels): two lists. Each image is converted to float32 and given
    a new leading axis; each label is given a new leading axis and a new
    trailing axis.
  """
  images, labels = [], []
  for image_path, label_path in zip(im_list, la_list):
    image = np.array(skimage.io.imread(image_path), np.float32)[np.newaxis]
    label = skimage.io.imread(label_path)[np.newaxis][..., np.newaxis]
    images.append(image)
    labels.append(label)
  return images, labels
# Training list file and batch size for the queue-based input pipeline.
path = '../SegNet/CamVid/train.txt'
batch_size = 5
# Disabled smoke test of the training pipeline (kept for reference):
# image_filenames,label_filenames=get_filename_list(path)
# image,label_batch = CamVidInputs(image_filenames,label_filenames,batch_size)

In [2]:
# %load segnet_model.py

import os

import numpy as np
import tensorflow as tf
import time
import math

# NOTE(review): neither constant is referenced in the visible cells.
# Presumably the per-channel mean used for VGG input normalisation -- TODO confirm
# the channel order and whether the subtraction should be applied to `images`.
VGG_MEAN = [103.939, 116.779, 123.68]
# Presumably [height, width, 1] of one label map -- TODO confirm.
shape_specific=[360,480,1]


def vgg_param_load(vgg16_npy_path, verbose=False):
    """Load the pre-trained VGG-16 parameter dictionary from a ``.npy`` file.

    The file is expected to contain a pickled dict mapping a layer name to a
    two-element sequence ``[weights, biases]``.

    Args:
        vgg16_npy_path: path of the ``.npy`` file.
        verbose: when true, print each layer name with the shapes of its
            weights and biases, followed by a completion message. Defaults to
            False, matching the previous behaviour where the reporting code
            sat after the ``return`` and never ran.

    Returns:
        The parameter dictionary stored in the file.
    """
    # allow_pickle=True is required on NumPy >= 1.16.3 to read a pickled dict.
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # parameter files from a trusted source.
    vgg_param_dict = np.load(vgg16_npy_path, encoding='latin1',
                             allow_pickle=True).item()
    if verbose:
        for key in vgg_param_dict:
            print(key,vgg_param_dict[key][0].shape,vgg_param_dict[key][1].shape)
        print("vgg parameter loaded")
    return vgg_param_dict
   
    

In [3]:
# Location of the pre-trained parameter file, relative to the working directory.
vgg16_npy_path = "vgg16.npy"
# Module-level dict that get_conv_filter() and get_bias() read by layer name.
vgg_param_dict = vgg_param_load(vgg16_npy_path)

In [4]:
def avg_pool(bottom, name):
    """Apply 2x2 average pooling with stride 2 and SAME padding to `bottom`."""
    window = [1, 2, 2, 1]  # used for both the kernel size and the strides
    return tf.nn.avg_pool(bottom, ksize=window, strides=window,
                          padding='SAME', name=name)

def max_pool(inputs,name):
    """Apply 2x2 max pooling (stride 2, SAME padding) and keep the argmax.

    Args:
        inputs: tensor to pool.
        name: name given to the pooling op.

    Returns:
        (value, index, input_shape): the pooled maxima, the flattened position
        of each maximum as reported by ``tf.nn.max_pool_with_argmax``, and the
        static shape of `inputs` as a Python list (used later to unpool).
        Details of the index layout:
        https://www.tensorflow.org/versions/r1.0/api_docs/python/tf/nn/max_pool_with_argmax
    """
    window = [1, 2, 2, 1]
    pooled, argmax = tf.nn.max_pool_with_argmax(
        inputs, ksize=window, strides=window, padding='SAME', name=name)
    for caption, tensor in (('value shape', pooled), ('index shape', argmax)):
        print(caption, tensor.shape)
    input_shape = inputs.get_shape().as_list()
    return pooled, argmax, input_shape
    
def conv_layer(bottom, name, training_state):
    """Encoder block: fixed VGG convolution -> bias -> batch norm -> ReLU.

    Args:
        bottom: input tensor.
        name: VGG layer name; selects the filter and bias from
            `vgg_param_dict` and names the variable scope.
        training_state: forwarded to `batch_norm` to choose between training
            and inference behaviour.

    Returns:
        The ReLU-activated output tensor (also printed for inspection).
    """
    with tf.variable_scope(name):
        kernel = get_conv_filter(name)
        convolved = tf.nn.conv2d(bottom, kernel, [1, 1, 1, 1], padding='SAME')
        pre_norm = tf.nn.bias_add(convolved, get_bias(name))
        activated = tf.nn.relu(batch_norm(pre_norm, training_state, name))
        print(activated)
        return activated
        
def batch_norm(bias_input, is_training, scope):
    """Apply ``tf.contrib.layers.batch_norm`` (with ``center=False``) under ``<scope>_bn``.

    With ``is_training=True`` the layer accumulates batch statistics into its
    moving mean and moving variance. Otherwise it normalises with those moving
    values, which is what validation/test data needs: the actual mean and
    variance of held-out data must not be used.

    ``reuse=True`` in the inference branch tells TensorFlow to look up the
    variables already created under ``<scope>_bn`` instead of creating new
    ones, so that branch expects the training-mode layer to exist already.

    Args:
        bias_input: tensor to normalise.
        is_training: must be the literal ``True`` to select training mode; any
            other value (including truthy non-bool values) selects inference.
        scope: base scope name; ``"_bn"`` is appended.

    Returns:
        The normalised tensor.
    """
    bn_scope = scope + "_bn"
    if is_training is not True:
        return tf.contrib.layers.batch_norm(bias_input, is_training=False, center=False,
                                            scope=bn_scope, reuse=True)
    return tf.contrib.layers.batch_norm(bias_input, is_training=True, center=False,
                                        scope=bn_scope)

def get_conv_filter(name):
    """Return the pre-trained VGG-16 kernel of layer `name` as a constant tensor.

    The kernel is element 0 of ``vgg_param_dict[name]``; ``vgg_param_load``
    can print the per-layer shapes.
    """
    layer_params = vgg_param_dict[name]
    return tf.constant(layer_params[0], name="filter")

def get_bias(name):
    """Return the pre-trained VGG-16 bias of layer `name` as a constant tensor.

    The bias is element 1 of ``vgg_param_dict[name]``; ``vgg_param_load``
    can print the per-layer shapes.
    """
    layer_params = vgg_param_dict[name]
    return tf.constant(layer_params[1], name="biases")
    

def unravel_index(indices, shape):
    """Convert flattened (row-major) indices into per-axis coordinates.

    Used to turn the flattened argmax returned by max pooling back into
    coordinates of the original 4-D tensor.

    Args:
        indices: 1-D tensor of flattened indices (e.g. the reshaped max-pool
            argmax).
        shape: shape of the original data,
            ``[batch_size, height, width, num_channels]``.

    Returns:
        An int64 tensor of shape ``[len(shape), num_indices]``; row ``k`` holds
        the coordinate of every index along axis ``k``.
    """
    with tf.name_scope('unravel_index'):
        indices = tf.to_int64(tf.expand_dims(indices, 0))   # [1, N]
        shape = tf.to_int64(tf.expand_dims(shape, 1))       # [R, 1]
        # strides[k] is the number of elements spanned by axes k..R-1, and
        # strides_shifted[k] the number spanned by axes k+1..R-1.
        strides = tf.to_int64(tf.cumprod(shape, reverse=True))
        strides_shifted = tf.to_int64(tf.cumprod(shape, exclusive=True, reverse=True))
        return (indices % strides) // strides_shifted
    
def up_sampling(max_values,max_indices,shape):
    """Unpool: scatter values back to the positions recorded by max pooling.

    Args:
        max_values: tensor of values to place; flattened internally.
        max_indices: tensor of flattened positions (max-pool argmax) with the
            same number of elements as `max_values`; flattened internally.
        shape: shape of the tensor to rebuild,
            ``[batch_size, height, width, num_channels]``.

    Returns:
        A dense tensor of shape `shape` holding `max_values` at the recorded
        positions and zeros elsewhere.
    """
    flat_values = tf.reshape(max_values,[-1])
    flat_indices = tf.reshape(max_indices,[-1])
    print('The shape of reshaped maxindex',flat_indices.shape)

    # unravel_index yields one row per axis; SparseTensor wants one row per
    # entry, so transpose via unstack/stack.
    coords_by_axis = tf.unstack(unravel_index(flat_indices,shape), axis=0)
    coords = tf.stack(coords_by_axis, axis=1)
    print('The shape of 4d indices', coords.shape)

    sparse = tf.SparseTensor(coords, values = flat_values, dense_shape = shape)
    # validate_indices=False skips the index-ordering check on the argmax output.
    dense = tf.sparse_tensor_to_dense(sparse, validate_indices=False)
    print('The shape of sparse matrix', dense.shape)
    return dense


def _initialization(k,c):
    r"""Return a truncated-normal initializer scaled as in He et al. (2015).

    Reference: https://arxiv.org/abs/1502.01852

    The kernel is drawn from a zero-mean Gaussian with standard deviation
    \sqrt(2 / n_l), where n_l = k^2 * c is the number of input units feeding
    one output unit.

    Args:
        k: spatial filter size (the kernel is assumed to be k x k).
        c: number of input channels of the filter tensor.

    Returns:
        A ``tf.truncated_normal_initializer`` with the computed stddev.
    """
    std = math.sqrt(2. / (k**2 * c))
    return tf.truncated_normal_initializer(stddev = std)



    
def deconv_layer(inputs,kernel_size,output_shape,name,training_state):
    """Decoder block: transposed convolution -> bias -> batch norm -> ReLU.

    Args:
        inputs: input tensor (typically an unpooled feature map).
        kernel_size: ``[height, width, output_channel, input_channel]`` of the
            trainable kernel.
        output_shape: full shape of the layer output; its last entry also sizes
            the bias.
        name: variable-scope name, also used as prefix of the variable names.
        training_state: forwarded to `batch_norm`.

    Returns:
        The ReLU-activated output tensor (also printed for inspection).
    """
    filter_size, in_channels = kernel_size[0], kernel_size[3]
    out_channels = output_shape[-1]
    with tf.variable_scope(name):
        kernel = tf.get_variable(name+"weight", shape=kernel_size,
                                 initializer=_initialization(filter_size, in_channels))
        offset = tf.get_variable(name+"bias", shape=out_channels,
                                 initializer=tf.constant_initializer(0.1))
        # Stride 1 with SAME padding keeps the spatial size unchanged.
        transposed = tf.nn.conv2d_transpose(inputs, kernel, output_shape=output_shape,
                                            strides=[1,1,1,1], padding='SAME')
        pre_norm = tf.nn.bias_add(transposed, offset)
        activated = tf.nn.relu(batch_norm(pre_norm, training_state, name))
        print(activated)
        return activated

In [5]:
# Read the image/label path pairs listed in the CamVid test split.
im_files, lab_files = get_filename_list('../SegNet/CamVid/test.txt')
# Prefix every listed path with ".."; presumably the list stores paths relative
# to a directory one level above the working directory -- TODO confirm.
im_files = [".." + n for n in im_files]
lab_files = [".." + n for n in lab_files]


# Load every listed file into memory. Despite its singular name, `image` is a
# list holding one array (with a leading batch axis) per file.
image, labels = get_all_test_data(im_files, lab_files)

In [6]:
# Build the SegNet encoder/decoder graph for the first loaded test image.
# Decoder output shapes are derived from the shapes recorded by max_pool, so
# the graph no longer depends on a hard-coded 360x480 input or batch size 1.
# NOTE(review): VGG_MEAN is defined in an earlier cell but no mean subtraction
# is applied to `images` here -- confirm whether that is intended.
images = image[0]
training_state = True

# --------------------------------- Encoder --------------------------------- #

# First box of convolution layers (2)
conv1_1 = conv_layer(images, "conv1_1",training_state)
conv1_2 = conv_layer(conv1_1, "conv1_2",training_state)
pool1,pool1_index,shape_1 = max_pool(conv1_2, 'pool1')

# Second box of convolution layers (4)
conv2_1 = conv_layer(pool1, "conv2_1",training_state)
conv2_2 = conv_layer(conv2_1, "conv2_2",training_state)
pool2,pool2_index,shape_2 = max_pool(conv2_2, 'pool2')

# Third box of convolution layers (7)
conv3_1 = conv_layer(pool2, "conv3_1",training_state)
conv3_2 = conv_layer(conv3_1, "conv3_2",training_state)
conv3_3 = conv_layer(conv3_2, "conv3_3",training_state)
pool3,pool3_index,shape_3 = max_pool(conv3_3, 'pool3')

# Fourth box of convolution layers (10)
conv4_1 = conv_layer(pool3, "conv4_1",training_state)
conv4_2 = conv_layer(conv4_1, "conv4_2",training_state)
conv4_3 = conv_layer(conv4_2, "conv4_3",training_state)
pool4,pool4_index,shape_4 = max_pool(conv4_3, 'pool4')

# Fifth box of convolution layers (13)
conv5_1 = conv_layer(pool4, "conv5_1",training_state)
conv5_2 = conv_layer(conv5_1, "conv5_2",training_state)
conv5_3 = conv_layer(conv5_2, "conv5_3",training_state)
pool5, pool5_index,shape_5 = max_pool(conv5_3, 'pool5')

# --------------------------------- Decoder --------------------------------- #
# Each box unpools with the argmax of the matching encoder pool, then applies
# transposed convolutions; the last layer of a box reduces the channel count
# to what the next (shallower) box expects.

# First box of deconvolution layers (3)
deconv1_1 = up_sampling(pool5, pool5_index,shape=shape_5)
deconv1_2 = deconv_layer(deconv1_1,[3,3,512,512],shape_5,"deconv1_2",training_state)
deconv1_3 = deconv_layer(deconv1_2,[3,3,512,512],shape_5,"deconv1_3",training_state)
deconv1_4 = deconv_layer(deconv1_3,[3,3,512,512],shape_5,"deconv1_4",training_state)

# Second box of deconvolution layers (6)
deconv2_1 = up_sampling(deconv1_4,pool4_index,shape = shape_4)
deconv2_2 = deconv_layer(deconv2_1,[3,3,512,512],shape_4,"deconv2_2",training_state)
deconv2_3 = deconv_layer(deconv2_2,[3,3,512,512],shape_4,"deconv2_3",training_state)
deconv2_4 = deconv_layer(deconv2_3,[3,3,256,512],shape_4[:3] + [256],"deconv2_4",training_state)

# Third box of deconvolution layers (9)
deconv3_1 = up_sampling(deconv2_4,pool3_index,shape = shape_3)
deconv3_2 = deconv_layer(deconv3_1,[3,3,256,256],shape_3,"deconv3_2",training_state)
deconv3_3 = deconv_layer(deconv3_2,[3,3,256,256],shape_3,"deconv3_3",training_state)
deconv3_4 = deconv_layer(deconv3_3,[3,3,128,256],shape_3[:3] + [128],"deconv3_4",training_state)

# Fourth box of deconvolution layers (11)
deconv4_1 = up_sampling(deconv3_4,pool2_index,shape = shape_2)
deconv4_2 = deconv_layer(deconv4_1,[3,3,128,128],shape_2,"deconv4_2",training_state)
deconv4_3 = deconv_layer(deconv4_2,[3,3,64,128],shape_2[:3] + [64],"deconv4_3",training_state)

# Fifth box of deconvolution layers (13)
deconv5_1 = up_sampling(deconv4_3,pool1_index,shape = shape_1)
deconv5_2 = deconv_layer(deconv5_1,[3,3,64,64],shape_1,"deconv5_2",training_state)
# NOTE(review): deconv_layer applies batch norm and ReLU, so the scores fed to
# the softmax below are non-negative -- confirm this is intended for the
# classification layer.
deconv5_3 = deconv_layer(deconv5_2,[3,3,NUM_CLASSES,64],shape_1[:3] + [NUM_CLASSES],"deconv5_3",training_state)

# Per-pixel class probabilities.
prob = tf.nn.softmax(deconv5_3,name = "prob")

Tensor("conv1_1/Relu:0", shape=(1, 360, 480, 64), dtype=float32)
Tensor("conv1_2/Relu:0", shape=(1, 360, 480, 64), dtype=float32)
value shape (1, 180, 240, 64)
index shape (1, 180, 240, 64)
Tensor("conv2_1/Relu:0", shape=(1, 180, 240, 128), dtype=float32)
Tensor("conv2_2/Relu:0", shape=(1, 180, 240, 128), dtype=float32)
value shape (1, 90, 120, 128)
index shape (1, 90, 120, 128)
Tensor("conv3_1/Relu:0", shape=(1, 90, 120, 256), dtype=float32)
Tensor("conv3_2/Relu:0", shape=(1, 90, 120, 256), dtype=float32)
Tensor("conv3_3/Relu:0", shape=(1, 90, 120, 256), dtype=float32)
value shape (1, 45, 60, 256)
index shape (1, 45, 60, 256)
Tensor("conv4_1/Relu:0", shape=(1, 45, 60, 512), dtype=float32)
Tensor("conv4_2/Relu:0", shape=(1, 45, 60, 512), dtype=float32)
Tensor("conv4_3/Relu:0", shape=(1, 45, 60, 512), dtype=float32)
value shape (1, 23, 30, 512)
index shape (1, 23, 30, 512)
Tensor("conv5_1/Relu:0", shape=(1, 23, 30, 512), dtype=float32)
Tensor("conv5_2/Relu:0", shape=(1, 23, 30, 512), dt

In [7]:
# Sanity check: pre-pooling shapes recorded by max_pool for the three deepest encoder stages.
print(shape_5, shape_4, shape_3)

[1, 23, 30, 512] [1, 45, 60, 512] [1, 90, 120, 256]


In [8]:
# Initialise the decoder variables and compute the per-pixel predicted class.
with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated; TensorFlow's own warning
    # points to tf.global_variables_initializer() as the replacement.
    sess.run(tf.global_variables_initializer())
    print(prob)
    # argmax over the last (class) axis gives one class id per pixel.
    result = sess.run(tf.argmax(prob, axis=-1))

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Tensor("prob:0", shape=(1, 360, 480, 11), dtype=float32)


In [9]:
from Utils import writeImage



In [11]:
# Save the class map of the first image in the batch. writeImage comes from the
# project's Utils module; presumably it colour-codes class ids -- TODO confirm.
writeImage(result[0], 'pred_image.png')