# Segnet Implementation

In [1]:
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import numpy as np 
import matplotlib.pyplot as plt
from random import randint 
import math
import os 
import cv2 
from math import ceil
import skimage
import skimage.io
import skimage.transform
from datetime import datetime
import time

  from ._conv import register_converters as _register_converters


## Constraints 

In [2]:
IMAGE_HEIGHT = 300
IMAGE_WIDTH = 600 
IMAGE_DEPTH = 3 
RESIZED_IMAGE_HEIGHT = 256 
RESIZED_IMAGE_WIDTH = 512

NUM_CLASSES = 2 
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 25
NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 2
NUM_EXAMPLES_FOR_EPOCH_FOR_EVAL = 2
BATCH_SIZE = 3
TEST_ITER = NUM_EXAMPLES_PER_EPOCH_FOR_TEST / BATCH_SIZE

NUM_LABELS = 2 
NUM_EPOCHS = 2

In [3]:
MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.

INITIAL_LEARNING_RATE = 0.001      # Initial learning rate.
EVAL_BATCH_SIZE = 1
OPTIMIZER = "adam"

<P> <P/> 

## Data Handling  

In [4]:
data_dir = '/data/dataset_example/' # '/Users/odeniz/Dropbox/data-science/Deep Learning/Semantic Segmentation/dataset_example/' 

In [5]:
filenames = tf.train.string_input_producer([os.path.join(data_dir, 'images/%d.jpg' % i) for i in range(0, 27)]) 
filenames_labels = tf.train.string_input_producer([os.path.join(data_dir, 'masks/%d.jpg' % i) for i in range(0, 27)]) 

In [8]:
def _generate_image_and_label_batch(image, label, min_queue_examples, batch_size, shuffle):
    
    """ construct a queued batch of images and labels. 
    
    Args: 
        image: 3-D Tensor of [height, width, 3] of type.float32.
        label: 1-D Tensor of type.int32
        min_queue_examples: int32, minimum number of samples to retain 
            in the queue that provides the batches of examples 
        batch_size: Number of images per batch 
        shuffle: boolean indicating whether to use a shuffling queue. 
    
   Returns: 
       images: Images. 4D tensor of [batch_size, height, width, 3] size.
       labels: Labels. 1D tensor of [batch_size] size. 
     """
    
    # create a queue that shuffles the examples, and then read 
    # 'batch_size' images + labels from the example queue. 
    num_preprocess_threads = 1 # 16
    if shuffle: 
        images, label_batch = tf.train.shuffle_batch(
            [image, label], 
            batch_size=batch_size,
            num_threads=num_preprocess_threads, 
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples)
        
    else: 
        images, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size)
        
    # display the training images in the visualizer
    tf.summary.image('images', images)
    
    return images, label_batch  # tf.reshape(label_batch, [batch_size])  


In [9]:
def distorted_inputs(data_dir, validation, batch_size):
    """ construct augmented input
    
        data_dir: path to data directory
        batch_size: number of images per batch
        
        Returns: 
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1D tensor of [batch_size] size. 
    """
    if validation: 
        filenames = [os.path.join(data_dir, 'validation/images/%d.jpg' % i) for i in range(27, 53)] 
        filenames_labels = [os.path.join(data_dir, 'validation/masks/%d.png' % i) for i in range(27, 53)]
    else: 
        filenames = [os.path.join(data_dir, 'images/%d.jpg' % i) for i in range(0, 26)] 
        filenames_labels = [os.path.join(data_dir, 'masks/%d.png' % i) for i in range(0, 26)] 
    
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)
            
    for f in filenames_labels:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)


            
    # create a queue that produces the filenames to read
    filename_queue = tf.train.string_input_producer(filenames)
    filename_label_queue = tf.train.string_input_producer(filenames_labels)
    

    reader = tf.WholeFileReader()
    _, imageValue = reader.read(filename_queue)
    image_bytes = tf.image.decode_image(imageValue)
    image = tf.reshape(image_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
    reshaped_image = tf.cast(image, tf.float32)
    
    # read labels 
#     read_input_labels = read_data(filename_label_queue)
#     label = read_input_labels.float32image
    _, labelValue = reader.read(filename_label_queue)
    label_bytes = tf.image.decode_image(labelValue)
    label = tf.reshape(label_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
    
    
    height = RESIZED_IMAGE_HEIGHT
    width = RESIZED_IMAGE_WIDTH
    
    
    # CHANGE. RANDOMIZE THESE OPERATIONS HERE. 
    # because these operations are not commutative, consider randomizing 
    # the order of their operation. 
    
    #############################################
    # Randomly crop a [height, width] section of the image.
    distorted_image = tf.random_crop(reshaped_image, [height, width, 3])
    # randomly crop the image horizontally. 
#     distorted_image = tf.image.random_flip_left_right(distorted_image)
    
    # subtract off the mean and divide by the variance of the pixels. 
    float_image = tf.image.per_image_standardization(distorted_image)
    resized_label = tf.random_crop(label, [height, width, 1])
    #############################################
    
    # set the shapes of tensors.
#     float_image.set_shape([height, width, 3])
#     label.set_shape([IMAGE_HEIGHT, IMAGE_WIDTH, 1])
    
    # ensure that the random shuffling has good mixing properties.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * min_fraction_of_examples_in_queue)
    
    print ("Filling queue with %d images before starting to train. This will take a few minutes. "
          % min_queue_examples)


      
    # generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, resized_label,
                                          min_queue_examples, batch_size,
                                          shuffle=True)

<br> <br/> 

<br><br/>

## Model

In [11]:
def _variable_on_cpu(name, shape, initializer):
    """Helper to create a Variable stored on CPU memory.
    Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable
    Returns:
    Variable Tensor
    """
    with tf.device('/gpu:0'):
        var = tf.get_variable(name, shape, initializer=initializer)
    return var

In [12]:
def _variable_with_weight_decay(name, shape, initializer, wd):
    """Helper to create an initialized Variable with weight decay.
    Note that the Variable is initialized with a truncated normal distribution.
    A weight decay is added only if one is specified.
    Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.
    Returns:
    Variable Tensor
    """
    var = _variable_on_cpu(
      name,
      shape,
      initializer)
    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var

In [13]:
def _add_loss_summaries(total_loss):
    """Add summaries for losses.
    Generates moving average for all losses and associated summaries for
    visualizing the performance of the network.
    Args:
    total_loss: Total loss from loss().
    Returns:
    loss_averages_op: op for generating moving averages of losses.
    """
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Name each loss as '(raw)' and name the moving average version of the loss
        # as the original loss name.
        tf.summary.scalar(l.op.name +' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))

    return loss_averages_op


In [14]:
def msra_initializer(kl, dl):
    """
    kl for kernel size, dl for filter number
    """
    stddev = math.sqrt(2. / (kl**2 * dl))
    return tf.truncated_normal_initializer(stddev=stddev)


def orthogonal_initializer(scale = 1.1):
    ''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
    '''
    def _initializer(shape, dtype=tf.float32, partition_info=None):
        flat_shape = (shape[0], np.prod(shape[1:]))
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape) #this needs to be corrected to float32
        return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
    return _initializer


In [15]:
def get_weight_initializer():
    if(CONV_INIT == "var_scale"):
        initializer = tf.contrib.layers.variance_scaling_initializer()
    elif(CONV_INIT == "xavier"):
        initializer=tf.contrib.layers.xavier_initializer()
    else:
        raise ValueError("Chosen weight initializer does not exist")
   

In [16]:
def loss(logits, labels):
    """
      loss func without re-weighting
    """
    # Calculate the average cross entropy loss across the batch.
    logits = tf.reshape(logits, (-1,NUM_CLASSES))
    labels = tf.reshape(labels, [-1])

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def weighted_loss(logits, labels, num_classes, head=None):
    """ median-frequency re-weighting """
    with tf.name_scope('loss'):

        logits = tf.reshape(logits, (-1, num_classes))

        epsilon = tf.constant(value=1e-10)

        logits = logits + epsilon

        # construct one-hot label array
        label_flat = tf.reshape(labels, (-1, 1))

        # should be [batch ,num_classes]
        labels = tf.reshape(tf.one_hot(label_flat, depth=num_classes), (-1, num_classes))

        softmax = tf.nn.softmax(logits)

        cross_entropy = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax + epsilon), head), axis=[1])

        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')

        tf.add_to_collection('losses', cross_entropy_mean)

        loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

    return loss



def cal_loss(logits, labels):
    loss_weight = np.array([
      0.2595,
      0.1826,
      4.5640,
      0.1417,
      0.9051,
      0.3826,
      9.6446,
      1.8418,
      0.6823,
      6.2478,
      7.3614,
      1.0974]) # class 0~11

    labels = tf.cast(labels, tf.int32)
    return loss(logits, labels)
    # now return weighted_loss(logits, labels, num_classes=NUM_CLASSES, head=loss_weight)

In [17]:
def loss_calc(logits, labels):
    """
        logits: tensor, float - [batch_size, width, height, num_classes].
        labels: tensor, int32 - [batch_size, width, height, num_classes].
    """
    # construct one-hot label array
    label_flat = tf.reshape(labels, (-1, 1))
    labels = tf.reshape(tf.one_hot(label_flat, depth=NUM_CLASSES), (-1, NUM_CLASSES))

    #This motif is needed to hook up the batch_norm updates to the training
#     update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
#     with tf.control_dependencies(update_ops):
#         cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
#         tf.summary.scalar('loss', cross_entropy)
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    tf.summary.scalar('loss', cross_entropy)
    return cross_entropy


def unpool_with_argmax(pool, ind, name = None, ksize=[1, 2, 2, 1]):

    """
       Unpooling layer after max_pool_with_argmax.
       Args:
           pool:   max pooled output tensor
           ind:      argmax indices
           ksize:     ksize is the same as for the pool
       Return:
           unpool:    unpooling tensor
    """
    with tf.variable_scope(name):
        input_shape = pool.get_shape().as_list()
        output_shape = (input_shape[0], input_shape[1] * ksize[1], input_shape[2] * ksize[2], input_shape[3])

        flat_input_size = np.prod(input_shape)
        flat_output_shape = [output_shape[0], output_shape[1] * output_shape[2] * output_shape[3]]

        pool_ = tf.reshape(pool, [flat_input_size])
        batch_range = tf.reshape(tf.range(output_shape[0], dtype=ind.dtype), shape=[input_shape[0], 1, 1, 1])
        b = tf.ones_like(ind) * batch_range
        b = tf.reshape(b, [flat_input_size, 1])
        ind_ = tf.reshape(ind, [flat_input_size, 1])
        ind_ = tf.concat([b, ind_], 1)

        ret = tf.scatter_nd(ind_, pool_, shape=flat_output_shape)
        ret = tf.reshape(ret, output_shape)
        return ret

In [18]:
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
    in_channel = shape[2]
    out_channel = shape[3]
    k_size = shape[0]
    with tf.variable_scope(name) as scope:
        kernel = _variable_with_weight_decay('ort_weights', shape=shape, initializer=orthogonal_initializer(), wd=None)
        conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [out_channel], tf.constant_initializer(0.0))
        bias = tf.nn.bias_add(conv, biases)
        if activation is True:
            conv_out = tf.nn.relu(batch_norm_layer(bias, train_phase, scope.name))
        else:
            conv_out = batch_norm_layer(bias, train_phase, scope.name)
    return conv_out

def get_deconv_filter(f_shape):
    """
       reference: https://github.com/MarvinTeichmann/tensorflow-fcn
    """
    width = f_shape[0]
    height = f_shape[0]
    f = ceil(width/2.0)
    c = (2 * f - 1 - f % 2) / (2.0 * f)
    bilinear = np.zeros([f_shape[0], f_shape[1]])
    for x in range(width):
        for y in range(height):
            value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
            bilinear[x, y] = value
    weights = np.zeros(f_shape)
    for i in range(f_shape[2]):
        weights[:, :, i, i] = bilinear

    init = tf.constant_initializer(value=weights,
                                 dtype=tf.float32)
    return tf.get_variable(name="up_filter", initializer=init,
                         shape=weights.shape)



def deconv_layer(inputT, f_shape, output_shape, stride=2, name=None):
    # output_shape = [b, w, h, c]
    # sess_temp = tf.InteractiveSession()
    sess_temp = tf.global_variables_initializer()
    strides = [1, stride, stride, 1]
    with tf.variable_scope(name):
        weights = get_deconv_filter(f_shape)
        deconv = tf.nn.conv2d_transpose(inputT, weights, output_shape,
                                        strides=strides, padding='SAME')
    return deconv


def batch_norm_layer(inputT, is_training, scope):
    return tf.cond(is_training,
          lambda: tf.contrib.layers.batch_norm(inputT, is_training=True,
                           center=False, updates_collections=None, scope=scope+"_bn"),
          lambda: tf.contrib.layers.batch_norm(inputT, is_training=False,
                           updates_collections=None, center=False, scope=scope+"_bn", reuse = True))


In [19]:
def inference(images, batch_size, phase_train):
    # norm1
    norm1 = tf.nn.lrn(images, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
                name='norm1')
    # conv1
    conv1 = conv_layer_with_bn(norm1, [7, 7, images.get_shape().as_list()[3], 64], phase_train, name="conv1")
    print ("conv1 shape is: ", conv1.get_shape())
    # pool1
    pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool1')
    print ("pool1 shape is: ", pool1.get_shape())
    
    
    # conv2
    conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64], phase_train, name="conv2")
    print ("conv2 shape is: ", conv2.get_shape())
    
    # pool2
    pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME', name='pool2')
    print ("pool2 shape is: ", pool2.get_shape())
    
    
    # conv3
    conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64], phase_train, name="conv3")
    print ("conv3 shape is: ", conv3.get_shape())
    
    # pool3
    pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME', name='pool3')
    print ("pool3 shape is: ", pool3.get_shape())
    
    
    # conv4
    conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64], phase_train, name="conv4")
    print ("conv4 shape is: ", conv4.get_shape())
    
    # pool4
    pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME', name='pool4')
    print ("pool4 shape is: ", pool4.get_shape())

    """ End of encoder """
    
    """ start upsample """
    # upsample4
    # Need to change when using different dataset out_w, out_h
#     upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
#     upsample4 = deconv_layer(pool4, [2, 2, 64, 64], pool4.get_shape(), 2, "up4") # [batch_size, 45, 60, 64]
    # decode 4
    print ("pool4 shape is: ", pool4.get_shape())
    unpool_4 = unpool_with_argmax(pool4, ind=pool4_indices, name='unpool_4')
    print ("unpool4 shape is: ", unpool_4.get_shape())
    conv_decode4 = conv_layer_with_bn(unpool_4, [7, 7, 64, 64], phase_train, False, name="conv_decode4")
    print ("conv_decode4 shape is: ", conv_decode4.get_shape())
    print ("")
    # upsample 3
#     upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3') 
#     upsample3= deconv_layer(conv_decode4, [2, 2, 64, 64], conv_decode4.get_shape(), 2, "up3") # [batch_size, 90, 120, 64]
    # decode 3
    
    print ("pool3 shape is: ", pool3.get_shape())
    unpool_3 = unpool_with_argmax(conv_decode4, ind=pool3_indices, name='unpool_3')
    print ("unpool_3 shape is: ", unpool_3.get_shape())
    conv_decode3 = conv_layer_with_bn(unpool_3, [7, 7, 64, 64], phase_train, False, name="conv_decode3")
    print ("conv_decode3 shape is: ", conv_decode3.get_shape())
    print ("")
    # upsample2
#     upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
#     upsample2= deconv_layer(conv_decode3, [2, 2, 64, 64], conv_decode3.get_shape(), 2, "up2") #  [batch_size, 180, 240, 64]
    # decode 2
    print ("pool2 shape is: ", pool2.get_shape())
    unpool_2 = unpool_with_argmax(conv_decode3, ind=pool2_indices, name='unpool_2')
    print ("unpool_2 shape is: ", unpool_2.get_shape())
    conv_decode2 = conv_layer_with_bn(unpool_2, [7, 7, 64, 64], phase_train, False, name="conv_decode2")
    print ("conv_decode2 shape is: ", conv_decode2.get_shape())
    print ("")
    
    # upsample1
#     upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
#     upsample1= deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, RESIZED_IMAGE_HEIGHT, RESIZED_IMAGE_WIDTH, 64], 2, "up1") # 
    # decode4
    print ("pool1 shape is: ", pool1.get_shape())
    unpool_1 = unpool_with_argmax(conv_decode2, ind=pool1_indices, name='unpool_1')
    print ("unpool_1 shape is: ", unpool_1.get_shape())
    conv_decode1 = conv_layer_with_bn(unpool_1, [7, 7, 64, 64], phase_train, False, name="conv_decode1")
    print ("conv_decode1 shape is: ", conv_decode1.get_shape())
    print ("")
    
    """ end of Decode """
    
    
    """ Start Classify """
    # output predicted class number (6)
#     initializer = get_weight_initializer()
    with tf.variable_scope('conv_classifier') as scope:
        kernel = _variable_with_weight_decay('weights',
                                           shape=[1, 1, 64, NUM_CLASSES],
                                           initializer=msra_initializer(1, 64),
                                           wd=0.0005)
        conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        print ("conv shape is: ", conv.get_shape())
        print ("biases shape is: ", biases.get_shape())
        conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)

    logit = conv_classifier
#     loss = cal_loss(conv_classifier, labels)

#     return loss, logit
    return logit

<br> <br/>

## Run the Session: Train & Evaluate 

In [20]:
# constants describing the training process. 
MOVING_AVERAGE_DECAY = 0.9999    # the decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0     # Epochs after which learning rate decays. 
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor. 
INITIAL_LEARNING_RATE = 0.1      # Initial learning rate. 

In [22]:
def train(total_loss, global_step):
    total_sample = 27
    num_batches_per_epoch = 27/1
    """ fix lr """
    lr = INITIAL_LEARNING_RATE
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

In [23]:
def training(is_finetune=False):
#     max_steps = 20000
    max_steps = 2000
    batch_size = BATCH_SIZE
    max_steps = MAX_STEPS
    train_dir = LOG_DIR # /tmp3/first350/TensorFlow/Logs
    image_w = RESIZED_IMAGE_WIDTH
    image_h = RESIZED_IMAGE_HEIGHT
    image_c = IMAGE_DEPTH
    # should be changed if your model stored by different convention
    startstep = 0 # if not is_finetune else int(FLAGS.finetune.split('-')[-1])

    graph = tf.Graph()
    with graph.as_default():

        train_data_node = tf.placeholder( tf.float32, shape=[batch_size, image_h, image_w, image_c])
        train_labels_node = tf.placeholder(tf.int64, shape=[batch_size, image_h, image_w, 1])
        phase_train = tf.placeholder(tf.bool, name='phase_train')
        global_step = tf.Variable(0, trainable=False)
        
        
        images, labels = distorted_inputs(data_dir=data_dir, validation=False, batch_size=BATCH_SIZE)
        val_images, val_labels = distorted_inputs(data_dir=data_dir, validation=True, batch_size=BATCH_SIZE)

        # Build a Graph that computes the logits predictions from the inference model.
        eval_prediction = inference(train_data_node, batch_size, phase_train)
        loss = loss_calc(eval_prediction, train_labels_node)
#         loss = cal_loss(eval_prediction, train_labels_node)

        # Build a Graph that trains the model with one batch of examples and updates the model parameters.
        train_op =  train(loss, global_step) # train(loss)
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()

        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)

        
        with tf.Session(graph=graph) as sess:
            # Build an initialization operation to run below.
            if (is_finetune == True):
                saver.restore(sess, finetune_ckpt )
            else:
                init = tf.global_variables_initializer()
                sess.run(init)

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # Summery placeholders
            summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
            average_pl = tf.placeholder(tf.float32)
            acc_pl = tf.placeholder(tf.float32)
            iu_pl = tf.placeholder(tf.float32)
            average_summary = tf.summary.scalar("test_average_loss", average_pl)
            acc_summary = tf.summary.scalar("test_accuracy", acc_pl)
            iu_summary = tf.summary.scalar("Mean_IU", iu_pl)

            for step in range(startstep, startstep + max_steps):
                image_batch ,label_batch = sess.run([images, labels])
                # since we still use mini-batches in validation, still set bn-layer phase_train = True
                feed_dict = {
                  train_data_node: image_batch,
                  train_labels_node: label_batch,
                  phase_train: True
                }
                start_time = time.time()

                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0:
                    num_examples_per_step = batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                    print (format_str % (datetime.now(), step, loss_value,
                                       examples_per_sec, sec_per_batch))

                    # eval current training batch pre-class accuracy
                    pred = sess.run(eval_prediction, feed_dict=feed_dict)
                    per_class_acc(pred, label_batch)

                if step % 100 == 0:
                    print("start validating.....")
                    total_val_loss = 0.0
                    hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
                    for test_step in range(int(TEST_ITER)):
                        val_images_batch, val_labels_batch = sess.run([val_images, val_labels])

                        _val_loss, _val_pred = sess.run([loss, eval_prediction], feed_dict={
                          train_data_node: val_images_batch,
                          train_labels_node: val_labels_batch,
                          phase_train: True
                        })
                        total_val_loss += _val_loss
                        hist += get_hist(_val_pred, val_labels_batch)
                    print("val loss: ", total_val_loss / TEST_ITER)
                    acc_total = np.diag(hist).sum() / hist.sum()
                    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
                    test_summary_str = sess.run(average_summary, feed_dict={average_pl: total_val_loss / TEST_ITER})
                    acc_summary_str = sess.run(acc_summary, feed_dict={acc_pl: acc_total})
                    iu_summary_str = sess.run(iu_summary, feed_dict={iu_pl: np.nanmean(iu)})
                    print_hist_summery(hist)
                    print(" end validating.... ")

                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.add_summary(test_summary_str, step)
                    summary_writer.add_summary(acc_summary_str, step)
                    summary_writer.add_summary(iu_summary_str, step)
                # Save the model checkpoint periodically.
                if step % 1000 == 0 or (step + 1) == max_steps:
                    checkpoint_path = os.path.join(train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

        coord.request_stop()
        coord.join(threads)

In [24]:
# model_ckpt_dir =
LOG_DIR = '/Users/odeniz/Dropbox/data-science/Deep Learning/Semantic Segmentation/dataset_example/logs'
res_output_dir = '/data/dataset_example/test_results'
# '/Users/odeniz/Dropbox/data-science/Deep Learning/Semantic Segmentation/dataset_example/test_results'
SAVA_IMAGE = True
CONV_INIT = "var_scale"
finetune_ckpt = ''
TESTING = ''
MAX_STEPS = 20000

In [25]:
def per_class_acc(predictions, label_tensor):
    labels = label_tensor
    num_class = NUM_CLASSES
    size = predictions.shape[0]
    hist = np.zeros((num_class, num_class))
    for i in range(size):
        hist += fast_hist(labels[i].flatten(), predictions[i].argmax(2).flatten(), num_class)
    acc_total = np.diag(hist).sum() / hist.sum()
    print ('accuracy = %f'%np.nanmean(acc_total))
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    print ('mean IU  = %f'%np.nanmean(iu))
    for ii in range(num_class):
        if float(hist.sum(1)[ii]) == 0:
            acc = 0.0
        else:
            acc = np.diag(hist)[ii] / float(hist.sum(1)[ii])
        print("    class # %d accuracy = %f "%(ii,acc))

def fast_hist(a, b, n):
    k = (a >= 0) & (a < n)
    return np.bincount(n * a[k].astype(int) + b[k], minlength=n**2).reshape(n, n)

def get_hist(predictions, labels):
    num_class = predictions.shape[3] #becomes 2 for aerial - correct
    batch_size = predictions.shape[0]
    hist = np.zeros((num_class, num_class))
    for i in range(batch_size):
        hist += fast_hist(labels[i].flatten(), predictions[i].argmax(2).flatten(), num_class)
    return hist

def print_hist_summery(hist):
    acc_total = np.diag(hist).sum() / hist.sum()
    print ('accuracy = %f'%np.nanmean(acc_total))
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    print ('mean IU  = %f'%np.nanmean(iu))
    for ii in range(hist.shape[0]):
        if float(hist.sum(1)[ii]) == 0:
            acc = 0.0
        else:
            acc = np.diag(hist)[ii] / float(hist.sum(1)[ii])
        print("    class # %d accuracy = %f "%(ii, acc))

In [26]:
def placeholder_inputs(batch_size):
    image_w = RESIZED_IMAGE_WIDTH
    image_h = RESIZED_IMAGE_HEIGHT
    image_c = IMAGE_DEPTH
    images = tf.placeholder(tf.float32, shape=[batch_size, image_h, image_w, IMAGE_DEPTH])
    labels = tf.placeholder(tf.int64, [batch_size, image_h, image_w, 1])
    is_training = tf.placeholder(tf.bool, name='is_training')
    keep_prob = tf.placeholder(tf.float32, name="keep_probabilty")

    return images, labels, is_training, keep_prob

In [27]:
def get_filename_list(path):
    
    image_filenames = sorted(os.listdir(path+'/images')) #sort by names to get img and label after each other
    label_filenames = sorted(os.listdir(path+'/masks')) #sort by names to get img and label after each other
    
    #Adding correct path to the each filename in the lists
    step=0
    for name in image_filenames:
            image_filenames[step] = path+"/images/"+name
            step=step+1
    step=0
    for name in label_filenames:
        label_filenames[step] = path+"/masks/"+name
        step=step+1

    return image_filenames, label_filenames


In [28]:
import glob

def get_filename_list2(path):
    
    image_filenames = sorted(glob.glob(path+'/images/*.jpg'))
    label_filenames = sorted(glob.glob(path+'/masks/*.png'))
    
    return image_filenames, label_filenames


In [29]:
def get_all_test_data(im_list, la_list):
    images = []
    labels = []
    index = 0
    for im_filename, la_filename in zip(im_list, la_list):      
        im = skimage.io.imread(im_filename)
        im = skimage.transform.resize(im, (RESIZED_IMAGE_HEIGHT, RESIZED_IMAGE_WIDTH), preserve_range=True)
        im = np.array(im, np.float32)
        im = im[np.newaxis]
        la = skimage.io.imread(la_filename)
        la = skimage.transform.resize(la, (RESIZED_IMAGE_HEIGHT, RESIZED_IMAGE_WIDTH), preserve_range=True)
        la = la[np.newaxis]
        la = la[...,np.newaxis]
        images.append(im)
        labels.append(la)
    return images, labels

In [30]:
val_dir = '/data/dataset_example/validation'
# '/Users/odeniz/Dropbox/data-science/Deep Learning/Semantic Segmentation/dataset_example/validation'

In [32]:
def test():
    print("----------- In test method ----------")
    
    
    with tf.Graph().as_default():
        
        
        test_data_node, test_labels_node, is_training, keep_prob = placeholder_inputs(BATCH_SIZE)  
        image_filenames, label_filenames = get_filename_list2(val_dir)
        images, labels = get_all_test_data(image_filenames, label_filenames)

        logits = inference(test_data_node, BATCH_SIZE, is_training)
        pred = tf.argmax(logits, axis=3)
        saver = tf.train.Saver()

        with tf.Session() as sess:
    #         saver.restore(sess, model_ckpt_dir)


            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)        
            hist = np.zeros((NUM_CLASSES, NUM_CLASSES))

            step=0
            for image_batch, label_batch  in zip(images, labels):
                feed_dict = {
                    test_data_node: image_batch,
                    test_labels_node: label_batch,
                    is_training: False,
                    keep_prob: 1.0 #During testing droput should be turned off -> 100% chance of keeping variable
                }

                dense_prediction, im = sess.run(fetches=[logits, pred], feed_dict=feed_dict)
                per_class_acc(dense_prediction, label_batch)
                # output_image to verify
                if (SAVA_IMAGE):
                    if(step < 10):
                        numb_img = "000"+str(step)
                    elif(step < 100):
                        numb_img = "00"+str(step)
                    elif(step < 1000):
                        numb_img = "0"+str(step)
                    write_image(im[0], os.path.join(res_output_dir +'/testing_image'+numb_img+'.png')) #Printing all test images
                step=step+1
                hist += get_hist(dense_prediction, label_batch)
            acc_total = np.diag(hist).sum() / hist.sum()
            iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
            print("acc: ", acc_total)
            print("IU: ", iu)
            print("mean IU: ", np.nanmean(iu))

        coord.request_stop()
        coord.join(threads)


def write_image(image, filename):
    """ store label data to colored image """
    Sky = [0,0,0] #
    Building = [128,128,0] #green-ish

    r = image.copy()
    g = image.copy()
    b = image.copy()

    label_colours = np.array([Sky, Building])
    for label in range(0, NUM_CLASSES): #for all labels - shouldn't this be set according to num_class?
        #Replacing all instances in matrix with label value with the label colour
        r[image==label] = label_colours[label,0] #red is channel/debth 0
        g[image==label] = label_colours[label,1] #green is channel/debth 1
        b[image==label] = label_colours[label,2] #blue is channel/debth 2
    rgb = np.zeros((image.shape[0], image.shape[1], 3))
    rgb[:,:,0] = r/1.0
    rgb[:,:,1] = g/1.0
    rgb[:,:,2] = b/1.0
    im = Image.fromarray(np.uint8(rgb))
    im.save(filename)

In [33]:
images, masks = distorted_inputs(data_dir=data_dir, validation=False, batch_size=BATCH_SIZE)

Filling queue with 10 images before starting to train. This will take a few minutes. 


In [34]:
val_images, val_masks = distorted_inputs(data_dir=data_dir, validation=True, batch_size=BATCH_SIZE)

Filling queue with 10 images before starting to train. This will take a few minutes. 


In [37]:
images

<tf.Tensor 'shuffle_batch:0' shape=(3, 256, 512, 3) dtype=float32>

In [39]:
train_data_node = tf.placeholder( tf.float32, shape=[3, 256, 512, 3])

In [45]:
train_data_node

<tf.Tensor 'Placeholder:0' shape=(3, 256, 512, 3) dtype=float32>

In [34]:
phase_train = tf.placeholder(tf.bool, name='phase_train')
inference(images, BATCH_SIZE, phase_train)

conv1 shape is:  (5, 256, 512, 64)
pool1 shape is:  (5, 128, 256, 64)
conv2 shape is:  (5, 128, 256, 64)
pool2 shape is:  (5, 64, 128, 64)
conv3 shape is:  (5, 64, 128, 64)
pool3 shape is:  (5, 32, 64, 64)
conv4 shape is:  (5, 32, 64, 64)
pool4 shape is:  (5, 16, 32, 64)
pool4 shape is:  (5, 16, 32, 64)
unpool4 shape is:  (5, 32, 64, 64)
conv_decode4 shape is:  (5, 32, 64, 64)

pool3 shape is:  (5, 32, 64, 64)
unpool_3 shape is:  (5, 64, 128, 64)
conv_decode3 shape is:  (5, 64, 128, 64)

pool2 shape is:  (5, 64, 128, 64)
unpool_2 shape is:  (5, 128, 256, 64)
conv_decode2 shape is:  (5, 128, 256, 64)

pool1 shape is:  (5, 128, 256, 64)
unpool_1 shape is:  (5, 256, 512, 64)
conv_decode1 shape is:  (5, 256, 512, 64)

conv shape is:  (5, 256, 512, 2)
biases shape is:  (2,)


<tf.Tensor 'conv_classifier/conv_classifier:0' shape=(5, 256, 512, 2) dtype=float32>

<p><p/>

In [35]:
CONV_INIT = "var_scale"
training(is_finetune=False)

Filling queue with 10 images before starting to train. This will take a few minutes. 
Filling queue with 10 images before starting to train. This will take a few minutes. 
conv1 shape is:  (3, 256, 512, 64)
pool1 shape is:  (3, 128, 256, 64)
conv2 shape is:  (3, 128, 256, 64)
pool2 shape is:  (3, 64, 128, 64)
conv3 shape is:  (3, 64, 128, 64)
pool3 shape is:  (3, 32, 64, 64)
conv4 shape is:  (3, 32, 64, 64)
pool4 shape is:  (3, 16, 32, 64)
pool4 shape is:  (3, 16, 32, 64)
unpool4 shape is:  (3, 32, 64, 64)
conv_decode4 shape is:  (3, 32, 64, 64)

pool3 shape is:  (3, 32, 64, 64)
unpool_3 shape is:  (3, 64, 128, 64)
conv_decode3 shape is:  (3, 64, 128, 64)

pool2 shape is:  (3, 64, 128, 64)
unpool_2 shape is:  (3, 128, 256, 64)
conv_decode2 shape is:  (3, 128, 256, 64)

pool1 shape is:  (3, 128, 256, 64)
unpool_1 shape is:  (3, 256, 512, 64)
conv_decode1 shape is:  (3, 256, 512, 64)

conv shape is:  (3, 256, 512, 2)
biases shape is:  (2,)
Instructions for updating:

Future major version



2018-04-10 22:26:44.719790: step 10, loss = 0.18 (4.3 examples/sec; 0.705 sec/batch)
accuracy = 0.996726
mean IU  = 0.498363
    class # 0 accuracy = 0.996726 
    class # 1 accuracy = 0.000000 
2018-04-10 22:26:52.111802: step 20, loss = 0.05 (4.2 examples/sec; 0.713 sec/batch)
accuracy = 0.997637
mean IU  = 0.498819
    class # 0 accuracy = 0.997637 
    class # 1 accuracy = 0.000000 
2018-04-10 22:26:59.534510: step 30, loss = 0.05 (4.2 examples/sec; 0.717 sec/batch)
accuracy = 0.999835
mean IU  = 0.499918
    class # 0 accuracy = 0.999835 
    class # 1 accuracy = 0.000000 
2018-04-10 22:27:06.981497: step 40, loss = 0.02 (4.2 examples/sec; 0.716 sec/batch)
accuracy = 0.999852
mean IU  = 0.499926
    class # 0 accuracy = 0.999852 
    class # 1 accuracy = 0.000000 
2018-04-10 22:27:14.444790: step 50, loss = 0.03 (4.2 examples/sec; 0.713 sec/batch)
accuracy = 0.988535
mean IU  = 0.494268
    class # 0 accuracy = 0.988535 
    class # 1 accuracy = 0.000000 
2018-04-10 22:27:21.93263

  # Remove the CWD from sys.path while we load stuff.


2018-04-10 22:27:44.459261: step 90, loss = 0.04 (4.2 examples/sec; 0.719 sec/batch)
accuracy = 1.000000
mean IU  = 1.000000
    class # 0 accuracy = 1.000000 
    class # 1 accuracy = 0.000000 
2018-04-10 22:27:51.982860: step 100, loss = 0.08 (4.2 examples/sec; 0.718 sec/batch)
accuracy = 1.000000
mean IU  = 1.000000
    class # 0 accuracy = 1.000000 
    class # 1 accuracy = 0.000000 
start validating.....
val loss:  0.0
accuracy = nan
mean IU  = nan
    class # 0 accuracy = 0.000000 
    class # 1 accuracy = 0.000000 
 end validating.... 
2018-04-10 22:28:00.271506: step 110, loss = 0.05 (4.1 examples/sec; 0.723 sec/batch)
accuracy = 1.000000
mean IU  = 1.000000
    class # 0 accuracy = 1.000000 
    class # 1 accuracy = 0.000000 
2018-04-10 22:28:07.811222: step 120, loss = 0.13 (4.2 examples/sec; 0.721 sec/batch)
accuracy = 1.000000
mean IU  = 1.000000
    class # 0 accuracy = 1.000000 
    class # 1 accuracy = 0.000000 
2018-04-10 22:28:15.360454: step 130, loss = 0.05 (4.2 exam

KeyboardInterrupt: 

In [36]:
# CONV_INIT = "var_scale"
# test()