### Implement a U-Net model for image segmentation. The target for each input image is an image of the same shape as the input with values 1/0: 1 means that the pixel lies on the mask of the image and 0 means that it does not. We apply CNN layers with padding = "SAME" (the size stays the same) and max pooling to halve the size of the image. We then apply transposed convolutions to bring the image back to the same size as the input image.


In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import matplotlib.pyplot as plt
%matplotlib inline
import math
import time
from scipy import ndimage,misc

In [2]:
# Number of images per batch; used by the batch generator call, the inference
# loop and the transposed-convolution output shape in upsample().
BATCH_SIZE = 25
# [height, width] that images and masks are resized to before entering the net.
input_image_size = [256,256]

# set the necessary directories
data_dir = "train/"  # training images
mask_dir = "train_masks/"  # masks, looked up as <image stem>_mask.gif
# Every directory entry is treated as a training image.
all_images = os.listdir(data_dir)
def mini_batch_generator(batch_size, data_dir, mask_dir, images, resize_dims, iters, sess):
    """Yield ``iters`` random mini-batches of resized images and binary masks.

    Args:
        batch_size: number of images drawn (with replacement) per batch.
        data_dir: directory prefix of the JPEG images.
        mask_dir: directory prefix of the GIF masks; the mask of ``name.jpg``
            is read from ``<mask_dir>name_mask.gif``.
        images: list of image file names inside ``data_dir``.
        resize_dims: ``[height, width]`` every image and mask is resized to.
        iters: number of batches to produce.
        sess: TensorFlow session used to run the decoding ops.

    Yields:
        ``(img, labels)``: ``img`` is the stacked float image batch and
        ``labels`` is a float32 array holding 1.0 where the resized mask is
        greater than zero and 0.0 elsewhere.
    """
    # Build the decode/resize ops once and feed the file paths through
    # placeholders. Creating these ops inside the loop adds new nodes to the
    # graph for every image of every batch, so the graph (and memory use)
    # grows for as long as training runs.
    image_path = tf.placeholder(tf.string, shape=[])
    mask_path = tf.placeholder(tf.string, shape=[])
    image_op = tf.image.resize_images(
        tf.image.decode_jpeg(tf.read_file(image_path)), resize_dims)
    mask_frames = tf.image.rgb_to_grayscale(
        tf.image.decode_gif(tf.read_file(mask_path)))
    # decode_gif returns a stack of frames; keep only the first one.
    mask_op = tf.gather(tf.image.resize_images(mask_frames, resize_dims), 0)

    while iters > 0:
        ix = np.random.choice(np.arange(len(images)), batch_size)
        img = []
        labels = []
        for i in ix:
            image, mask = sess.run(
                [image_op, mask_op],
                feed_dict={
                    image_path: data_dir + images[i],
                    mask_path: mask_dir + images[i].split(".")[0] + '_mask.gif',
                })
            img.append(image)
            labels.append(mask)

        img = np.stack(img)
        # Binarise: any non-zero mask value counts as foreground.
        labels = (np.stack(labels) > 0).astype(np.float32)
        iters -= 1
        yield img, labels

In [3]:
def weight_initialise(shape):
    """Create a trainable variable of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [4]:
def bias_initialise(shape):
    """Create a trainable variable of ``shape`` with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

In [5]:
def conv2d(input_layer,filters):
    """Convolve ``input_layer`` with ``filters`` using stride 1 and "SAME" padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(input_layer, filters, strides=unit_strides, padding="SAME")

def max_pool(input_layer):
    """Apply 2x2 max pooling with stride 2 and "SAME" padding."""
    # The same [1, 2, 2, 1] spec serves as both the window and the stride.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(input_layer, ksize=window, strides=window, padding="SAME")

In [6]:
def _conv3x3_relu(input_layer, in_channel, out_channel):
    """Apply one 3x3 "SAME" convolution plus bias, followed by ReLU."""
    # Filter of shape [filter_height, filter_width, in_channels, out_channels].
    # The weight variable is created before the bias variable; the
    # auto-generated variable names depend on that creation order.
    weights = weight_initialise([3, 3, in_channel, out_channel])
    bias = bias_initialise([out_channel])
    return tf.nn.relu(tf.add(conv2d(input_layer, weights), bias))


def down(input_layer, in_channel, out_channel, pool=True):
    """Encoder stage: two 3x3 conv+ReLU layers, optionally followed by max pooling.

    Args:
        input_layer: input feature map tensor.
        in_channel: number of channels of ``input_layer``.
        out_channel: number of channels produced by both convolutions.
        pool: when true, also return the 2x2 max-pooled output.

    Returns:
        ``(pooled, h_conv)`` when ``pool`` is true, where ``h_conv`` is the
        pre-pooling activation; otherwise just ``h_conv``.
    """
    h_conv1 = _conv3x3_relu(input_layer, in_channel, out_channel)
    h_conv = _conv3x3_relu(h_conv1, out_channel, out_channel)
    if pool:
        return max_pool(h_conv), h_conv
    return h_conv
    

In [7]:
def upsample(input_layer,backend,in_channel,out_channel,out_shape):
    """Decoder stage: transposed conv, skip concatenation, two 3x3 conv+ReLU.

    Args:
        input_layer: feature map to upsample, with ``in_channel`` channels.
        backend: encoder feature map concatenated with the upsampled tensor
            along the channel axis (the skip connection).
        in_channel: channels of ``input_layer`` and of the concatenated
            tensor; callers in this file pass ``2 * out_channel``.
        out_channel: channels produced by the transposed convolution and by
            both following convolutions.
        out_shape: 1-D int32 tensor of length 1 with the spatial size of the
            upsampled output; it is used for both height and width.

    Returns:
        The activation tensor after the second convolution.
    """
    # conv2d_transpose expects its filter as
    # [height, width, output_channels, in_channels].
    w_upsample = weight_initialise([2, 2, out_channel, in_channel])
    bias_upsample = bias_initialise([out_channel])
    # Take the batch dimension from the input at run time rather than from the
    # global BATCH_SIZE, so the graph accepts batches of any size.
    batch_dim = tf.shape(input_layer)[0:1]
    target_shape = tf.concat(
        [batch_dim, out_shape, out_shape, tf.constant([out_channel])], axis=0)
    upsample_layer = tf.add(
        tf.nn.conv2d_transpose(input_layer, w_upsample, target_shape, strides=[1, 2, 2, 1]),
        bias_upsample)
    concat_layer = tf.concat([backend, upsample_layer], axis=3)

    # First convolution. Filter of shape
    # [filter_height, filter_width, in_channels, out_channels].
    W_conv1 = weight_initialise([3, 3, in_channel, out_channel])
    bias1 = bias_initialise([out_channel])
    h_conv1 = tf.nn.relu(tf.add(conv2d(concat_layer, W_conv1), bias1))

    # Second convolution.
    W_conv2 = weight_initialise([3, 3, out_channel, out_channel])
    bias2 = bias_initialise([out_channel])
    return tf.nn.relu(tf.add(conv2d(h_conv1, W_conv2), bias2))

In [8]:
# Network input and target mask. All four dimensions are left unspecified.
x = tf.placeholder(tf.float32, shape=[None] * 4)
y_true = tf.placeholder(tf.float32, shape=[None] * 4)

In [9]:
# Scratch cell: reads the static first dimension of a tensor and doubles it.
# W is not referenced again in the visible notebook.
W = tf.zeros(shape=(1,2))
[W.get_shape().as_list()[0]*2]

[2]

In [10]:
# Exponentially decayed learning rate: starting at 1e-3, it is multiplied by
# 0.97 once every `decay_steps` optimiser steps (staircase schedule).
starter_learning_rate = 1e-3
decay_steps = 5
# Step counter; the optimiser increments it through minimize(..., global_step_lr).
global_step_lr = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
    learning_rate=starter_learning_rate,
    global_step=global_step_lr,
    decay_steps=decay_steps,
    decay_rate=0.97,
    staircase=True,
)

In [11]:
# start the model
# Builds the U-Net graph: four pooled encoder stages, one un-pooled bottleneck,
# four decoder stages with skip connections, then a 1x1 convolution to logits.
# NOTE: variables get auto-generated names in creation order, so the order of
# the statements below must stay fixed for existing checkpoints to restore.
out_channel = 64
down_sample_layers= []  # pre-pooling encoder outputs, reused as skip connections

#down1 (3 -> 64 channels)
d1,back1 = down(x,3,out_channel)
down_sample_layers.append(back1)

#down2 (64 -> 128 channels)
out_channel = out_channel*2
d2,back2 = down(d1,out_channel//2,out_channel)
down_sample_layers.append(back2)

#down3 (128 -> 256 channels)
out_channel = out_channel*2
d3,back3 = down(d2,out_channel//2,out_channel)
down_sample_layers.append(back3)

#down4 (256 -> 512 channels)
out_channel = out_channel*2
d4,back4 = down(d3,out_channel//2,out_channel)
down_sample_layers.append(back4)


#down5 (512 -> 1024 channels, bottleneck without pooling)
out_channel = out_channel*2
d5 = down(d4,out_channel//2,out_channel,pool=False)

#up1 (1024 -> 512 channels)
out_channel = out_channel //2
#d5_shape = [d5.get_shape().as_list()[1] *2,d5.get_shape().as_list()[2]*2]
# The spatial output size of each upsample step is supplied at run time through
# a length-1 int32 placeholder (see the feed_dict of the training/inference
# cells) instead of being read from the static tensor shape.
d5_shape = tf.placeholder(tf.int32, [1])
up1 = upsample(d5,down_sample_layers[-1] ,out_channel *2, out_channel, d5_shape)

#up2 (512 -> 256 channels)
out_channel = out_channel //2
#up1_shape = [up1.get_shape().as_list()[1]*2,up1.get_shape().as_list()[2]*2]
up1_shape = tf.placeholder(tf.int32, [1])
up2 = upsample(up1,down_sample_layers[-2] ,out_channel *2, out_channel,up1_shape )

#up3 (256 -> 128 channels)
out_channel = out_channel //2
#up2_shape = [up2.get_shape().as_list()[1]*2,up2.get_shape().as_list()[2]*2]
up2_shape = tf.placeholder(tf.int32, [1])
up3 = upsample(up2,down_sample_layers[-3] ,out_channel *2, out_channel,up2_shape )

#up4 (128 -> 64 channels)
out_channel = out_channel //2
#up3_shape = [up3.get_shape().as_list()[1]*2,up3.get_shape().as_list()[2]*2]
up3_shape = tf.placeholder(tf.int32, [1])
up4 = upsample(up3,down_sample_layers[-4] ,out_channel *2, out_channel,up3_shape )


# Final 1x1 convolution mapping the 64 feature channels to one logit per pixel.
W_conv = weight_initialise([1,1,out_channel,1])  ## Filter of shape [filter_height, filter_width, in_channels, out_channels]
bias = bias_initialise([1])
conv2 = tf.add(conv2d(up4,W_conv),bias)
# conv2 holds raw logits: the loss applies the sigmoid itself, and the
# inference cell applies tf.nn.sigmoid explicitly.
#h_conv = tf.nn.sigmoid(conv2)


In [12]:
# Mean per-pixel sigmoid cross-entropy between the logits and the binary mask.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=conv2)
)
# Adam with the decayed learning rate; each step also increments global_step_lr.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
    cost, global_step=global_step_lr
)

In [13]:
# Open the interactive session, then initialise every variable created so far.
sess = tf.InteractiveSession()
inn = tf.global_variables_initializer()
sess.run(inn)

In [None]:
# Whole-second timestamp used to make checkpoint file names unique per run.
timestamp = str(math.trunc(time.time()))
# Create the checkpoint directory if needed. exist_ok avoids the race between
# checking for the directory and creating it.
os.makedirs("carvana_checkpoints", exist_ok=True)
# Keep up to 1000 checkpoints instead of pruning older ones early.
saver = tf.train.Saver(max_to_keep=1000)

In [None]:
# Training loop, disabled by wrapping it in a string literal; the weights are
# restored from a saved checkpoint further down instead. When enabled it draws
# 1000 random batches, runs one optimiser step per batch and saves a checkpoint
# every 50 batches plus once at the end.
# NOTE(review): the "Epoch" printed by the loop counts batches, not passes
# over the data set.
'''
index = 0
for (batch_x, batch_y) in mini_batch_generator(BATCH_SIZE,data_dir,mask_dir,all_images,input_image_size, 1000, sess):
    #batch_x, batch_y = sess.run([batch_x, batch_y])
    index +=1
    feed_dict = { x: batch_x, 
                 y_true: batch_y,
                 d5_shape: [(input_image_size[0])//8],
                 up1_shape:[(input_image_size[0])//4],
                 up2_shape:[(input_image_size[0])//2],
                 up3_shape:[(input_image_size[0])] }
    
    _, c = sess.run([optimizer, cost], feed_dict=feed_dict)
    print("Epoch: {} cost={:.4f}".format(index,c))
    if index % 50 == 0:
        saved_file = saver.save(sess, 'carvana_checkpoints/carvana_train' + timestamp, global_step=index)
        print("Saved file: " + saved_file)
saved_file = saver.save(sess, 'carvana_checkpoints/carvana_train' + timestamp, global_step=index)
print("Saved file: " + saved_file) 
'''

In [None]:
# Loads the graph definition stored next to the checkpoint and binds the
# returned saver to new_saver.
# NOTE(review): the cell has no execution count, and new_saver is reassigned
# in the next cell. Importing the meta graph on top of the graph already built
# above would presumably duplicate its nodes - confirm before running.
new_saver = tf.train.import_meta_graph('carvana_checkpoints/carvana_train1506353226-650.meta')


In [14]:
# Saver covering the variables of the graph built in this notebook; used below
# to restore the trained weights.
new_saver = tf.train.Saver()

In [15]:
# Load the trained weights from the step-650 checkpoint into the live session.
new_saver.restore(sess, "carvana_checkpoints/carvana_train1506353226-650")

INFO:tensorflow:Restoring parameters from carvana_checkpoints/carvana_train1506353226-650


In [16]:
def rle_encode(mask_image):
    """Run-length encode a binary mask.

    The mask is flattened in row-major order and its pixels are numbered
    starting at 1. The result alternates ``start, length`` for every run that
    differs from the background value 0, so a 0/1 mask yields one pair per
    run of ones.

    Args:
        mask_image: array-like mask of any shape; it is not modified.

    Returns:
        1-D integer NumPy array ``[start1, length1, start2, length2, ...]``;
        empty when the mask is empty or contains no foreground.
    """
    pixels = np.asarray(mask_image).ravel()
    # Pad both ends with background instead of overwriting the first and last
    # pixel. Runs that touch a corner are then encoded correctly, and an empty
    # mask no longer raises IndexError.
    padded = np.concatenate(([0], pixels, [0]))
    # Index i marks a change between pixel i (1-based) and the one before it.
    runs = np.where(padded[1:] != padded[:-1])[0] + 1
    # Padding guarantees start/end pairs; turn each end position into a length.
    runs[1::2] -= runs[::2]
    return runs

In [17]:
def rle_to_string(runs):
    """Join the run-length values into one space-separated string."""
    return ' '.join(map(str, runs))

In [18]:
# Inference setup.
test_dir = "test/"
# Every directory entry is treated as a test image, in os.listdir order.
all_test_images = os.listdir(test_dir)
count = 0
# Turn the raw logits into per-pixel probabilities for prediction.
h_conv = tf.nn.sigmoid(conv2)
# NOTE(review): resultList is not used anywhere in the visible notebook.
resultList = {}

In [None]:
# Predict a mask for every test image, run-length encode it and write the
# submission file.
df = pd.read_csv('sample_submission.csv', index_col=0)
iall = 0
count = 0
while iall < len(all_test_images):
    resized_image = []
    startTime = time.time()
    # Slicing clamps at the end of the list, so a final batch shorter than
    # BATCH_SIZE no longer raises IndexError.
    batch_names = all_test_images[iall:iall + BATCH_SIZE]
    for image_name in batch_names:
        original_image = ndimage.imread(test_dir + image_name, mode="RGB")
        resized_image.append(misc.imresize(original_image, (input_image_size[0], input_image_size[1])))
    valid_count = len(batch_names)
    # Pad a short final batch by repeating the last image so the batch size
    # built into the graph is still met; the padded predictions are skipped.
    resized_image.extend([resized_image[-1]] * (BATCH_SIZE - valid_count))
    feed_dict = { x: resized_image,
                 d5_shape: [(input_image_size[0])//8],
                 up1_shape:[(input_image_size[0])//4],
                 up2_shape:[(input_image_size[0])//2],
                 up3_shape:[(input_image_size[0])] }
    predicted_masks = sess.run(h_conv, feed_dict=feed_dict)
    for i in range(valid_count):
        probabilities = predicted_masks[i].reshape(input_image_size)
        # mode='F' keeps the float probabilities. Without it imresize rescales
        # the array to 0..255 bytes, and thresholding that at 0.5 marks nearly
        # every pixel as foreground.
        upscaled = misc.imresize(probabilities, (1280, 1918), mode='F')
        predicted_mask = (upscaled >= 0.5).astype(np.uint8)
        rle_str = rle_to_string(rle_encode(predicted_mask))
        # NOTE(review): rows are filled by position, which assumes the rows of
        # sample_submission.csv are in the same order as os.listdir(test_dir)
        # - confirm, or key the assignment on the image name instead.
        df.iloc[count] = rle_str
        count += 1
    print("timer ", time.time() - startTime)
    iall = iall + BATCH_SIZE
df.to_csv('results3.csv')

timer  58.893924951553345
timer  54.24568700790405
timer  48.399624824523926
timer  44.16572117805481
timer  51.2540979385376
timer  49.70204186439514
timer  50.36096811294556
timer  65.44501614570618
timer  76.31970477104187
timer  52.86351704597473
timer  48.49543881416321
timer  50.35770297050476
timer  54.128912687301636
timer  54.560681104660034
timer  52.38670802116394
timer  50.637052059173584
timer  50.487595081329346
timer  52.2394437789917
timer  52.358399868011475
timer  50.803094148635864
timer  50.07304906845093
timer  50.6238317489624
timer  50.677494049072266
timer  50.595016956329346
timer  50.43800234794617
timer  51.90107202529907
timer  52.16747689247131
timer  50.976927042007446
timer  50.40223789215088
timer  50.42082190513611
timer  50.059161901474
timer  50.79740595817566
timer  51.12510895729065
timer  49.17813587188721
timer  48.74488568305969
timer  49.99537706375122
timer  49.0375919342041
timer  49.518210649490356
timer  50.437254905700684
timer  51.03877568

timer  42.31587290763855
timer  41.22482895851135
timer  40.8755989074707
timer  41.52111196517944
timer  41.58559012413025
timer  41.59040403366089
timer  41.384756088256836
timer  41.38383102416992
timer  41.24904227256775
timer  41.44189524650574
timer  41.83457112312317
timer  41.180959939956665
timer  42.034595012664795
timer  41.16724920272827
timer  41.34478998184204
timer  41.61184477806091
timer  41.49944305419922
timer  40.95903205871582
timer  41.68665409088135
timer  41.647741079330444
timer  41.43984293937683
timer  41.2131142616272
timer  41.54559016227722
timer  41.749245166778564
timer  41.1705219745636
timer  41.909900188446045
timer  41.18733859062195
timer  41.96669793128967
timer  41.25162482261658
timer  41.81911087036133
timer  41.085965633392334
timer  42.226118087768555
timer  41.856935024261475
timer  42.65599203109741
timer  41.83395290374756
timer  41.65053415298462
timer  41.1867561340332
timer  41.928064823150635
timer  41.204203844070435
timer  41.84754300

timer  41.4180212020874
timer  41.425740003585815
timer  42.05661106109619
timer  41.49828910827637
timer  41.314104080200195
timer  41.353513956069946
timer  41.707189083099365
timer  41.37981104850769
timer  41.494455099105835
timer  43.813644886016846
timer  42.78277397155762
timer  41.00992202758789
timer  41.262449979782104
timer  41.43168306350708
timer  42.5358829498291
timer  41.392524003982544
timer  41.43922305107117
timer  42.14717698097229
timer  41.86045527458191
timer  41.73125505447388
timer  41.89033603668213
timer  41.53768587112427
timer  40.95202112197876
timer  41.18629503250122
timer  41.703204870224
timer  41.93224620819092
timer  41.40611505508423
timer  42.00021696090698
timer  41.65213918685913
timer  42.0779767036438
timer  41.64795184135437
timer  41.510091066360474
timer  42.03234386444092
timer  41.332536697387695
timer  41.469266176223755
timer  40.98028373718262
timer  41.351115703582764
timer  41.449976205825806
timer  41.22499704360962
timer  41.6067059

timer  45.65088200569153
timer  47.46440410614014
timer  43.97097396850586
timer  45.197802782058716
timer  43.5337119102478
timer  42.18341088294983
timer  47.90612196922302
timer  52.88849115371704
timer  44.40051198005676
timer  50.64027714729309
timer  56.62964129447937
timer  52.00619387626648
timer  55.26341986656189
timer  51.850255250930786
timer  55.68530201911926
timer  53.26087474822998
timer  51.65933275222778
timer  54.289440870285034
timer  55.75250005722046
timer  63.304332971572876
timer  45.42712616920471
timer  46.14480900764465
timer  44.3337881565094
timer  63.1315062046051
timer  65.29491090774536
timer  48.369713306427
timer  58.11385416984558
timer  49.18306016921997
timer  69.77237892150879
timer  50.65496897697449
timer  64.13353705406189
timer  59.12129998207092
timer  56.87194490432739
timer  48.4839129447937
timer  46.46564793586731
timer  48.86198687553406
timer  45.433265924453735
timer  47.97277474403381
timer  56.11764621734619
timer  49.688068151474
tim

timer  48.12454676628113
timer  45.57969784736633
timer  49.988723039627075
timer  47.60054087638855
timer  42.78017497062683
timer  44.63721227645874


In [None]:
# Scratch cell: show the positive entries of the most recent predicted mask.
predicted_mask[predicted_mask > 0]

In [None]:
# Scratch cell: run-length encode element i of the last predicted mask.
# NOTE(review): if predicted_mask is the 2-D mask from the loop above, this
# encodes a single row only - confirm that is intended.
rle_to_string(rle_encode(predicted_mask[i]))