In [5]:
import tensorflow as tf
import os
# os.chdir("/content/drive/Colab Notebooks/Kaggle_2018/experiments/")  # colab directory
import utils
import numpy as np
from matplotlib import pyplot as plt
%load_ext autoreload
%autoreload 2
%matplotlib inline
from scipy.ndimage import imread

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Report how many image ids utils exposes: training count on the first line, test count on the second.
print(len(utils.TRAIN_IMAGE_IDS), len(utils.TEST_IMAGE_IDS), sep="\n")

In [6]:
# Group the training image ids by image size, then flatten the groups so that
# images of the same size sit next to each other in TRAIN_IMAGE_IDS.
# Groups keep the order in which each size was first encountered.
tmp_dict = {}
for each in utils.TRAIN_IMAGE_IDS:
  # every image is loaded just to read its (height, width, channels) shape
  h, w, _ = utils.image2ndarry(each, "train").shape
  key = "{}_{}".format(h, w)
  if key not in tmp_dict:
    # first image of this size: report the new size once
    print(h, w)
    tmp_dict[key] = []
  # list append instead of tuple concatenation, which re-copied the whole group each time
  tmp_dict[key].append(each)

TRAIN_IMAGE_IDS = []
for k, v in tmp_dict.items():
  TRAIN_IMAGE_IDS.extend(v)
  print("{} images in size {} added".format(len(v), k))

256 256
256 320
360 360
520 696
260 347
1024 1024
512 640
603 1272
1040 1388
334 images in size 256_256 added
112 images in size 256_320 added
91 images in size 360_360 added
92 images in size 520_696 added
5 images in size 260_347 added
16 images in size 1024_1024 added
13 images in size 512_640 added
6 images in size 603_1272 added
1 images in size 1040_1388 added


In [90]:
# def slice_ndarray(origin):
#   H,W,_ = origin.shape
#   if H > 600 and W > 600:
#     output = [origin[0:H//2,0:W//2,:] , origin[H//2:,0:W//2,:] , \
#               origin[0:H//2, W//2:,:], origin[H//2:,W//2:,:]]
#   elif H > 600 and W < 600:
#     output = [origin[0:H//2,:,:] , origin[H//2:,:,:] ]
#   elif H < 600 and W > 600:
#     output = [origin[:,0:W//2,:] , origin[:,W//2:,:] ]
#   else:
#     output = [origin]
#   return output

# def custom_weight1(reshape_y):
#     # custom weight
#     # think for a image, masks pixels set and background set have same weight , 
#     # suppose give totoal weight as  1000000
#     # each mask pixel take weight 500000/(mask pixels count) weight
#     # each background pixel take weight 500000/(background pixels count) weight
#     mask_cnt = tf.reduce_sum(reshape_y[:,0])
#     mask_ttl_weight = tf.constant(500000.0,tf.float32)
#     mask_weight = reshape_y[:,0] * (mask_ttl_weight/mask_cnt)
#     bg_cnt = tf.reduce_sum(reshape_y[:,1])
#     bg_ttl_weight = tf.constant(500000.0,tf.float32)
#     bg_weight = reshape_y[:,1] * (bg_ttl_weight/bg_cnt)
#     custom_weight = mask_weight + bg_weight
#     return custom_weight

def extension_transpose(image_id, folder):
    """Load one image (or mask) and return its first three channels in CxHxW layout.

    Args:
        image_id: identifier passed through to ``utils.image2ndarry``.
        folder: ``"train"`` or ``"mask"``. Training images additionally go through
            ``utils.extend_sides`` before the transpose; masks are used as loaded.

    Returns:
        The array transposed from HxWxC to CxHxW and sliced to at most the first
        three channels.

    Raises:
        ValueError: if ``folder`` is neither ``"train"`` nor ``"mask"``. (Previously
            this surfaced as an UnboundLocalError after the image had been read.)
    """
    if folder not in ("train", "mask"):
        raise ValueError(
            "folder must be 'train' or 'mask', got {!r}".format(folder))
    # read in HxWxC
    origin = utils.image2ndarry(image_id, folder=folder)
    # only training images get their border extended; masks stay untouched
    extended = utils.extend_sides(origin) if folder == "train" else origin
    # HxWxC -> CxHxW, then keep the first 3 channels
    return np.transpose(extended, (2, 0, 1))[:3, :, :]

# def onehot_encoding(origin):
#   # onehot encoding, layer 0 is prob of mask, layer 1 is prod of background
#   mask = origin>0
#   background = np.zeros_like(mask)
#   background[mask==0] = 1
#   return np.concatenate((mask, background), axis=-1)

def batch_data(batch_list):
    """Build one training batch of single-channel images and binary masks.

    For every image id the first channel of the training image and of the mask
    (both obtained through ``extension_transpose``) is taken and the per-image
    planes are stacked along a new leading batch axis.

    Args:
        batch_list: non-empty sequence of image ids.

    Returns:
        ``(train, mask)``, both shaped NxHxWx1. Mask values greater than 1 are
        clamped to 1.

    Raises:
        ValueError: if ``batch_list`` is empty (previously an UnboundLocalError).
    """
    if len(batch_list) == 0:
        raise ValueError("batch_list must contain at least one image id")

    train_planes = []
    mask_planes = []
    for image_id in batch_list:
        # take first layer of the CxHxW arrays -> HxW
        train_planes.append(extension_transpose(image_id, "train")[0, :, :])
        mask_planes.append(extension_transpose(image_id, "mask")[0, :, :])

    # stack once on N instead of re-concatenating the growing batch on every image;
    # np.stack also returns a fresh array, so the clamp below cannot write through
    # a view into the array handed back by the loader.
    train = np.expand_dims(np.stack(train_planes, axis=0), axis=-1)  # NxHxWx1
    mask = np.expand_dims(np.stack(mask_planes, axis=0), axis=-1)    # NxHxWx1
    mask[mask > 1.0] = 1.0
    return train, mask

![u-net](assets/unet.png)

In [4]:
class UNet:
    """U-Net style encoder/decoder built from ``tf.layers`` ops.

    ``create_model`` maps an input tensor to one logit per pixel; ``trainer``
    wires the model to placeholders, a loss ("xent" or "iou") and an Adam
    training op.
    """

    # loss names understood by trainer()
    SUPPORTED_LOSSES = ("xent", "iou")

    def __init__(self, loss_type):
        """Remember which loss ``trainer`` should build ("xent" or "iou")."""
        self.loss_type = loss_type

    def conv_block(self, x, channel_cnt):
        """
        convolution block with relu activation:
        3x3 convolution, stride 1, "same" padding, ``channel_cnt`` filters
        """
        x = tf.layers.conv2d(x,
                          filters = channel_cnt,
                          kernel_size = [3,3],
                          strides = [1,1],
                          padding = "same",
                          activation = tf.nn.relu
                        )
        return x

    def down_sampling(self, x, channel_cnt):
        """
        down samping block:
        max pool[2x2] -> conv -> conv

        Prints the resulting tensor and returns it.
        """
        x = tf.layers.max_pooling2d(x,
                    pool_size=[2,2], 
                    strides=[2,2], 
                    padding='same')
        x = self.conv_block(x, channel_cnt)
        x = self.conv_block(x, channel_cnt)
        print(str(x))
        return x
    
    def crop(self, x, h, w):
        """Center-crop x (axis 1 = height, axis 2 = width) to shape (h, w).

        The start offsets are computed from the dynamic shape of x, so h and w
        may be tensors.
        """
        H = tf.shape(x)[1]
        W = tf.shape(x)[2]
        # split the surplus evenly between both sides
        h_s = (H - h) // 2
        w_s = (W - w) // 2
        x_crop = x[:, h_s: h_s + h , w_s: w_s + w , :] 
        return x_crop
    
    def skip_connection(self, x1, x2):
        """crop x1 as x2 shape, and concatenate cropped x1 with x2 on axis 3"""
        h = tf.shape(x2)[1]
        w = tf.shape(x2)[2]
        x1_crop = self.crop(x1, h, w)
        return tf.concat([x1_crop, x2], axis = 3)
    
    def skip_connection2(self, x1, x2):
        """crop x2 as x1 shape, and concatenate x1 with cropped x2 on axis 3"""
        h = tf.shape(x1)[1]
        w = tf.shape(x1)[2]
        x2_crop = self.crop(x2, h, w)
        return tf.concat([x1, x2_crop], axis = 3)
    
    def up_sampling(self, x, channel_cnt, skip_copy):
        """
        upsampling block: 
        deconv -> skip_connection -> conv -> conv

        The 2x2 / stride-2 transposed convolution output is cropped to the size
        of ``skip_copy`` (see skip_connection2) before the concatenation, which
        absorbs the extra row/column produced when the encoder size was odd.
        Prints the resulting tensor and returns it.
        """
        x = tf.layers.conv2d_transpose(x,
                   filters=channel_cnt,
                   kernel_size=[2,2],
                   strides=[2,2],
                   padding='same')
        x = self.skip_connection2(skip_copy,x)
        x = self.conv_block(x, channel_cnt)
        x = self.conv_block(x, channel_cnt)
        print(str(x))
        return x

    def create_model(self, x):
        """Build the network on input x and return the per-pixel logits.

        Encoder: two conv blocks at 64 channels, then four down-sampling blocks
        that double the channel count each time (up to 1024).
        Decoder: four up-sampling blocks that halve the channel count again and
        reuse the matching encoder output as skip connection.
        A final 1x1 convolution without activation produces 1 output channel.
        """
        channel_cnt = 64
        x = self.conv_block(x, channel_cnt)
        x = self.conv_block(x, channel_cnt)
        print(str(x))
        copy1 = x

        # down sampling path
        channel_cnt *= 2
        x = self.down_sampling(x, channel_cnt)
        copy2 = x
        channel_cnt *= 2
        x = self.down_sampling(x, channel_cnt)
        copy3 = x
        channel_cnt *= 2
        x = self.down_sampling(x, channel_cnt)
        copy4 = x
        channel_cnt *= 2
        x = self.down_sampling(x, channel_cnt)

        # up sampling path   
        channel_cnt = channel_cnt // 2
        x = self.up_sampling(x, channel_cnt, copy4)
        channel_cnt = channel_cnt // 2
        x = self.up_sampling(x, channel_cnt, copy3)
        channel_cnt = channel_cnt // 2
        x = self.up_sampling(x, channel_cnt, copy2)
        channel_cnt = channel_cnt // 2
        x = self.up_sampling(x, channel_cnt, copy1)

        logits = tf.layers.conv2d(x,
                              filters=1,
                              kernel_size=[1,1],
                              strides=[1,1],
                              padding='same')
        print(str(logits))
        return logits
    
    def iou_cost(self, x, y, eps=1e-7):
        """Soft IoU loss: 1 - sum(x*y) / (sum(x + y - x*y) + eps).

        Args:
            x: predictions (``trainer`` passes sigmoid probabilities).
            y: targets with the same shape as x.
            eps: small constant added to the union so that an all-zero
                prediction/target pair yields 1.0 instead of 0/0 = NaN.

        Returns:
            Scalar loss.
        """
        # http://angusg.com/writing/2016/12/28/optimizing-iou-semantic-segmentation.html
        overlap = x * y
        inter = tf.reduce_sum(overlap)
        union = tf.reduce_sum(x + y - overlap)
        cost = 1.0 - inter / (union + eps)
        return cost
    
    def trainer(self, input_height=519, input_width=162):
        """Create placeholders, model, loss and the Adam training op.

        Args:
            input_height: static height of the input placeholder (default 519,
                the value that used to be hard-coded).
            input_width: static width of the input placeholder (default 162).
                NOTE(review): pass the size batch_data actually produces, or
                None for a dynamic dimension — confirm against the training data.

        Returns:
            Tuple ``(cost, x, y, logits, lr, train_step, prediction,
            flatten_logits, flatten_y)``. ``prediction`` is the sigmoid of the
            logits cropped to y's height/width. For the "iou" loss the returned
            ``flatten_logits`` has already been passed through a sigmoid.

        Raises:
            ValueError: if ``self.loss_type`` is not "xent" or "iou". Checked
                before any op is created, instead of failing on an unbound
                ``cost`` after the whole graph was built.
        """
        if self.loss_type not in self.SUPPORTED_LOSSES:
            raise ValueError(
                "loss_type must be one of {}, got {!r}".format(
                    self.SUPPORTED_LOSSES, self.loss_type))

        y = tf.placeholder(tf.float32, [None, None, None, 1]) 
        x = tf.placeholder(tf.float32, [None, input_height, input_width, 1])
        lr = tf.placeholder(tf.float32, [])
        
        logits = self.create_model(x)
        print("y is {}".format(y))
        
        # crop logits as y's shape
        y_h = tf.shape(y)[1]
        y_w = tf.shape(y)[2]
        crop_logits = self.crop(logits, y_h, y_w)
        print("cropped logits is {}".format(str(crop_logits)))
        
        sigmoid_logits = tf.nn.sigmoid(crop_logits)
        flatten_logits=tf.reshape(crop_logits, [-1])
        print("flattened logits is {}".format(str(flatten_logits)))
        flatten_y = tf.reshape(y, [-1])
        print("flattened y is {}".format(str(flatten_y)))
        if self.loss_type == "xent":
            print("xent loss")
            cost = tf.nn.sigmoid_cross_entropy_with_logits(labels=flatten_y, logits=flatten_logits)
            cost = tf.reduce_mean(cost)
        else:  # "iou" — loss_type was validated above
            print("IoU loss")
            # the IoU loss works on probabilities, not raw logits
            flatten_logits = tf.nn.sigmoid(flatten_logits)
            cost = self.iou_cost(flatten_logits, flatten_y)
        print("cost is" + str(cost))
        train_step = tf.train.AdamOptimizer(lr).minimize(cost)

        prediction = sigmoid_logits
        print("prediction is {}".format(str(prediction)))
        return cost, x, y, logits, lr, train_step, prediction, flatten_logits, flatten_y

# Start from an empty default graph so that re-running this cell does not stack a
# second copy of the network on top of the previous one, then build the IoU model.
tf.reset_default_graph()
unet = UNet(loss_type="iou")
(cost, x, y, logits, lr,
 train_step, prediction, flatten_logits, flatten_y) = unet.trainer()

Tensor("conv2d_2/Relu:0", shape=(?, 519, 162, 64), dtype=float32)
Tensor("conv2d_4/Relu:0", shape=(?, 260, 81, 128), dtype=float32)
Tensor("conv2d_6/Relu:0", shape=(?, 130, 41, 256), dtype=float32)
Tensor("conv2d_8/Relu:0", shape=(?, 65, 21, 512), dtype=float32)
Tensor("conv2d_10/Relu:0", shape=(?, 33, 11, 1024), dtype=float32)
Tensor("conv2d_12/Relu:0", shape=(?, 65, 21, 512), dtype=float32)
Tensor("conv2d_14/Relu:0", shape=(?, 130, 41, 256), dtype=float32)
Tensor("conv2d_16/Relu:0", shape=(?, 260, 81, 128), dtype=float32)
Tensor("conv2d_18/Relu:0", shape=(?, 519, 162, 64), dtype=float32)
Tensor("conv2d_19/BiasAdd:0", shape=(?, 519, 162, 1), dtype=float32)
y is Tensor("Placeholder:0", shape=(?, ?, ?, 1), dtype=float32)
cropped logits is Tensor("strided_slice_24:0", shape=(?, ?, ?, 1), dtype=float32)
flattened logits is Tensor("Reshape:0", shape=(?,), dtype=float32)
flattened y is Tensor("Reshape_1:0", shape=(?,), dtype=float32)
IoU loss
cost isTensor("Sub_1:0", shape=(), dtype=float32

In [74]:
# make sure images exist: keep only the ids that have both an image and a mask PNG
TRAIN_IMAGE_IDS = []
images_list = os.listdir("../data/stage1_train_class_256x256/images/")
masks_list = os.listdir("../data/stage1_train_class_256x256/masks/")
# sets give constant-time membership tests; the lists were scanned once per id before
image_files = set(images_list)
mask_files = set(masks_list)
for im_name in utils.TRAIN_IMAGE_IDS:
  file_name = im_name + ".png"
  if file_name in image_files and file_name in mask_files:
    TRAIN_IMAGE_IDS.append(im_name)
  else:
    print("{} is not found".format(im_name))
print("{} images to be trained".format(len(TRAIN_IMAGE_IDS)))

2224 images to be trained


In [88]:
# # filter out 256x256 images
# TRAIN_IMAGE_IDS = []
# for img in utils.TRAIN_IMAGE_IDS:
#   if len(img.split('_')) == 1:
#     TRAIN_IMAGE_IDS.append(img)
# # TRAIN_IMAGE_IDS = utils.TRAIN_IMAGE_IDS

# ---- training configuration ----
TRAIN_IMAGE_CNT = len(TRAIN_IMAGE_IDS)
if TRAIN_IMAGE_CNT == 0:
    # the batch index arithmetic below divides by the image count
    raise ValueError("TRAIN_IMAGE_IDS is empty, nothing to train on")
iterators = 10000
batch_size = 1
learning_rate_origin = 1e-4
decay = 0.95
# decay every epoch; at least 1 so `it // decay_every` cannot divide by zero
# when batch_size is larger than the number of images
decay_every = max(1, TRAIN_IMAGE_CNT // batch_size)
save_every = 50
show_img_every = 30
ckpt_point = 0  # > 0 resumes from '../ckpt/u-net_IoU.ckpt-<ckpt_point>'

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    # a single Saver is enough; the earlier duplicate was immediately overwritten
    saver = tf.train.Saver()
    if ckpt_point > 0:
        saver.restore(sess, '../ckpt/u-net_IoU.ckpt-' + str(ckpt_point))
    for it in range(ckpt_point+1, iterators):
        # staircase decay: multiply by `decay` once every `decay_every` iterations
        learning_rate = learning_rate_origin * (decay ** (it//decay_every))
        # walk through the id list cyclically; the last batch of a pass may be shorter
        start_idx = it * batch_size % TRAIN_IMAGE_CNT
        end_idx = min(start_idx+batch_size, TRAIN_IMAGE_CNT)
        batch_list = TRAIN_IMAGE_IDS[start_idx: end_idx]
        x_input, y_input = batch_data(batch_list)
        loss, _, pred, fx, fy = sess.run([cost, train_step, prediction, flatten_logits, flatten_y], 
                          feed_dict={x: x_input, y: y_input, lr: learning_rate })
        print("loss at iterator {} is {}".format(it, loss))
        if it % save_every == 0:
            saver.save(sess, "../ckpt/u-net_IoU.ckpt", global_step=it)
            print("ckpt saved")
        if it % show_img_every == 20:
            # show the first sample of the batch: input, then mask | prediction side by side
            print(batch_list[0])
            pred_img = pred[0,:,:,:]
            plt.imshow(x_input[0,:,:,:].squeeze())
            plt.show()
            # index the channel axis instead of reshape(256, -1), which only worked
            # for images that are exactly 256 pixels high
            side_by_side = np.concatenate((y_input[0,:,:,0], pred_img[:,:,0]), axis=1)
            plt.imshow(side_by_side)
            plt.show()

loss at iterator 1 is 0.804696798324585
loss at iterator 2 is 0.9338237643241882
loss at iterator 3 is 0.8060272932052612
loss at iterator 4 is 0.9420183300971985
loss at iterator 5 is 1.0
loss at iterator 6 is 0.9571224451065063
loss at iterator 7 is 0.9651263952255249
loss at iterator 8 is 0.7091830968856812
loss at iterator 9 is 0.8725799322128296
loss at iterator 10 is 0.722870409488678


KeyboardInterrupt: 

In [None]:
"""test"""
# from cs231n assignment
def sigmoid(x):
    """
    A numerically stable version of the logistic sigmoid function.

    exp() is only ever evaluated on non-positive values, so it cannot overflow:
    for x >= 0 the result is 1 / (1 + exp(-x)), for x < 0 it is
    exp(x) / (1 + exp(x)).

    Args:
        x: array-like of real numbers. Integer and boolean inputs are converted
           to float64 first; previously the intermediate buffers inherited the
           integer dtype and truncated the exp() values (sigmoid(1) came out as 1.0).

    Returns:
        ndarray with the same shape as x holding the element-wise sigmoid.
        Floating-point inputs keep their dtype.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float64)
    # exp(-|x|) equals exp(-x) for x >= 0 and exp(x) for x < 0
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
  
# Visual check of the general model: restore checkpoint `ckpt_point` and show the
# prediction for every 256x256 test image.
ckpt_point = 1100
batch_size = 1
show_img_every = 1

# filter out 256x256 images to test
# NOTE(review): scipy.ndimage.imread appears to have been removed in SciPy 1.2 —
# confirm the SciPy version this notebook is pinned to.
from scipy.ndimage import imread
TEST_IMAGE_IDS = []
for img_name in utils.TEST_IMAGE_IDS:
    h, w, _ = imread(utils.TEST_DIR + img_name + ".png").shape
    if h == 256 and w == 256:
        TEST_IMAGE_IDS.append(img_name)

init = tf.global_variables_initializer()
# one Saver is enough; a second one created inside the session only replaced this one
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    saver.restore(sess, '/content/drive/Colab Notebooks/Kaggle_2018/ckpt/u-net_sigmoid_v0.1.ckpt-' + str(ckpt_point))
    for img_name in TEST_IMAGE_IDS:
        print("===============================================")
        origin = imread(utils.TEST_DIR + img_name + ".png")
        plt.imshow(origin)
        plt.show()
        tmp1 = utils.extend_sides(origin)
        tmp2 = np.transpose(tmp1, (2, 0, 1)) # CxHxW
        tmp3 = tmp2[:3,:,:]
        # the (up to 3) colour channels act as the batch axis: CxHxWx1
        tmp4 = np.expand_dims(tmp3, axis=-1)
        inference_logits = sess.run(logits, feed_dict={x: tmp4})
        inference_logits = sigmoid(inference_logits)
        # back to HxWxC so the per-channel predictions are drawn as one image
        plt.imshow(np.transpose(inference_logits.squeeze(), (1,2,0)))
        plt.show()

In [None]:
def sigmoid(x):
    """
    A numerically stable version of the logistic sigmoid function.

    exp() is only ever evaluated on non-positive values, so it cannot overflow:
    for x >= 0 the result is 1 / (1 + exp(-x)), for x < 0 it is
    exp(x) / (1 + exp(x)).

    Args:
        x: array-like of real numbers. Integer and boolean inputs are converted
           to float64 first; previously the intermediate buffers inherited the
           integer dtype and truncated the exp() values (sigmoid(1) came out as 1.0).

    Returns:
        ndarray with the same shape as x holding the element-wise sigmoid.
        Floating-point inputs keep their dtype.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float64)
    # exp(-|x|) equals exp(-x) for x >= 0 and exp(x) for x < 0
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
  
# Visual check of the 256x256-only model: restore checkpoint `ckpt_point` and show
# the prediction for every 256x256 test image.
ckpt_point = 200
batch_size = 1
show_img_every = 1

# filter out 256x256 images to test
# NOTE(review): scipy.ndimage.imread appears to have been removed in SciPy 1.2 —
# confirm the SciPy version this notebook is pinned to.
from scipy.ndimage import imread
TEST_IMAGE_IDS = []
for img_name in utils.TEST_IMAGE_IDS:
    h, w, _ = imread(utils.TEST_DIR + img_name + ".png").shape
    if h == 256 and w == 256:
        TEST_IMAGE_IDS.append(img_name)

init = tf.global_variables_initializer()
# one Saver is enough; a second one created inside the session only replaced this one
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    saver.restore(sess, '/content/drive/Colab Notebooks/Kaggle_2018/ckpt/u-net_sigmoid256x256_v0.1.ckpt-' + str(ckpt_point))
    for img_name in TEST_IMAGE_IDS:
        print("===============================================")
        origin = imread(utils.TEST_DIR + img_name + ".png")
        plt.imshow(origin)
        plt.show()
        tmp1 = utils.extend_sides(origin)
        tmp2 = np.transpose(tmp1, (2, 0, 1)) # CxHxW
        tmp3 = tmp2[:3,:,:]
        # the (up to 3) colour channels act as the batch axis: CxHxWx1
        tmp4 = np.expand_dims(tmp3, axis=-1)
        inference_logits = sess.run(logits, feed_dict={x: tmp4})
        inference_logits = sigmoid(inference_logits)
        # back to HxWxC so the per-channel predictions are drawn as one image
        plt.imshow(np.transpose(inference_logits.squeeze(), (1,2,0)))
        plt.show()

In [None]:
"""
conclusion:
- sigmoid binary loss works better than softmax binary loss
- the Adam optimizer works better than SGD
- the 256x256 model does better than the general model on 256x256 test data
"""