# Author: Mrinmoy sarkar
# email: mrinmoy.pol@gmail.com
# date: 9/27/2018

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import seaborn as sns
sns.set_style('white')
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import time  
from keras.preprocessing.image import load_img
import tensorflow as tf
%matplotlib inline

# companion functions and classes

In [None]:
# Side length (pixels) of the square images as stored on disk; the graph's
# placeholders are sized img_size_ori * img_size_ori.
img_size_ori = 101
# Side length the images are meant to be scaled to for the network.
img_size_target = 128


def unroll(img):
    """Flatten an image to 1-D unless the original and target sizes match.

    Args:
        img: array-like object exposing ``ravel()`` (e.g. a numpy array).

    Returns:
        ``img`` itself when ``img_size_ori == img_size_target``; otherwise
        the result of ``img.ravel()``.
    """
    return img if img_size_ori == img_size_target else img.ravel()

class batch_generator():
    """Sequential mini-batch reader over paired image and mask columns.

    The reader keeps a cursor (``index``) into the data. Batches are served
    in order; a short tail batch is returned when fewer than ``batch_size``
    samples remain, and the call after the data is exhausted rewinds to the
    start.
    """

    def __init__(self, X, y):
        """Store the unrolled images/masks and start at position 0.

        Args:
            X: DataFrame with an ``'images'`` column of per-sample arrays.
            y: DataFrame with a ``'masks'`` column of per-sample arrays.
        """
        self.images = X['images'].apply(unroll).values
        self.masks = y['masks'].apply(unroll).values
        self.max_index = X.shape[0]
        self.index = 0

    def _slice_pair(self, start, stop):
        """Return samples ``start:stop`` of images and masks as dense arrays."""
        picked_images = self.images[start:stop]
        picked_masks = self.masks[start:stop]
        # The stored columns are object arrays holding one array per sample;
        # going through a list lets numpy stack them into one array.
        return np.array(picked_images.tolist()), np.array(picked_masks.tolist())

    def get_next_batch(self, batch_size):
        """Return the next ``(images, masks)`` batch and advance the cursor.

        Args:
            batch_size: number of samples requested.

        Returns:
            Tuple of two arrays. The batch is shorter than ``batch_size``
            when only a tail of the data is left.
        """
        remaining = self.max_index - self.index
        if batch_size > remaining:
            if remaining > 0:
                # Not enough for a full batch: hand out whatever is left.
                first = self.index
                self.index = self.max_index
                return self._slice_pair(first, self.max_index)
            # Nothing left at all: rewind and serve from the beginning.
            self.reset_index()
        first = self.index
        self.index = first + batch_size
        return self._slice_pair(first, self.index)

    def get_all_data(self):
        """Return every sample as ``(images, masks)`` without moving the cursor."""
        return self._slice_pair(0, self.max_index)

    def reset_index(self):
        """Move the cursor back to the first sample."""
        self.index = 0

# Load data from file

In [None]:
# depths.csv lists every sample id; train.csv is read for its id column only
# (usecols=[0]) and then picks up the matching depth rows through the join.
depths_df = pd.read_csv("../data/depths.csv", index_col="id")
train_df = pd.read_csv("../data/train.csv", index_col="id", usecols=[0]).join(depths_df)

# Every id that has a depth entry but is not a training id forms the test set.
is_train_id = depths_df.index.isin(train_df.index)
test_df = depths_df[~is_train_id]

In [None]:
def _read_gray_scaled(path):
    """Load the image at ``path`` as grayscale and divide its pixels by 255."""
    return np.array(load_img(path, color_mode="grayscale")) / 255


# One array per training id, stored in train_df index order.
image_path_template = "../data/train/images/{}.png"
mask_path_template = "../data/train/masks/{}.png"
train_df["images"] = [_read_gray_scaled(image_path_template.format(idx)) for idx in train_df.index]
train_df["masks"] = [_read_gray_scaled(mask_path_template.format(idx)) for idx in train_df.index]

In [None]:
# Sanity check: shape of the first loaded image. train_df is indexed by the
# "id" column, so the first row is selected by position with .iloc rather
# than relying on the deprecated integer-key fallback of Series[...].
train_df["images"].iloc[0].shape

# Show some sample image data

In [None]:
# Preview six consecutive samples starting at positional row `offset`.
# The figure is a 3x4 grid: every row holds two (image, mask) pairs.
offset = 20
for ind in [1,5,9]:
    # `ind` is the subplot number of the row's first cell; it also selects
    # the first sample of the row: 0, 2, 4 (+ offset).
    first_sample = (ind-1)//2+offset
    for pair, sample in enumerate((first_sample, first_sample+1)):
        # train_df is indexed by id, so rows are picked by position (.iloc);
        # integer keys on a non-integer Series index are deprecated.
        plt.subplot(3,4,ind+2*pair)
        plt.imshow(train_df["images"].iloc[sample],cmap='gray')
        plt.subplot(3,4,ind+2*pair+1)
        plt.imshow(train_df["masks"].iloc[sample],cmap='gray')

# Split data into train and test set

In [None]:
# Single-column frames: images are the inputs, masks are the targets.
X = train_df[["images"]]
y = train_df[["masks"]]

# Hold out 20% of the samples. No random_state is passed, so the split
# differs from run to run.
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
total_no_of_train_samples = X_train.shape[0]

# Sequential batch readers over each partition.
batch_gen = batch_generator(X_train, y_train)
testbatch_gen = batch_generator(X_test, y_test)

# Build the DCNN Auto-Encoder Network

In [None]:
# Build the TF1 graph of a convolutional encoder/decoder: flattened images go
# in, per-pixel logits come out, and the loss is the sigmoid cross-entropy
# against the flattened masks.
# NOTE(review): checkpoints are restored into this graph elsewhere in the
# file; tf.layers presumably names variables by creation order, so the order
# of the layer calls below should not be changed -- confirm before refactoring.
tf.reset_default_graph()
learning_rate = 0.01
# Dilation applied in both spatial directions of every conv layer below.
dilation_rate_r = 1

#with tf.device('/gpu:0'):
# if True:
# Flattened inputs and targets: one row of img_size_ori*img_size_ori values per sample.
x = tf.placeholder(tf.float32,shape=[None,img_size_ori*img_size_ori])
y_true = tf.placeholder(tf.float32,shape=[None,img_size_ori*img_size_ori])

# Restore the 2-D layout with a single channel: [batch, height, width, 1].
inputs_ori = tf.reshape(x,[-1,img_size_ori,img_size_ori,1])#tf.placeholder(tf.float32,shape=(None,128,128,1),name='inputs')
targets_ori = tf.reshape(y_true,[-1,img_size_ori,img_size_ori,1])#tf.placeholder(tf.float32,shape=(None,128,128,1),name='targets')

# Only the inputs are resized to 128x128; the targets stay at the original
# size and the network output is resized back to it before the loss.
inputs = tf.image.resize_images(inputs_ori,size=(128,128),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
#     targets = tf.image.resize_images(targets_ori,size=(128,128),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)


# Encoder: five stages of 3x3 conv (ReLU, 'same' padding) followed by a 2x2
# max-pool with stride 2, halving the spatial size each time:
# 128 -> 64 -> 32 -> 16 -> 8 -> 4.
conv1 = tf.layers.conv2d(inputs=inputs,filters=8,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)
maxpool1 = tf.layers.max_pooling2d(inputs=conv1,pool_size=(2,2),strides=(2,2),padding='same')

conv2 = tf.layers.conv2d(inputs=maxpool1,filters=8,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)
maxpool2 = tf.layers.max_pooling2d(inputs=conv2,pool_size=(2,2),strides=(2,2),padding='same')

conv3 = tf.layers.conv2d(inputs=maxpool2,filters=16,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)
maxpool3 = tf.layers.max_pooling2d(inputs=conv3,pool_size=(2,2),strides=(2,2),padding='same')

conv4 = tf.layers.conv2d(inputs=maxpool3,filters=16,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)
maxpool4 = tf.layers.max_pooling2d(inputs=conv4,pool_size=(2,2),strides=(2,2),padding='same')

conv5 = tf.layers.conv2d(inputs=maxpool4,filters=8,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)
# Bottleneck representation (8 feature maps).
encoded = tf.layers.max_pooling2d(inputs=conv5,pool_size=(2,2),strides=(2,2),padding='same')


# Decoder: nearest-neighbour upsampling followed by a 3x3 conv at each stage,
# growing the spatial size 8 -> 16 -> 32 -> 64 -> 128.
upsampler1 = tf.image.resize_images(encoded,size=(8,8),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
conv6 = tf.layers.conv2d(inputs=upsampler1,filters=8,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)

upsampler2 = tf.image.resize_images(conv6,size=(16,16),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
conv7 = tf.layers.conv2d(inputs=upsampler2,filters=16,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)

upsampler3 = tf.image.resize_images(conv7,size=(32,32),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
conv8 = tf.layers.conv2d(inputs=upsampler3,filters=16,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)

upsampler4 = tf.image.resize_images(conv8,size=(64,64),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
conv9 = tf.layers.conv2d(inputs=upsampler4,filters=8,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)

upsampler5 = tf.image.resize_images(conv9,size=(128,128),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
conv10 = tf.layers.conv2d(inputs=upsampler5,filters=8,kernel_size=(3,3),dilation_rate=(dilation_rate_r,dilation_rate_r),padding='same',activation=tf.nn.relu)

# Resize back to the original image size so the output lines up with targets_ori.
downsampler = tf.image.resize_images(conv10,size=(img_size_ori,img_size_ori),method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

# Single-channel output without activation: raw per-pixel logits.
logits = tf.layers.conv2d(inputs=downsampler,filters=1,kernel_size=(3,3),padding='same',activation=None)
# Sigmoid of the logits; this is the tensor evaluated at prediction time.
decoded = tf.nn.sigmoid(logits)
# Per-pixel cross-entropy computed from the logits, averaged into a scalar cost.
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_ori,logits=logits)
cost = tf.reduce_mean(loss)
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost)



# Train the network and calculate Loss

In [None]:
# Train the graph built above, periodically record the average training and
# test cost, and save a checkpoint when all epochs are done.
t = time.time()  # start of the whole run, used for the total training time
epochs = 1001
batch_size = 100
# Loss history, appended to every 50 epochs (see below).
training_error = []
test_error = []
no_of_iteration = []
# When True, training resumes from the checkpoint numbered iterationNo;
# otherwise the variables are freshly initialised.
loadmodel_fromfile = True
iterationNo = 50000
model_file_name = "./tmp/model_"+str(iterationNo)+".ckpt"

saver = tf.train.Saver()

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    if loadmodel_fromfile:
        saver.restore(sess, model_file_name)
    else:
        # Initialise the variables, requesting partition graphs in the run metadata.
        options = tf.RunOptions(output_partition_graphs=True)
        metadata = tf.RunMetadata()
        sess.run(tf.global_variables_initializer(),options=options, run_metadata=metadata)

    for e in range(epochs):
        t1 = time.time()
        # One optimizer step per full batch that fits into the training set.
        for ii in range(total_no_of_train_samples//batch_size):
            x_train_batch,y_train_batch = batch_gen.get_next_batch(batch_size=batch_size)
            batch_cost, _ = sess.run([cost,optimizer],feed_dict={x:x_train_batch,y_true:y_train_batch})
#             print("Trainning loss: {:.4f}".format(batch_cost))
#             print(sess.run([cost,optimizer],feed_dict={x:x_train,y_true:y_train},options=options, run_metadata=metadata))
#             print(metadata.partition_graphs)
        # batch_cost is the cost of the last batch of this epoch only.
        # NOTE(review): it is undefined if the inner loop never runs
        # (fewer training samples than batch_size).
        print("epoch No.: ", e, ' time per epoch: ', (time.time()-t1),' batch cost: ', batch_cost)
        # Every 50th epoch (except epoch 0): estimate the training and test loss.
        if e%50 == 0 and e != 0:
            tr_loss = 0
            te_loss = 0
            n = 0  # number of evaluated batches
            t1 = time.time()
            # Two sweeps; each draws one training and one test batch per step.
            # The training batches come from the same batch_gen used for the
            # optimizer steps, so this evaluation also advances its position.
            for i in range(2):
                for ii in range(total_no_of_train_samples//batch_size):
                    xt,yt = batch_gen.get_next_batch(batch_size=batch_size)
                    training_loss = sess.run(cost,feed_dict={x:xt,y_true:yt})
                    tr_loss += training_loss

                    xtest,ytest = testbatch_gen.get_next_batch(batch_size=batch_size)
                    test_loss = sess.run(cost,feed_dict={x:xtest,y_true:ytest})
                    te_loss += test_loss
                    n += 1 
            # Unweighted mean of the per-batch costs.
            print('training loss: ', tr_loss/n, ' test loss: ', te_loss/n, 'test time: ', time.time()-t1)
            training_error.append(tr_loss/n)
            test_error.append(te_loss/n)
            # x-axis value for the loss plot: epoch offset by the restored checkpoint number.
            no_of_iteration.append(e+iterationNo)
    # e still holds the last epoch index here, so the checkpoint is numbered
    # iterationNo + epochs - 1.
    save_path = saver.save(sess, "./tmp/model_"+str(e+iterationNo)+".ckpt")
    print("Model saved in path: %s" % save_path)
    training_time = time.time()-t
    print('training time: ', training_time)
    

# Plot the training and testing loss

In [None]:
# Turn the three history lists into a table with one row per evaluation point.
# Going through a single numpy array gives all columns a common dtype.
data = np.array([no_of_iteration,training_error,test_error]).T
df = pd.DataFrame(data=data,columns=['no_of_iteration','training_error','test_error'])

# When training resumed from a checkpoint, extend the history stored on disk;
# otherwise this run's history is the whole history.
concated_pd = df
if loadmodel_fromfile:
    df_old = pd.read_csv('trainerror.csv')
    concated_pd = pd.concat([df_old,df],axis=0).reset_index(drop=True)

concated_pd.to_csv(path_or_buf='trainerror.csv',index=False)

# From here on the three names refer to the full (stored + new) history.
no_of_iteration, training_error, test_error = (
    concated_pd[column] for column in ('no_of_iteration','training_error','test_error')
)

for curve, curve_label in ((training_error,"Training Loss"),(test_error,"Test Loss")):
    plt.plot(no_of_iteration,curve,label=curve_label)
plt.grid()
plt.xlabel("No. of Iteration")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Predict new instances

In [None]:
# Restore a saved checkpoint, run the network on a few test samples and show
# each ground-truth mask next to the thresholded prediction.
iterationNo=51000
model_file_name = "./tmp/model_"+str(iterationNo)+".ckpt"
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    saver.restore(sess, model_file_name)
    xtest,ytest = testbatch_gen.get_next_batch(batch_size=6)
    print(xtest.shape)
    # Only the input placeholder is fed: `decoded` does not depend on the targets.
    output = sess.run(decoded,feed_dict={x:xtest})

plt.subplots_adjust(hspace=0.6)
# Iterate over what was actually returned: the generator may hand back a
# short tail batch, in which case indexing a fixed range(6) would fail.
for i, (mask_flat, pred_flat) in enumerate(zip(ytest, output)):
    # Use a dedicated name for the mask. Assigning to `y_true` here would
    # overwrite the graph's target placeholder of the same name and break
    # every later feed_dict that uses it.
    mask_true = mask_flat.reshape((img_size_ori,img_size_ori))
    y_pred = pred_flat.reshape(img_size_ori,img_size_ori)
    y_pred = y_pred>0.5 # 0.5 is the threshold
    # 3x4 grid: samples fill the odd cells, their predictions the even ones.
    plt.subplot(3,4,2*i+1)
    plt.imshow(mask_true,cmap='gray')
    plt.title('ground truth')
    plt.subplot(3,4,2*i+2)
    plt.imshow(y_pred,cmap='gray')
    plt.title('predicted')

# 10-fold cross validation

In [None]:
# 10-fold cross validation: for every fold, train on nine tenths of train_df,
# then measure the average training and test cost; finally report the mean
# over all folds.
kf = KFold(n_splits=10)

# Settings shared by every fold.
epochs = 500
batch_size = 100
loadmodel_fromfile = True
iterationNo = 20000
model_file_name = "./tmp/model_"+str(iterationNo)+".ckpt"
# One Saver is enough: the graph's variables do not change between folds.
saver = tf.train.Saver()

# Per-fold results. These must live outside the fold loop; resetting them
# inside it would leave only the last fold in the final average.
training_error = []
test_error = []
no_of_iteration = []

for train_idx, test_idx in kf.split(train_df):
    train = train_df.iloc[train_idx]
    test = train_df.iloc[test_idx]
    X_train = pd.DataFrame(data=train["images"],columns=['images'])
    y_train = pd.DataFrame(data=train["masks"],columns=['masks'])
    X_test =  pd.DataFrame(data=test["images"],columns=['images'])
    y_test =  pd.DataFrame(data=test["masks"],columns=['masks'])
    batch_gen = batch_generator(X_train,y_train)
    testbatch_gen = batch_generator(X_test,y_test)
    total_no_of_train_samples = X_train.shape[0]

    with tf.Session() as sess:
        # Every fold starts from the same state: either the stored checkpoint
        # or freshly initialised variables.
        # NOTE(review): if the checkpoint was trained on samples that fall
        # into this fold's held-out part, the test loss is optimistic -- confirm.
        if loadmodel_fromfile:
            saver.restore(sess, model_file_name)
        else:
            options = tf.RunOptions(output_partition_graphs=True)
            metadata = tf.RunMetadata()
            sess.run(tf.global_variables_initializer(),options=options, run_metadata=metadata)

        for e in range(epochs):
            for ii in range(total_no_of_train_samples//batch_size):
                x_train_batch,y_train_batch = batch_gen.get_next_batch(batch_size=batch_size)
                sess.run([cost,optimizer],feed_dict={x:x_train_batch,y_true:y_train_batch})

        # Evaluate this fold: unweighted mean of the per-batch costs over two
        # sweeps, drawing one training and one test batch per step.
        tr_loss = 0
        te_loss = 0
        n = 0
        for i in range(2):
            for ii in range(total_no_of_train_samples//batch_size):
                xt,yt = batch_gen.get_next_batch(batch_size=batch_size)
                training_loss = sess.run(cost,feed_dict={x:xt,y_true:yt})
                tr_loss += training_loss

                xtest,ytest = testbatch_gen.get_next_batch(batch_size=batch_size)
                test_loss = sess.run(cost,feed_dict={x:xtest,y_true:ytest})
                te_loss += test_loss
                n += 1
        print('training loss: ', tr_loss/n, ' test loss: ', te_loss/n)
        training_error.append(tr_loss/n)
        test_error.append(te_loss/n)

print("10-fold training loss: ", sum(training_error)/len(training_error), " and test loss: ",  sum(test_error)/len(test_error))


In [None]:
# Inspect the shape of the most recently drawn training batch.
x_train_batch.shape

In [None]:
# Show the first sample of the most recently drawn test batch.
xtest[0]