In [1]:
import tensorflow as tf
import tensorflow.compat.v1 as tf1
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage

In [None]:
def conv2d(input_images, weight):
    """Convolve ``input_images`` with ``weight`` using unit strides and 'SAME' padding.

    The op is created with the name "conv".
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(
        input_images,
        weight,
        strides=unit_strides,
        padding='SAME',
        name="conv",
    )

In [None]:
def max_pool(x):
    """Max-pool ``x`` with a [1,2,2,1] window and [1,2,2,1] strides ('VALID' padding).

    The op is created with the name 'maxpool'.
    """
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, pool_window, pool_strides, padding='VALID', name='maxpool')

In [None]:
def conv_bn_relu(input_layer, bias_shape, kernel_shape,scope ,reuse =False ):
    """Build convolution -> bias add -> batch normalisation -> ReLU.

    Args:
        input_layer: tensor fed to the convolution.
        bias_shape: shape of the "biases" variable.
        kernel_shape: shape of the "weight" variable (the convolution filter).
        scope: variable scope under which both variables are created/looked up.
        reuse: forwarded to ``variable_scope`` so two towers can share weights.

    Returns:
        The ReLU-activated tensor.
    """
    make_init = tf1.glorot_uniform_initializer
    with tf1.variable_scope(scope, reuse=reuse):
        kernel = tf1.get_variable("weight", kernel_shape, initializer=make_init())
        bias = tf1.get_variable("biases", bias_shape, initializer=make_init())
        biased = tf.nn.bias_add(conv2d(input_layer, kernel), bias)
        # Batch normalisation is called with its default arguments here.
        normalised = tf1.layers.batch_normalization(biased)
        activated = tf.nn.relu(normalised)
    return activated
        
    

In [None]:
def conv(input_layer, bias_shape, kernel_shape,scope, reuse=False):
    """Build a plain convolution followed by a bias add (no normalisation, no activation).

    Args:
        input_layer: tensor fed to the convolution.
        bias_shape: shape of the "biases" variable.
        kernel_shape: shape of the "weight" variable.
        scope: variable scope holding both variables.
        reuse: forwarded to ``variable_scope``.

    Returns:
        The biased convolution output.
    """
    make_init = tf1.glorot_uniform_initializer
    with tf1.variable_scope(scope, reuse=reuse):
        kernel = tf1.get_variable("weight", kernel_shape, initializer=make_init())
        bias = tf1.get_variable("biases", bias_shape, initializer=make_init())
        biased = tf.nn.bias_add(conv2d(input_layer, kernel), bias)
    return biased

In [None]:
def deconv(input_layer, bias_shape, kernel_shape, output_shape, scope ,reuse =False):
    """Build a transposed convolution with strides [1,2,2,1] followed by a bias add.

    Args:
        input_layer: tensor to upsample.
        bias_shape: shape of the "bias" variable.
        kernel_shape: shape of the "weight" variable.
        output_shape: output shape passed to ``conv2d_transpose``.
        scope: variable scope holding both variables.
        reuse: forwarded to ``variable_scope``.

    Returns:
        The biased transposed-convolution output.
    """
    make_init = tf1.glorot_uniform_initializer
    with tf1.variable_scope(scope, reuse=reuse):
        kernel = tf1.get_variable("weight", kernel_shape, initializer=make_init())
        # Note: this variable is named "bias", not "biases" as in the conv layers.
        bias = tf1.get_variable("bias", bias_shape, initializer=make_init())
        upsampled = tf.nn.conv2d_transpose(
            input_layer, kernel, output_shape, strides=[1, 2, 2, 1]
        )
        biased = tf.nn.bias_add(upsampled, bias)
    return biased
        

In [None]:
def inner_product(left, right, disp, width):
    """Correlate ``left`` with ``disp`` horizontally shifted windows of ``right``.

    For each ``i`` in ``range(disp)`` a window of ``width`` columns starting at
    column ``disp - 1 - i`` is sliced from axis 2 of ``right``, multiplied
    element-wise with ``left`` and summed over axis 3. The per-shift results are
    stacked and transposed so that the shift index becomes the last axis.

    Args:
        left: 4-D tensor; assumed [batch, rows, width, features] -- TODO confirm.
        right: 4-D tensor whose axis 2 must hold at least ``disp - 1 + width`` columns.
        disp: number of shifts (classes) to evaluate; must be >= 1.
        width: number of columns in each window taken from ``right``.

    Returns:
        Tensor of scores with the shift index on the last axis.

    Raises:
        ValueError: if ``disp`` is smaller than 1.
    """
    if disp < 1:
        raise ValueError("disp must be at least 1")

    per_shift = []
    for i in range(disp):
        start = disp - 1 - i
        window = right[:, :, start:start + width, :]
        per_shift.append(tf.reduce_sum(tf.multiply(left, window), axis=3))

    # Stack/transpose once, after the loop: doing it per iteration only adds
    # redundant ops to the graph, since just the last result is returned.
    return tf.transpose(tf.stack(per_shift), [1, 2, 3, 0])

def compute_loss(y_truth, y_pre):
    """Mean sparse softmax cross-entropy over pixels whose label is non-zero.

    Args:
        y_truth: integer label tensor; entries equal to 0 are excluded from the loss.
        y_pre: logits tensor with one more axis than ``y_truth`` (class scores last).

    Returns:
        Scalar tensor holding the mean cross-entropy of the retained pixels.
        NOTE(review): if no label is non-zero the mean is taken over an empty
        tensor -- confirm batches always contain at least one valid pixel.
    """
    valid_pixels = tf.not_equal(y_truth, 0)
    labels = tf.reshape(tf.boolean_mask(y_truth, valid_pixels), [-1])
    # boolean_mask with a mask covering all but the last axis already yields
    # [n_valid, n_classes], so no hard-coded class count is needed.
    logits = tf.boolean_mask(y_pre, valid_pixels)
    # Keyword arguments: the positional order differs between TF API versions.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(loss)

In [None]:
def network_4(input_image, n_channels, n_filters, batch_size, H,W, reuse=False):
    """Build the shallow feature tower: 3 conv-bn-relu layers, 1 conv, 1 deconv.

    A single max-pool follows 'conv2'; 'deconv1' then produces an output of shape
    ``[batch_size, H, W, n_filters]``.

    Args:
        input_image: input tensor with ``n_channels`` channels.
        n_channels: channel count of ``input_image``.
        n_filters: number of filters used by every layer.
        batch_size, H, W: used to build the deconvolution output shape.
        reuse: forwarded to every layer so a second tower can share weights.

    Returns:
        Output tensor of 'deconv1'.
    """
    def bn_block(x, in_channels, scope):
        # All conv-bn-relu layers use a 3x3 kernel and n_filters outputs.
        return conv_bn_relu(x, bias_shape=[n_filters],
                            kernel_shape=[3, 3, in_channels, n_filters],
                            scope=scope, reuse=reuse)

    net = bn_block(input_image, n_channels, 'conv1')
    net = max_pool(bn_block(net, n_filters, 'conv2'))
    net = bn_block(net, n_filters, 'conv3')
    net = conv(net, [n_filters], [3, 3, n_filters, n_filters], 'conv4', reuse=reuse)
    return deconv(net, [n_filters], [3, 3, n_filters, n_filters],
                  [batch_size, H, W, n_filters], 'deconv1', reuse=reuse)

In [None]:
def network_7(input_image, n_channels, n_filters, batch_size, H,W, reuse=False):
    """Build the deeper feature tower: 6 conv-bn-relu layers, 1 conv, 2 deconvs.

    Max-pooling follows 'conv2' and 'conv4'. 'deconv1' targets
    ``[batch_size, int(H/2), int(W/2), n_filters]`` and 'deconv2' targets
    ``[batch_size, H, W, n_filters]``.

    Args:
        input_image: input tensor with ``n_channels`` channels.
        n_channels: channel count of ``input_image``.
        n_filters: number of filters used by every layer.
        batch_size, H, W: used to build the deconvolution output shapes.
        reuse: forwarded to every layer so a second tower can share weights.

    Returns:
        Output tensor of 'deconv2'.
    """
    def bn_block(x, in_channels, scope):
        # All conv-bn-relu layers use a 3x3 kernel and n_filters outputs.
        return conv_bn_relu(x, [n_filters],
                            kernel_shape=[3, 3, in_channels, n_filters],
                            scope=scope, reuse=reuse)

    def upsample(x, height, width, scope):
        return deconv(x, [n_filters], [3, 3, n_filters, n_filters],
                      [batch_size, height, width, n_filters], scope, reuse=reuse)

    net = bn_block(input_image, n_channels, 'conv1')
    net = max_pool(bn_block(net, n_filters, 'conv2'))

    net = bn_block(net, n_filters, 'conv3')
    net = max_pool(bn_block(net, n_filters, 'conv4'))

    net = bn_block(net, n_filters, 'conv5')
    net = bn_block(net, n_filters, 'conv6')

    net = conv(net, [n_filters], [3, 3, n_filters, n_filters], 'conv7', reuse=reuse)

    net = upsample(net, int(H / 2), int(W / 2), 'deconv1')
    return upsample(net, H, W, 'deconv2')
    

In [None]:
# --- Patch-training configuration -------------------------------------------
# Number of non-zero disparity levels; together with 0 this gives num_classes.
max_disp = 128
# Side length of the square left patch.
rec_field = 28
# The right patch is wider by max_disp columns so every shift fits inside it.
patch_right = rec_field + max_disp
batch_size = 8
num_classes = max_disp + 1
left_shape = [batch_size, rec_field, rec_field, 3]
right_shape = [batch_size, rec_field, patch_right,3]

gt_shape = [batch_size, rec_field, rec_field]

# Placeholders are used below, so eager execution is switched off first.
tf.compat.v1.disable_eager_execution()
image_left  = tf.compat.v1.placeholder(tf.float32, left_shape,  name='image_left' )
image_right = tf.compat.v1.placeholder(tf.float32, right_shape, name='image_right')
im_gt = tf.compat.v1.placeholder(tf.int32, gt_shape, name='groud_truth')

# Two towers with shared weights: the right tower is built with reuse=True.
with tf.compat.v1.name_scope("stereo_matching") as scope:
    left_network =  network_7(image_left,  3, 64, batch_size, rec_field, rec_field)
    right_network = network_7(image_right, 3, 64, batch_size, rec_field, patch_right,reuse=True)
    output = inner_product(left_network, right_network, num_classes, rec_field)
# NOTE(review): the writer receives the graph before `loss` is built on the next
# line, so the loss ops are presumably missing from the exported graph -- confirm.
writer = tf1.summary.FileWriter('./graphs',graph=tf1.get_default_graph())
loss = compute_loss(im_gt,output)

In [None]:
# Step counter incremented by the optimiser; excluded from trainable variables.
global_step = tf.Variable(0, trainable=False)
# NOTE(review): 0.1 is an unusually large initial rate for Adam -- confirm intended.
starter_learning_rate = 0.1
# Staircase decay: multiply the rate by 0.96 every 100000 steps.
learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate,
                                                     global_step, 100000, 0.96, staircase=True)
# Training op: one Adam step on `loss`, which also advances `global_step`.
train_network = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss,global_step=global_step)

In [None]:
# Directory holding the pre-extracted patch arrays (machine-specific absolute path).
path = '/media/sansii/Software/san_projects/Major_project/KITTI_dataset/numpy'
# Number of full passes over the loaded arrays.
epoch = 1000
left = np.load(path+'/image_left_2.npy')
left = left.astype('float32')
right = np.load(path+'/image_right_2.npy')
right = right.astype('float32')
gt = np.load(path+'/im_gt_2.npy')
variable_init = tf1.global_variables_initializer()
with tf1.Session() as sess:
    sess.run(variable_init)
    for i in range(epoch):
        # Only complete batches are fed; a trailing partial batch is skipped.
        for j in range(left.shape[0] // batch_size):
            a = j * batch_size
            b = a + batch_size
            _, L = sess.run([train_network, loss],feed_dict = {image_left: left[a:b,:,:,:],
                                                                  image_right: right[a:b,:,:,:],
                                                                  im_gt: gt[a:b,:,:]})
            if j%20 == 0 :
                # Format the number itself: truncating str(L) to five characters
                # drops the exponent of values such as 1.2345e-05.
                print('\r',"Epoch:",str(i)," Step:",str(j)," Loss:",'{:.5g}'.format(float(L)),end='')
        

In [None]:
# Two random integer arrays (values 0..19, shape 8x28x28) used to try out error() below.
a = np.random.randint(0,20,size=(8,28,28))
b = np.random.randint(0,20,size=(8,28,28))

def error(output,y_,threshold=5):
    """Fraction of valid pixels whose absolute error exceeds ``threshold``.

    A pixel is valid when its ground-truth value in ``y_`` is non-zero.

    Args:
        output: predicted values, same shape as ``y_``.
        y_: ground-truth values; zeros mark pixels to ignore.
        threshold: absolute difference above which a pixel counts as wrong.

    Returns:
        Tuple ``(error_ratio, n_err, valid_shape)`` where ``error_ratio`` is
        ``n_err / n_valid`` (0.0 when there is no valid pixel), ``n_err`` is the
        number of wrong valid pixels and ``valid_shape`` is the shape of the
        1-D array of valid-pixel errors.
    """
    # Subtract in float64 so unsigned inputs (e.g. uint8 images) cannot wrap around.
    predicted = np.asarray(output, dtype=np.float64)
    truth = np.asarray(y_, dtype=np.float64)
    valid_pixels = np.abs(predicted - truth)[truth != 0]
    n_err = np.sum(valid_pixels > threshold)
    n_total = len(valid_pixels)
    # Guard against a patch with no ground truth instead of dividing by zero.
    ratio = float(n_err) / float(n_total) if n_total else 0.0
    return ratio, n_err, valid_pixels.shape
    
# Quick check of error() on the random arrays defined above.
print(error(a,b))
  

In [6]:
def conv2d(input_images, weight):
    """Convolve ``input_images`` with ``weight`` using unit strides and 'SAME' padding.

    The op is created with the name "conv".
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(
        input_images,
        weight,
        strides=unit_strides,
        padding='SAME',
        name="conv",
    )

def max_pool(x):
    """Max-pool ``x`` with a [1,2,2,1] window and [1,2,2,1] strides ('VALID' padding).

    The op is created with the name 'maxpool'.
    """
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, pool_window, pool_strides, padding='VALID', name='maxpool')

def conv_bn_relu(input_layer, bias_shape, kernel_shape, phase, scope ,reuse =False ):
    """Build convolution -> bias add -> batch normalisation -> ReLU.

    Args:
        input_layer: tensor fed to the convolution.
        bias_shape: shape of the "biases" variable.
        kernel_shape: shape of the "weight" variable (the convolution filter).
        phase: value passed as ``training`` to batch normalisation.
        scope: variable scope under which both variables are created/looked up.
        reuse: forwarded to ``variable_scope`` so two towers can share weights.

    Returns:
        The ReLU-activated tensor.
    """
    make_init = tf1.glorot_uniform_initializer
    with tf1.variable_scope(scope, reuse=reuse):
        kernel = tf1.get_variable("weight", kernel_shape, initializer=make_init())
        bias = tf1.get_variable("biases", bias_shape, initializer=make_init())
        biased = tf.nn.bias_add(conv2d(input_layer, kernel), bias)
        normalised = tf1.layers.batch_normalization(biased, training=phase)
        activated = tf.nn.relu(normalised)
    return activated

def conv(input_layer, bias_shape, kernel_shape,scope, reuse=False):
    """Build a plain convolution followed by a bias add (no normalisation, no activation).

    Args:
        input_layer: tensor fed to the convolution.
        bias_shape: shape of the "biases" variable.
        kernel_shape: shape of the "weight" variable.
        scope: variable scope holding both variables.
        reuse: forwarded to ``variable_scope``.

    Returns:
        The biased convolution output.
    """
    make_init = tf1.glorot_uniform_initializer
    with tf1.variable_scope(scope, reuse=reuse):
        kernel = tf1.get_variable("weight", kernel_shape, initializer=make_init())
        bias = tf1.get_variable("biases", bias_shape, initializer=make_init())
        biased = tf.nn.bias_add(conv2d(input_layer, kernel), bias)
    return biased

def deconv(input_layer, bias_shape, kernel_shape, output_shape, scope ,reuse =False):
    """Build a transposed convolution with strides [1,2,2,1] followed by a bias add.

    Args:
        input_layer: tensor to upsample.
        bias_shape: shape of the "bias" variable.
        kernel_shape: shape of the "weight" variable.
        output_shape: output shape passed to ``conv2d_transpose``.
        scope: variable scope holding both variables.
        reuse: forwarded to ``variable_scope``.

    Returns:
        The biased transposed-convolution output.
    """
    make_init = tf1.glorot_uniform_initializer
    with tf1.variable_scope(scope, reuse=reuse):
        kernel = tf1.get_variable("weight", kernel_shape, initializer=make_init())
        # Note: this variable is named "bias", not "biases" as in the conv layers.
        bias = tf1.get_variable("bias", bias_shape, initializer=make_init())
        upsampled = tf.nn.conv2d_transpose(
            input_layer, kernel, output_shape, strides=[1, 2, 2, 1]
        )
        biased = tf.nn.bias_add(upsampled, bias)
    return biased
def inner_product(left, right, disp, width):
    """Correlate ``left`` with ``disp`` horizontally shifted windows of ``right``.

    For each ``i`` in ``range(disp)`` a window of ``width`` columns starting at
    column ``disp - 1 - i`` is sliced from axis 2 of ``right``, multiplied
    element-wise with ``left`` and summed over axis 3. The per-shift results are
    stacked and transposed so that the shift index becomes the last axis.

    Args:
        left: 4-D tensor; assumed [batch, rows, width, features] -- TODO confirm.
        right: 4-D tensor whose axis 2 must hold at least ``disp - 1 + width`` columns.
        disp: number of shifts (classes) to evaluate; must be >= 1.
        width: number of columns in each window taken from ``right``.

    Returns:
        Tensor of scores with the shift index on the last axis.

    Raises:
        ValueError: if ``disp`` is smaller than 1.
    """
    if disp < 1:
        raise ValueError("disp must be at least 1")

    per_shift = []
    for i in range(disp):
        start = disp - 1 - i
        window = right[:, :, start:start + width, :]
        per_shift.append(tf.reduce_sum(tf.multiply(left, window), axis=3))

    # Stack/transpose once, after the loop: doing it per iteration only adds
    # redundant ops to the graph, since just the last result is returned.
    return tf.transpose(tf.stack(per_shift), [1, 2, 3, 0])

def compute_loss(y_truth, y_pre):
    """Mean sparse softmax cross-entropy over pixels whose label is non-zero.

    Args:
        y_truth: integer label tensor; entries equal to 0 are excluded from the loss.
        y_pre: logits tensor with one more axis than ``y_truth`` (class scores last).

    Returns:
        Scalar tensor holding the mean cross-entropy of the retained pixels.
        NOTE(review): if no label is non-zero the mean is taken over an empty
        tensor -- confirm batches always contain at least one valid pixel.
    """
    valid_pixels = tf.not_equal(y_truth, 0)
    labels = tf.reshape(tf.boolean_mask(y_truth, valid_pixels), [-1])
    # boolean_mask with a mask covering all but the last axis already yields
    # [n_valid, n_classes], so no hard-coded class count is needed.
    logits = tf.boolean_mask(y_pre, valid_pixels)
    # Keyword arguments: the positional order differs between TF API versions.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(loss)
def network_4(input_image, n_channels, n_filters, batch_size, H,W, phase , reuse=False):
    """Build the shallow feature tower: 3 conv-bn-relu layers, 1 conv, 1 deconv.

    A single max-pool follows 'conv2'; 'deconv1' then produces an output of shape
    ``[batch_size, H, W, n_filters]``.

    Args:
        input_image: input tensor with ``n_channels`` channels.
        n_channels: channel count of ``input_image``.
        n_filters: number of filters used by every layer.
        batch_size, H, W: used to build the deconvolution output shape.
        phase: forwarded to every conv-bn-relu layer (batch-norm ``training`` flag).
        reuse: forwarded to every layer so a second tower can share weights.

    Returns:
        Output tensor of 'deconv1'.
    """
    def bn_block(x, in_channels, scope):
        # All conv-bn-relu layers use a 3x3 kernel and n_filters outputs.
        return conv_bn_relu(x, bias_shape=[n_filters],
                            kernel_shape=[3, 3, in_channels, n_filters],
                            phase=phase, scope=scope, reuse=reuse)

    net = bn_block(input_image, n_channels, 'conv1')
    net = max_pool(bn_block(net, n_filters, 'conv2'))
    net = bn_block(net, n_filters, 'conv3')
    net = conv(net, [n_filters], [3, 3, n_filters, n_filters], 'conv4', reuse=reuse)
    return deconv(net, [n_filters], [3, 3, n_filters, n_filters],
                  [batch_size, H, W, n_filters], 'deconv1', reuse=reuse)

def accuracy_calculate(Y_truth, Y_pre):
    """Fraction of positions where the arg-max of ``Y_pre`` over its last axis equals ``Y_truth``.

    Args:
        Y_truth: integer label tensor.
        Y_pre: score tensor; the predicted class is the arg-max over the last axis.

    Returns:
        Scalar float tensor with the mean of the element-wise matches.
    """
    predicted = tf.cast(tf.argmax(Y_pre, -1), tf.int32)
    matches = tf.cast(tf.equal(Y_truth, predicted), tf.float32)
    return tf.reduce_mean(matches)

def error(output,y_,threshold=5):
    """Fraction of valid pixels whose absolute error exceeds ``threshold``.

    A pixel is valid when its ground-truth value in ``y_`` is non-zero.

    Args:
        output: predicted values, same shape as ``y_``.
        y_: ground-truth values; zeros mark pixels to ignore.
        threshold: absolute difference above which a pixel counts as wrong.

    Returns:
        ``n_err / n_valid`` as a float, or 0.0 when there is no valid pixel.
    """
    # Subtract in float64 so unsigned inputs (e.g. uint8 images) cannot wrap around.
    predicted = np.asarray(output, dtype=np.float64)
    truth = np.asarray(y_, dtype=np.float64)
    valid_pixels = np.abs(predicted - truth)[truth != 0]
    n_total = len(valid_pixels)
    # Guard against a patch with no ground truth instead of dividing by zero.
    if n_total == 0:
        return 0.0
    n_err = np.sum(valid_pixels > threshold)
    return float(n_err) / float(n_total)

def network_7(input_image, n_channels, n_filters, batch_size, H, W, phase, reuse=False):
    """Build the deeper feature tower: 6 conv-bn-relu layers, 1 conv, 2 deconvs.

    Max-pooling follows 'conv2' and 'conv4'. 'deconv1' targets
    ``[batch_size, int(H/2), int(W/2), n_filters]`` and 'deconv2' targets
    ``[batch_size, H, W, n_filters]``.

    Args:
        input_image: input tensor with ``n_channels`` channels.
        n_channels: channel count of ``input_image``.
        n_filters: number of filters used by every layer.
        batch_size, H, W: used to build the deconvolution output shapes.
        phase: forwarded to every conv-bn-relu layer (batch-norm ``training`` flag).
        reuse: forwarded to every layer so a second tower can share weights.

    Returns:
        Output tensor of 'deconv2'.
    """
    def bn_block(x, in_channels, scope):
        # All conv-bn-relu layers use a 3x3 kernel and n_filters outputs.
        return conv_bn_relu(x, [n_filters],
                            kernel_shape=[3, 3, in_channels, n_filters],
                            phase=phase, scope=scope, reuse=reuse)

    def upsample(x, height, width, scope):
        return deconv(x, [n_filters], [3, 3, n_filters, n_filters],
                      [batch_size, height, width, n_filters], scope, reuse=reuse)

    net = bn_block(input_image, n_channels, 'conv1')
    net = max_pool(bn_block(net, n_filters, 'conv2'))

    net = bn_block(net, n_filters, 'conv3')
    net = max_pool(bn_block(net, n_filters, 'conv4'))

    net = bn_block(net, n_filters, 'conv5')
    net = bn_block(net, n_filters, 'conv6')

    net = conv(net, [n_filters], [3, 3, n_filters, n_filters], 'conv7', reuse=reuse)

    net = upsample(net, int(H / 2), int(W / 2), 'deconv1')
    return upsample(net, H, W, 'deconv2')
    
def init_placeholder():
    """Reset the default graph and create the four input placeholders.

    Reads the module-level ``left_shape``, ``right_shape`` and ``gt_shape``.

    Returns:
        Tuple ``(phase, image_left, image_right, im_gt)``: a boolean placeholder,
        two float32 image placeholders and an int32 ground-truth placeholder.
    """
    tf1.disable_eager_execution()
    tf1.reset_default_graph()

    phase = tf1.placeholder(tf1.bool)

    with tf1.name_scope("Im_left"):
        image_left = tf1.placeholder(tf.float32, left_shape, name='image_left')

    with tf1.name_scope("Im_right"):
        image_right = tf1.placeholder(tf.float32, right_shape, name='image_right')

    with tf1.name_scope("Im_gt"):
        im_gt = tf1.placeholder(tf.int32, gt_shape, name='groud_truth')

    return phase, image_left, image_right, im_gt


def make_architecture(phase, image_left, image_right, im_gt):
    """Assemble the two-tower matching network, its loss and its training op.

    Reads the module-level ``batch_size``, ``rec_field``, ``patch_right`` and
    ``num_classes``.

    Args:
        phase: batch-norm training flag forwarded to both towers.
        image_left: input of the left tower.
        image_right: input of the right tower (built with ``reuse=True``).
        im_gt: ground-truth labels passed to ``compute_loss``.

    Returns:
        Tuple ``(loss, output, train_network)``.
    """
    with tf1.name_scope("stereo_matching"):
        left_network = network_7(image_left, 3, 64, batch_size,
                                 rec_field, rec_field, phase=phase)
        right_network = network_7(image_right, 3, 64, batch_size,
                                  rec_field, patch_right, phase=phase, reuse=True)

    with tf1.name_scope("Model"):
        output = inner_product(left_network, right_network, num_classes, rec_field)

    with tf1.name_scope("Loss"):
        loss = compute_loss(im_gt, output)

    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.00001
    # Staircase decay: multiply the rate by 0.96 every 4000 steps.
    learning_rate = tf1.train.exponential_decay(
        starter_learning_rate, global_step, 4000, 0.96, staircase=True
    )

    # Make the training step depend on everything registered under UPDATE_OPS
    # (presumably the batch-normalisation statistics updates).
    pending_updates = tf1.get_collection(tf1.GraphKeys.UPDATE_OPS)
    with tf1.control_dependencies(pending_updates):
        train_network = tf1.train.AdamOptimizer(
            learning_rate=learning_rate
        ).minimize(loss, global_step=global_step)

    return loss, output, train_network


def inner_product_test(h9_left, h9_right,batch_size,rows, cols, n_classes):
    """Build a per-pixel matching-score volume from two full-width feature maps.

    For every shift ``disp`` below ``min(n_classes, cols)`` the score at column
    ``c >= disp`` is the dot product (over axis 3) of ``h9_left[..., c, :]`` and
    ``h9_right[..., c - disp, :]``. Positions that have no counterpart
    (``c < disp``, or ``disp >= cols``) keep the sentinel value -1e9.

    Args:
        h9_left: array indexed as [batch, row, column, feature].
        h9_right: array with the same layout as ``h9_left``.
        batch_size, rows, cols: first three dimensions of the returned volume;
            ``cols`` is also the number of columns read from both inputs.
        n_classes: number of shifts, i.e. size of the last output axis.

    Returns:
        float64 array of shape ``(batch_size, rows, cols, n_classes)``.
    """
    prod = np.full((batch_size, rows, cols, n_classes), -1e9)

    # A plain loop over the feasible shifts. The former `while start < cols-1`
    # wrapper ran at most once and skipped all work for a single-column input.
    for disp in range(min(n_classes, cols)):
        left_features = h9_left[:, :, disp:cols, :]
        right_features = h9_right[:, :, 0:cols - disp, :]
        scores = np.sum(np.multiply(left_features, right_features), axis=3)
        prod[:, :, disp:cols, disp] = scores

    return prod


def processs_input_image(image):
    """Standardise ``image`` to zero mean and unit standard deviation.

    Args:
        image: array-like of numbers; converted to a float32 array.

    Returns:
        float32 array ``(image - mean) / std`` computed over all elements. For a
        constant image (std == 0) the centred, all-zero array is returned
        instead of dividing by zero.
    """
    image = np.array(image, dtype=np.float32)
    centered = image - np.mean(image)
    std = np.std(image)
    if std == 0:
        # Constant input: dividing would yield NaN everywhere.
        return centered
    return centered / std
  
def train(left_images,right_images,disp_images):
    """Resume training from a checkpoint on randomly sampled patches.

    Builds the graph, restores ``path_check_1 + "/model_1.ckpt"``, then runs
    ``epoch`` training steps. Every 50 steps the loss and pixel error on the
    current batch are printed; every 500 steps a checkpoint is written to
    ``path_check_2 + "/model_2.ckpt"``. Loss summaries go to './graphs'.

    Relies on module-level names: ``epoch``, ``load_random_patch``, ``rec_field``,
    ``right_left``, ``max_disp``, ``batch_size``, ``valid_pixels_train``,
    ``path_check_1`` and ``path_check_2``.

    Args:
        left_images, right_images, disp_images: forwarded unchanged to
            ``load_random_patch`` on every step.
    """
    phase, image_left, image_right, im_gt = init_placeholder()
    loss, output, train_network = make_architecture(phase, image_left, image_right, im_gt)
    saver = tf1.train.Saver()
    tf1.summary.scalar("Loss", loss)
    merged_summary_op = tf1.summary.merge_all()
    writer = tf1.summary.FileWriter('./graphs',graph=tf1.get_default_graph())

    try:
        with tf1.Session() as sess:
            # Training resumes from a checkpoint. To start from scratch, run
            # tf1.global_variables_initializer() here instead of restoring.
            saver.restore(sess, path_check_1+"/model_1.ckpt")
            for i in range(epoch):
                left, right, gt = load_random_patch(left_images,right_images,disp_images,rec_field,right_left,max_disp,batch_size,valid_pixels_train)

                left = left.astype('float32') / 255
                right = right.astype('float32') / 255
                feed = {image_left: left, image_right: right, im_gt: gt}

                _, summ = sess.run([train_network, merged_summary_op],
                                   feed_dict={**feed, phase: True})
                writer.add_summary(summ, i)

                if i%50 == 0 :
                    # Re-evaluate the same batch with the batch-norm flag set to False.
                    L_train, out_train = sess.run([loss, output],
                                                  feed_dict={**feed, phase: False})
                    err_train = error(np.argmax(out_train,-1), gt)

                    # Format the numbers themselves: truncating str(value) to five
                    # characters drops the exponent of values such as 1.2345e-05.
                    print('\r',"Epoch:",str(i)," Loss:",'{:.5g}'.format(float(L_train)), "Error:",'{:.5g}'.format(float(err_train)), end='')

                if i % 500 == 0:
                    saver.save(sess, path_check_2+"/model_2.ckpt")
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

def test(im_left, im_right):
    """Run the restored network on one stereo pair and return the score volume.

    Builds both towers for full images of size ``img_h`` x ``img_w``, restores
    ``path_check_1 + 'model_1.ckpt'``, resizes the inputs to ``resize_shape`` and
    correlates the two feature maps with ``inner_product_test``.

    Relies on module-level names: ``batch_size``, ``img_h``, ``img_w``,
    ``resize_shape``, ``num_classes`` and ``path_check_1``.

    Args:
        im_left: left image array; a leading batch axis is added and values are
            divided by 255.
        im_right: right image array, treated like ``im_left``.

    Returns:
        The array produced by ``inner_product_test`` (scores per pixel and
        class). Take ``np.argmax(prod, 3)[0]`` to obtain a 2-D disparity map.
    """
    tf1.disable_eager_execution()
    phase, image_left, image_right, _ = init_placeholder()
    with tf1.name_scope("stereo_matching"):
        n9_left =  network_7(image_left,  3, 64, batch_size, img_h, img_w, phase = phase)
        n9_right = network_7(image_right, 3, 64, batch_size, img_h, img_w, phase = phase, reuse=True)

    saver = tf1.train.Saver()

    # The context manager closes the session (and frees its resources) on exit;
    # previously the session was left open.
    with tf1.Session() as sess:
        saver.restore(sess, path_check_1+'model_1.ckpt')

        # Add a batch axis and scale to [0, 1].
        left =  im_left [np.newaxis,...].astype('float') / 255
        right = im_right[np.newaxis,...].astype('float') / 255

        # Resize both images to the size the graph was built for.
        left_resized  = tf1.image.resize_images(left,  resize_shape)
        right_resized = tf1.image.resize_images(right, resize_shape)
        left_, right_ = sess.run([left_resized, right_resized])

        left_vol, right_vol = sess.run([n9_left, n9_right],feed_dict={ image_left : left_,
                                                                       image_right: right_,
                                                                       phase : False})

    prod = inner_product_test(left_vol, right_vol, batch_size, img_h, img_w, num_classes)

    return prod


In [8]:

# Machine-specific absolute paths to the dataset and to the checkpoint directory.
data_path = '/media/sansii/Software/san_projects/Major_project/KITTI_dataset/2015/training/'
path_check_1 = '/media/sansii/Software/san_projects/Major_project/Checkpoints_1/'
# NOTE(review): ndimage.imread is not available in recent SciPy releases --
# confirm the pinned SciPy version or switch to another image reader.
left =  ndimage.imread(data_path+"image_2/000008_10.png")
right = ndimage.imread(data_path+"image_3/000008_10.png") 
# NOTE(review): the ground truth is frame 000005 while the images are frame
# 000008 -- presumably a typo; `gt` is not used further down in this cell.
gt = ndimage.imread(data_path+"disp_noc_0/000005_10.png") 
# Full-image inference settings; these overwrite the patch-training globals.
max_disp = 128
batch_size = 1
img_h = 376
img_w = 1240
resize_shape = [img_h , img_w]
num_classes = max_disp + 1
left_shape =  [batch_size, img_h, img_w,  3]
right_shape = [batch_size, img_h, img_w,  3]
gt_shape =    [batch_size, img_h, img_w]

# `out` is whatever test() returns: the per-pixel, per-class score volume.
out = test(left, right)

INFO:tensorflow:Restoring parameters from /media/sansii/Software/san_projects/Major_project/Checkpoints_1/model_1.ckpt


In [10]:
%matplotlib
plt.figure(0)
plt.imshow(out,cmap='jet')
plt.show()

Using matplotlib backend: TkAgg
