In [5]:
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from scipy import ndimage
import numpy as np


In [None]:
def conv2d(input_images, weight, stride = 1):
    return tf.nn.conv2d(input_images, weight ,strides=[1,stride, stride,1], padding='VALID',name="conv")
        

def conv_elu(input_layer, k, in_filter, ou_filter, stride, scope, activation = tf.nn.elu, reuse=False):
    
    with tf.compat.v1.variable_scope(scope, reuse = reuse):
        W = tf.compat.v1.get_variable("weight", [k, k, in_filter, ou_filter], initializer=tf1.glorot_uniform_initializer())
        b = tf.compat.v1.get_variable("biases", [ou_filter],initializer=tf1.glorot_uniform_initializer())
        
        #we need pyrimad which output is half of its input so, need to pad input.
        p = np.floor((k - 1) / 2).astype('int32')
        padding = tf.constant([[0,0],[p, p],[p, p],[0,0]])
        p_x = tf.pad(input_layer, padding)
        
        #padded input
        conv = conv2d(p_x, W, stride = stride)
        output = tf.nn.bias_add(conv, b)
        out = activation(output)
        
    return out


def upsampling(input_layer, factor):    
    return tf.keras.layers.UpSampling2D(size=(factor, factor))(input_layer)
    
             

def upconv(input_layer, k, in_filter, ou_filter, scope, reuse=False):
    
    with tf.compat.v1.variable_scope(scope, reuse = reuse):    
        #Upsampling
        upsample = upsampling(input_layer, 2)
        out = conv_elu(upsample, k, in_filter, ou_filter, 1, scope='conv_elu')
        return out


def conv_block(input_layer, k, in_filter, ou_filter, scope):
    
    c1 = conv_elu(input_layer, k,  in_filter, ou_filter,  1, scope=scope )
    c2 = conv_elu(c1,          k,  ou_filter, ou_filter,  2, scope=scope+'b')
    
    return c2
    
def get_disp(x, in_filter,scope):
    disp = 0.3 * conv_elu(x, 3, in_filter, 2, 1, scope = scope, activation = tf.nn.sigmoid)
    return disp


In [None]:
def init_placeholder():
    
    tf1.disable_eager_execution()
    tf1.reset_default_graph()
    
    with tf1.name_scope("Input_image"):
        input_layer = tf1.placeholder('float', shape = input_shape)
        
    return input_layer


def make_architecture(input_layers):
    
    with tf1.name_scope("ENCNN"):
    
        with tf1.name_scope("encoder"):
            conv1 = conv_block(input_layers, 7,  3,  32, 'conv1')#2
            conv2 = conv_block(conv1,        5, 32,  64, 'conv2')#4
            conv3 = conv_block(conv2,        3, 64, 128, 'conv3')#8
            conv4 = conv_block(conv3,        3, 128,256, 'conv4')#16
            conv5 = conv_block(conv4,        3, 256,512, 'conv5')#32
                conv6 = conv_block(conv5,        3, 512,512, 'conv6')#64
            conv7 = conv_block(conv6,        3, 512,512, 'conv7')#128
            
            
            


        with tf1.name_scope("decoder"):
            #upsampling 7
            upconv7 = upconv(conv7,     3,  512,  512, scope =  'upconv7')
            concat7 = tf.concat([upconv7, conv6], axis=-1, name = 'concat7')
            iconv7  = conv_elu(concat7, 3, 1024,  512, 1, scope= 'iconv7')

            #upsampling 6
            upconv6 = upconv(iconv7,    3,  512,  512, scope =  'upconv6')
            concat6 = tf.concat([upconv6, conv5], axis=-1, name = 'concat6')
            iconv6  = conv_elu(concat6, 3, 1024,  512, 1, scope= 'iconv6')

            #upsampling 5
            upconv5 = upconv(iconv6,    3,  512,  256, scope =  'upconv5')
            concat5 = tf.concat([upconv5, conv4], axis=-1, name= 'concat5')
            iconv5  = conv_elu(concat5, 3,  512,  256, 1, scope= 'iconv5')

            #upsampling 4
            upconv4 = upconv(iconv5,    3,   256,  128, scope = 'upconv4')
            concat4 = tf.concat([upconv4, conv3], axis=-1, name ='concat4')
            iconv4  = conv_elu(concat4, 3, 256,  128, 1, scope= 'iconv4')
            disp4   = get_disp(iconv4, 128, scope= 'disp4')
            updisp4 = upsampling(disp4, 2)

            #upsampling 3
            upconv3 = upconv(iconv4,    3,  128,  64, scope = 'upconv3')
            concat3 = tf.concat([upconv3, conv2, updisp4], axis=-1, name='concat3')
            iconv3  = conv_elu(concat3, 3, 130,  64, 1, scope= 'iconv3')
            disp3   = get_disp(iconv3,  64, scope = 'disp3')
            updisp3 = upsampling(disp3, 2)

            #upsampling 2
            upconv2 = upconv(iconv3,    3,  64,   32, scope = 'upconv2')
            concat2 = tf.concat([upconv2, conv1, updisp3], axis=-1, name='concat2')
            iconv2  = conv_elu(concat2, 3,  66,   32, 1, scope= 'iconv2')
            disp2   = get_disp(iconv2,  32, scope = 'disp2')
            updisp2 = upsampling(disp2, 2)

            #upsampling 1
            upconv1 = upconv(iconv2,    3,  32,   16, scope = 'upconv1')
            concat1 = tf.concat([upconv1, updisp2], axis=-1, name='convat1')
            iconv1  = conv_elu(concat1, 3,  18,   16, 1, scope= 'iconv1')
            disp1   = get_disp(iconv1,  16, scope = 'disp1')
        
    return disp1, disp2, disp3 ,disp4
        

In [None]:
def train(train_input_image):
    logdir = '/media/sansii/Software/san_projects/Major_project/Moncular_depth_estimation_data/'
    input_layer = init_placeholder()
    
    disp1, disp2, disp3 , disp4 = make_architecture(input_layer)
    
    writer = tf1.summary.FileWriter(logdir+'./graph' , graph=tf1.get_default_graph())
    
    

In [None]:
batch_size = 32
img_h = 256
img_w = 512
input_shape = [batch_size, img_h, img_w, 3]
train(0)

#Testing Section

In [None]:
tf1.disable_eager_execution()
tf1.reset_default_graph()
n_filters = 64
n_channels = 3
kernel_shape = [7, 7, n_channels,n_filters]
bias_shape = [n_filters]
W = tf.compat.v1.get_variable("weight", kernel_shape, initializer=tf1.glorot_uniform_initializer())
data_path = '/media/sansii/Software/san_projects/Major_project/KITTI_dataset/2015/testing/'
left = ndimage.imread(data_path+"image_2/000083_10.png")

left_shape = (1, left.shape[0], left.shape[1], 3)
input_layer  = tf.compat.v1.placeholder(tf.float32, left_shape,  name='image_left' )
p = np.floor((kernel_shape[0] - 1) / 2).astype('int32')
padding = tf.constant([[0,0],[p, p],[p, p],[0,0]])
p_x = tf.pad(input_layer, padding)
conv = conv2d(p_x, W, stride = 2)
conv = tf.nn.elu(conv)

In [None]:
left.shape

In [None]:
var = tf1.global_variables_initializer()
with tf1.Session() as sess:
    sess.run(var)
    left_ = left[np.newaxis,...]
    print(left_.shape)
    out = sess.run(conv, feed_dict={input_layer: left_})
   
    print(out.shape)



In [None]:
%reset_selective conv2d


# STN

In [None]:
def STN(img, disparity):
    
    def interpolate(img, x, y):
        #bilinear_interpolation

        #For corner pixel there is no either left or right / top or down pixels so padding is necessary
        img = tf.pad(img, paddings= ((0,0),(1,1),(1,1),(0,0)), mode='CONSTANT')

        #since we have padded we need to add plus 1 for our transformed coordinates
        x = x + 1
        y = y + 1

        x = tf.clip_by_value(x, 0.0, tf.cast(width,tf.float32)+1.0)
        
        #since the values are in fraction we need to take floor value which selects left pixels
        x_float   = tf.floor(x)
        y_float   = tf.floor(y)
        x_1_float = x_float + 1
        
        #x_1_float = tf.minimum(x_1_float, tf.cast(width,tf.float32)+1.0 )
        
        #Since,the index are in integer we convert float into integer
        x_int = tf.cast(x_float, tf.int32)
        y_int = tf.cast(y_float, tf.int32)
        x_1_int = tf.cast(x_1_float, tf.int32)
        
        #we required total dimention for reshaping 
        dim_y  = width + 2 * 1 #padding
        dim_xy = (width + 2) * (height + 2)

        #There are number of images with there individual coordinate space now we need to convert
        #individual coordinate space into a single coordinate space
        #eg: x = [ [0,1,2,3], [0,1,2,3], [0,1,2,3]] 
        # y= [[0,0,0,0],[1,1,1,1],[2,2,2,2]]into [0 ,1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        #Converting 2d spatial dimention into 1d vector
        
        base = tf.tile(tf.expand_dims(tf.range(num_batches),1) * dim_xy, [1, width * height])
        base = tf.reshape(base, [-1])
    
        x_l = x_int   + (base + y_int * dim_y)
        x_r = x_1_int + (base + y_int * dim_y)
        
        #Flattering input image
        img_flat = tf.reshape(img, [-1, num_channels]) 
        
        #tf.gather selects pixels of img from coordinate x_l and x_r
        #Therefore pixel_l and pixel_r contains only selected coordinates pixels from img
        pixel_l = tf.gather(img_flat, x_l)
        pixel_r = tf.gather(img_flat, x_r)
        
        #Now for bilinear interpolation each left and right pixel must be associated with its respective weights.
        weights_l = tf.expand_dims(x_1_float - x, 1)#Since nearer pixel has greater weight
        weights_r = tf.expand_dims(x - x_float, 1)
        

        
        out = (weights_l * pixel_l) + (weights_r * pixel_r)

        return tf.reshape(out, [num_batches, height, width, num_channels])
    def bilinear_sampling(img, disparity):
       

        width_f =  tf.cast(width , tf.float32)
        height_f = tf.cast(height, tf.float32)

        #Creating meshgrid which contains represents coordinates(x, y) of images
        x_grid, y_grid = tf.cast(tf.meshgrid(tf.range(width), 
                                     tf.range(height)), tf.float32)
        
        #Flatterning grids
        x_flat = tf.reshape(x_grid, [-1])
        y_flat = tf.reshape(y_grid, [-1])

        #Since, there are num_batches so we need to add up grids for all batches
        x_flat = tf.tile(x_flat, [num_batches])
        y_flat = tf.tile(y_flat, [num_batches])

        #Flatterning disparity size = (num_batches * weight * height)
        disparity_flat = tf.reshape(disparity, [-1])
    
        #Adding disparity to find / applying transformation
        x_transf = x_flat + (disparity_flat * width_f) #Since the out of sigmoid funtion in 0 -1 so we muliply with width

        #Transformed coordinates are in fraction since, there is no fraction pixel so we interplate
        out = interpolate(img, x_transf, y_flat)



        return out
    
    with tf1.variable_scope("name"):
        width       = tf.shape(img)[2]
        height      = tf.shape(img)[1]
        num_batches = tf.shape(img)[0]
        num_channels= tf.shape(img)[3]
        out = bilinear_sampling(img, disparity)

    return out

In [None]:
def bilinear_sampler_1d_h(input_images, x_offset, wrap_mode='border', name='bilinear_sampler', **kwargs):
    def _repeat(x, n_repeats):
        rep = tf.tile(tf.expand_dims(x, 1), [1, n_repeats])
        return tf.reshape(rep, [-1])

    def _interpolate(im, x, y):
        

        # handle both texture border types
        _edge_size = 0
        if _wrap_mode == 'border':
            _edge_size = 1
            im = tf.pad(im, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT')
            x = x + _edge_size
            y = y + _edge_size
        elif _wrap_mode == 'edge':
            _edge_size = 0
        else:
            return None

        x = tf.clip_by_value(x, 0.0,  _width_f - 1 + 2 * _edge_size)

        x0_f = tf.floor(x)
        y0_f = tf.floor(y)
        x1_f = x0_f + 1

        x0 = tf.cast(x0_f, tf.int32)
        y0 = tf.cast(y0_f, tf.int32)
        x1 = tf.cast(tf.minimum(x1_f,  _width_f - 1 + 2 * _edge_size), tf.int32)

        dim2 = (_width + 2 * _edge_size)
        dim1 = (_width + 2 * _edge_size) * (_height + 2 * _edge_size)
        base = _repeat(tf.range(_num_batch) * dim1, _height * _width)
        base_y0 = base + y0 * dim2
        idx_l = base_y0 + x0
        idx_r = base_y0 + x1

        im_flat = tf.reshape(im, tf.stack([-1, _num_channels]))

        pix_l = tf.gather(im_flat, idx_l)
        pix_r = tf.gather(im_flat, idx_r)

        weight_l = tf.expand_dims(x1_f - x, 1)
        weight_r = tf.expand_dims(x - x0_f, 1)

        return weight_l * pix_l + weight_r * pix_r

    def _transform(input_images, x_offset):
      
        # grid of (x_t, y_t, 1), eq (1) in ref [1]
        x_t, y_t = tf.meshgrid(tf.linspace(0.0,   _width_f - 1.0,  _width),
                               tf.linspace(0.0 , _height_f - 1.0 , _height))

        x_t_flat = tf.reshape(x_t, (1, -1))
        y_t_flat = tf.reshape(y_t, (1, -1))

        x_t_flat = tf.tile(x_t_flat, tf.stack([_num_batch, 1]))
        y_t_flat = tf.tile(y_t_flat, tf.stack([_num_batch, 1]))

        x_t_flat = tf.reshape(x_t_flat, [-1])
        y_t_flat = tf.reshape(y_t_flat, [-1])

        x_t_flat = x_t_flat + tf.reshape(x_offset, [-1]) * _width_f

        input_transformed = _interpolate(input_images, x_t_flat, y_t_flat)

        output = tf.reshape(
            input_transformed, tf.stack([_num_batch, _height, _width, _num_channels]))
        return output

    with tf1.variable_scope(name):
        _num_batch    = tf.shape(input_images)[0]
        _height       = tf.shape(input_images)[1]
        _width        = tf.shape(input_images)[2]
        _num_channels = tf.shape(input_images)[3]

        _height_f = tf.cast(_height, tf.float32)
        _width_f  = tf.cast(_width,  tf.float32)

        _wrap_mode = wrap_mode

        output = _transform(input_images, x_offset)
        return output
def generate_image_left(img, disp):
    out = bilinear_sampler_1d_h(img, -disp)
    return tf.reduce_mean(tf.abs(out - disp))
    
def generate_image_right(img, disp):
    out = bilinear_sampler_1d_h(img, disp)
    return tf.reduce_mean(tf.abs(out - disp))

In [6]:
a = np.random.randint(0, 10 , size =(50,300,300,3))
b = np.random.randn(50,300,300,1)

d_left = np.random.randint(0, 20 , size =(1,256,256,1))
d_right = np.random.randint(0,20 , size = (1,256,256,1))

disp_left = tf.constant(d_left,dtype =tf.float32)
disp_right = tf.constant(d_right, dtype = tf.float32)

disp_left = make_pyramide_image(disp_left)
disp_right = make_pyramide_image(disp_right)

left = generate_image_left(disp_right, disp_left)
right = generate_image_right(disp_left, disp_right)

left_ = lr_consistency_loss_left(disp_left, disp_right)
right_  = lr_consistency_loss_right(disp_left, disp_right)

print(left.numpy(), left_.numpy())
print(right.numpy(),right_.numpy())


NameError: name 'generate_image_left' is not defined

# Training _LOSSES

In [None]:
def SSIM(image, pred_image, block_size=3):
    
    C1 = 0.01 ** 2
    C2 = 0.03 ** 2
    
    u_x = tf.nn.avg_pool2d(image      ,block_size, strides=1, padding='VALID')
    u_y = tf.nn.avg_pool2d(pred_image ,block_size, strides=1, padding='VALID')

    sigma_x  = tf.nn.avg_pool2d(image**2           ,block_size, strides=1, padding='VALID') - u_x**2
    sigma_y  = tf.nn.avg_pool2d(pred_image**2      ,block_size, strides=1, padding='VALID') - u_y**2
    sigma_xy = tf.nn.avg_pool2d(image * pred_image ,block_size, strides=1, padding='VALID') - u_x * u_y
    
    SSIM_num = ((2 * u_x * u_y + C1)   * (2 * sigma_x * sigma_y   + C2)) 
    SSIM_den = ((u_x**2 + u_y**2 + C1) * (sigma_x**2 + sigma_y**2 + C2))
    
    SSIM = SSIM_num / SSIM_den
    
    return tf.clip_by_value((1 - SSIM) / 2, 0, 1)



def apperance_matching_loss(image, pred_image):
    alpha = 0.85
    L1_error = tf.abs(image - pred_image)
    ssim_error = SSIM(image, pred_image, 3)
    
    C_ap = tf.reduce_mean((alpha * ssim_error)) + tf.reduce_mean((1 - alpha) * L1_error)
    
    return C_ap


def disparity_smoothness_loss(disp, image):
    
    disp_gradient_y , disp_gradient_x  = tf.image.image_gradients(disp)
    image_gradient_y, image_gradient_x = tf.image.image_gradients(image)

    im_dx = -tf.reduce_mean(tf.abs(image_gradient_x),axis=-1, keepdims=True)
    im_dy = -tf.reduce_mean(tf.abs(image_gradient_y),axis=-1, keepdims=True)
    
    loss_dx = tf.multiply(tf.abs(disp_gradient_x), tf.math.exp(im_dx))
    loss_dy = tf.multiply(tf.abs(disp_gradient_y), tf.math.exp(im_dy))
    
    disp_smoothness_loss = tf.reduce_mean((loss_dx + loss_dy))
    
    
    return disp_smoothness_loss


def lr_consistency_loss_left(disp_left, disp_right):
    disp_left_from_right = STN(disp_right,  -disp_left)
    return tf.reduce_mean(tf.abs(disp_left - disp_left_from_right))
    

def lr_consistency_loss_right(disp_left, disp_right):
    disp_right_from_left = STN(disp_left,  disp_right)
    return tf.reduce_mean(tf.abs(disp_right- disp_right_from_left)) 
        


def training_loss_each_scale(image_left,image_right, disp, r):

    alpha_ap = 1
    alpha_ds = (0.1 / 2 ** r)
    alpha_lr = 1
    
    #predicted disparity
    disp_left   = tf.expand_dims(disp[:,:,:,0], 3)
    disp_right  = tf.expand_dims(disp[:,:,:,1], 3)

    
    #Sampling left images form right using left_disparity and vice-versa
    pred_left   = STN(image_right, -disp_left)
    pred_right  = STN(image_left , disp_right)
    
    #Apperance_matching_loss
    C_ap_l = apperance_matching_loss(image_left,  pred_left)
    C_ap_r = apperance_matching_loss(image_right, pred_right)
    
    #Disparity Smoothness Loss
    C_ds_l = disparity_smoothness_loss(disp_left,  image_left)
    C_ds_r = disparity_smoothness_loss(disp_right, image_right)
    
    #Left-Right Disparity Cosnsistency Loss
    C_lr_l = lr_consistency_loss_left(disp_left, disp_right)
    C_lr_r = lr_consistency_loss_right(disp_left, disp_right)
    
    loss = alpha_ap * (C_ap_l + C_ap_r) + alpha_ds * (C_ds_l + C_ds_r) + alpha_lr * (C_lr_l + C_lr_r)
    
    
    return loss


def total_loss(imgL, imgR, disp):
    left_pyramide  = make_pyramide_image(imgL)
    right_pyramide = make_pyramide_image(imgR)
    disp           = make_pyramide_image(disp)
    tot_loss = tf.constant([0.0])
    
    for i in range(4):
        loss = training_loss_each_scale(left_pyramide[i], right_pyramide[i], disp[i], i + 1)
        tot_loss = tf.add(tot_loss, loss)
    
    return tot_loss

    
def make_pyramide_image(img, num_scales =4):
    scaled_imgs = [img]
    s = tf.shape(img)
    h = s[1]
    w = s[2]
    for i in range(num_scales - 1):
        ratio = 2 ** (i + 1)
        nh = h // ratio
        nw = w // ratio
        scaled_imgs.append(tf.image.resize(img, [nh, nw]))
    return scaled_imgs
        
        
    
    

In [None]:
t = tf.constant([[1., 2,  3], [4,  5,  6]])
t2 = tf.clip_by_value(t, clip_value_min=-1, clip_value_max=1)

exp = tf.math.exp(t)
with tf1.Session() as sess:
    e = sess.run(exp)
    print(e)
    
t2.numpy()

In [None]:
BATCH_SIZE = 1
IMAGE_HEIGHT = 5
IMAGE_WIDTH = 5
CHANNELS = 1
image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
  delta=1, dtype=tf.float32),
  shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
dx, dy = tf.image.image_gradients(image)
exp = 
with tf1.Session() as sess:
    d_y, d_x = sess.run([dx, dy])
    print(d_y)
   

In [None]:
dCost_dW.numpy()

# Comparision_Section

In [116]:
#ours
def STN(img, disparity):
    
    def interpolate(img, x, y):
        #bilinear_interpolation

        #For corner pixel there is no either left or right / top or down pixels so padding is necessary
        img = tf.pad(img, paddings= ((0,0),(1,1),(1,1),(0,0)), mode='CONSTANT')

        #since we have padded we need to add plus 1 for our transformed coordinates
        x = x + 1
        y = y + 1

        x = tf.clip_by_value(x, 0.0, tf.cast(width,tf.float32)+1.0)
        
        #since the values are in fraction we need to take floor value which selects left pixels
        x_float   = tf.floor(x)
        y_float   = tf.floor(y)
        x_1_float = x_float + 1
        
        #x_1_float = tf.minimum(x_1_float, tf.cast(width,tf.float32)+1.0 )
        
        #Since,the index are in integer we convert float into integer
        x_int = tf.cast(x_float, tf.int32)
        y_int = tf.cast(y_float, tf.int32)
        x_1_int = tf.cast(x_1_float, tf.int32)
        
        #we required total dimention for reshaping 
        dim_y  = width + 2 * 1 #padding
        dim_xy = (width + 2) * (height + 2)

        #There are number of images with there individual coordinate space now we need to convert
        #individual coordinate space into a single coordinate space
        #eg: x = [ [0,1,2,3], [0,1,2,3], [0,1,2,3]] 
        # y= [[0,0,0,0],[1,1,1,1],[2,2,2,2]]into [0 ,1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        #Converting 2d spatial dimention into 1d vector
        
        base = tf.tile(tf.expand_dims(tf.range(num_batches),1) * dim_xy, [1, width * height])
        base = tf.reshape(base, [-1])
    
        x_l = x_int   + (base + y_int * dim_y)
        x_r = x_1_int + (base + y_int * dim_y)
        
        #Flattering input image
        img_flat = tf.reshape(img, [-1, num_channels]) 
        
        #tf.gather selects pixels of img from coordinate x_l and x_r
        #Therefore pixel_l and pixel_r contains only selected coordinates pixels from img
        pixel_l = tf.gather(img_flat, x_l)
        pixel_r = tf.gather(img_flat, x_r)
        
        #Now for bilinear interpolation each left and right pixel must be associated with its respective weights.
        weights_l = tf.expand_dims(x_1_float - x, 1)#Since nearer pixel has greater weight
        weights_r = tf.expand_dims(x - x_float, 1)
        

        
        out = (weights_l * pixel_l) + (weights_r * pixel_r)

        return tf.reshape(out, [num_batches, height, width, num_channels])
    def bilinear_sampling(img, disparity):
       

        width_f =  tf.cast(width , tf.float32)
        height_f = tf.cast(height, tf.float32)

        #Creating meshgrid which contains represents coordinates(x, y) of images
        x_grid, y_grid = tf.cast(tf.meshgrid(tf.range(width), 
                                     tf.range(height)), tf.float32)
        
        #Flatterning grids
        x_flat = tf.reshape(x_grid, [-1])
        y_flat = tf.reshape(y_grid, [-1])

        #Since, there are num_batches so we need to add up grids for all batches
        x_flat = tf.tile(x_flat, [num_batches])
        y_flat = tf.tile(y_flat, [num_batches])

        #Flatterning disparity size = (num_batches * weight * height)
        disparity_flat = tf.reshape(disparity, [-1])
    
        #Adding disparity to find / applying transformation
        x_transf = x_flat + (disparity_flat * width_f) #Since the out of sigmoid funtion in 0 -1 so we muliply with width

        #Transformed coordinates are in fraction since, there is no fraction pixel so we interplate
        out = interpolate(img, x_transf, y_flat)



        return out
    
    with tf1.variable_scope("name"):
        width       = tf.shape(img)[2]
        height      = tf.shape(img)[1]
        num_batches = tf.shape(img)[0]
        num_channels= tf.shape(img)[3]
        out = bilinear_sampling(img, disparity)

    return out



def SSIM(image, pred_image, block_size=3):
    
    C1 = 0.01 ** 2
    C2 = 0.03 ** 2
    
    u_x = tf.nn.avg_pool2d(image      ,block_size, strides=1, padding='VALID')
    u_y = tf.nn.avg_pool2d(pred_image ,block_size, strides=1, padding='VALID')

    sigma_x  = tf.nn.avg_pool2d(image**2           ,block_size, strides=1, padding='VALID') - u_x**2
    sigma_y  = tf.nn.avg_pool2d(pred_image**2      ,block_size, strides=1, padding='VALID') - u_y**2
    sigma_xy = tf.nn.avg_pool2d(image * pred_image ,block_size, strides=1, padding='VALID') - u_x * u_y
    
    SSIM_num = ((2 * u_x * u_y + C1)   * (2 * sigma_x * sigma_y   + C2)) 
    SSIM_den = ((u_x**2 + u_y**2 + C1) * (sigma_x**2 + sigma_y**2 + C2))
    
    SSIM = SSIM_num / SSIM_den
    
    return tf.clip_by_value((1 - SSIM) / 2, 0, 1)



def apperance_matching_loss(image, pred_image):
    alpha = 0.85
    L1_error = tf.abs(image - pred_image)
    ssim_error = SSIM(image, pred_image, 3)
    
    C_ap = tf.reduce_mean((alpha * ssim_error)) + tf.reduce_mean((1 - alpha) * L1_error)
    
    return C_ap


def disparity_smoothness_loss(disp, image):
    
    disp_gradient_y , disp_gradient_x  = tf.image.image_gradients(disp)
    image_gradient_y, image_gradient_x = tf.image.image_gradients(image)

    im_dx = -tf.reduce_mean(tf.abs(image_gradient_x),axis=-1, keepdims=True)
    im_dy = -tf.reduce_mean(tf.abs(image_gradient_y),axis=-1, keepdims=True)
    
    loss_dx = tf.multiply(tf.abs(disp_gradient_x), tf.math.exp(im_dx))
    loss_dy = tf.multiply(tf.abs(disp_gradient_y), tf.math.exp(im_dy))
    
    disp_smoothness_loss = tf.reduce_mean((loss_dx + loss_dy))
    
    
    return disp_smoothness_loss


def lr_consistency_loss_left(disp_left, disp_right):
    disp_left_from_right = STN(disp_right,  -disp_left)
    return tf.reduce_mean(tf.abs(disp_left - disp_left_from_right))
    

def lr_consistency_loss_right(disp_left, disp_right):
    disp_right_from_left = STN(disp_left,  disp_right)
    return tf.reduce_mean(tf.abs(disp_right- disp_right_from_left)) 
        


def training_loss_each_scale(image_left,image_right, disp, r):

    alpha_ap = 1
    alpha_ds = (0.1 / 2 ** r)
    alpha_lr = 1
    
    #predicted disparity
    disp_left   = tf.expand_dims(disp[:,:,:,0], 3)
    disp_right  = tf.expand_dims(disp[:,:,:,1], 3)

    
    #Sampling left images form right using left_disparity and vice-versa
    pred_left   = STN(image_right, -disp_left)
    pred_right  = STN(image_left , disp_right)
    
    #Apperance_matching_loss
    C_ap_l = apperance_matching_loss(image_left,  pred_left)
    C_ap_r = apperance_matching_loss(image_right, pred_right)
    
    #Disparity Smoothness Loss
    C_ds_l = disparity_smoothness_loss(disp_left,  image_left)
    C_ds_r = disparity_smoothness_loss(disp_right, image_right)
    
    #Left-Right Disparity Cosnsistency Loss
    C_lr_l = lr_consistency_loss_left(disp_left, disp_right)
    C_lr_r = lr_consistency_loss_right(disp_left, disp_right)
    
    loss = alpha_ap * (C_ap_l + C_ap_r) + alpha_ds * (C_ds_l + C_ds_r) + alpha_lr * (C_lr_l + C_lr_r)
    
    
    return loss


def total_loss(imgL, imgR, disp):
    left_pyramide  = make_pyramide_image(imgL)
    right_pyramide = make_pyramide_image(imgR)
    disp           = make_pyramide_image(disp)
    tot_loss = tf.constant([0.0])
    
    for i in range(4):
        loss = training_loss_each_scale(left_pyramide[i], right_pyramide[i], disp[i], i + 1)
        tot_loss = tf.add(tot_loss, loss)
    
    return tot_loss

    
def make_pyramide_image(img, num_scales =4):
    scaled_imgs = [img]
    s = tf.shape(img)
    h = s[1]
    w = s[2]
    for i in range(num_scales - 1):
        ratio = 2 ** (i + 1)
        nh = h // ratio
        nw = w // ratio
        scaled_imgs.append(tf.image.resize(img, [nh, nw]))
    return scaled_imgs
        
        
    
    

In [117]:
#Papers
from bilinear_sampler import *
class smoothness_losses:
    
    def scale_pyramid(self, img, num_scales):
        scaled_imgs = [img]
        s = tf.shape(img)
        h = s[1]
        w = s[2]
        for i in range(num_scales - 1):
            ratio = 2 ** (i + 1)
            nh = h // ratio
            nw = w // ratio
            scaled_imgs.append(tf.image.resize(img, [nh, nw]))
        return scaled_imgs

    
    def gradient_x(self, img):
        gx = img[:,:,:-1,:] - img[:,:,1:,:]
        return tf.pad(gx,paddings = ((0,0),(0,0),(0,1),(0,0)))

    def gradient_y(self, img):
        gy = img[:,:-1,:,:] - img[:,1:,:,:]
        return tf.pad(gy,paddings=((0,0),(0,1),(0,0),(0,0)))
    
    def get_disparity_smoothness(self, disp, pyramid):
        disp_gradients_x = [self.gradient_x(d) for d in disp]
        disp_gradients_y = [self.gradient_y(d) for d in disp]

        image_gradients_x = [self.gradient_x(img) for img in pyramid]
        image_gradients_y = [self.gradient_y(img) for img in pyramid]

        weights_x = [tf.exp(-tf.reduce_mean(tf.abs(g), 3, keep_dims=True)) for g in image_gradients_x]
        weights_y = [tf.exp(-tf.reduce_mean(tf.abs(g), 3, keep_dims=True)) for g in image_gradients_y]

        smoothness_x = [disp_gradients_x[i] * weights_x[i] for i in range(4)]
        smoothness_y = [disp_gradients_y[i] * weights_y[i] for i in range(4)]
        return smoothness_x + smoothness_y
    
    def get_apperance_loss(self, left_est, left_pyramid):
        
        # L1
        self.l1_left = [tf.abs( left_est[i] - left_pyramid[i]) for i in range(4)]
        self.l1_reconstruction_loss_left  = [tf.reduce_mean(l) for l in self.l1_left]

        # SSIM
        self.ssim_left = [self.SSIM_paper( left_est[i], left_pyramid[i]) for i in range(1)]
        self.ssim_loss_left  = [tf.reduce_mean(s) for s in self.ssim_left]

        # WEIGTHED SUM
        self.image_loss_left  = [0.85 * self.ssim_loss_left[i]  + (1 - 0.85) * self.l1_reconstruction_loss_left[i]  for i in range(4)]

        return self.image_loss_left
    
    def generate_image_left(self, img, disp):
        return bilinear_sampler_1d_h(img, -disp)
    
    def generate_image_right(self, img, disp):
        return bilinear_sampler_1d_h(img, disp)
    
    def SSIM_paper(self, x, y):
        C1 = 0.01 ** 2
        C2 = 0.03 ** 2

        mu_x = tf.nn.avg_pool2d(x, 3, 1, 'VALID')
        mu_y = tf.nn.avg_pool2d(y, 3, 1, 'VALID')

        sigma_x  = tf.nn.avg_pool2d(x ** 2, 3, 1, 'VALID') - mu_x ** 2
        sigma_y  = tf.nn.avg_pool2d(y ** 2, 3, 1, 'VALID') - mu_y ** 2
        sigma_xy = tf.nn.avg_pool2d(x * y , 3, 1, 'VALID') - mu_x * mu_y

        SSIM_n = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2)
        SSIM_d = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2)

        SSIM = SSIM_n / SSIM_d

        return tf.clip_by_value((1 - SSIM) / 2, 0, 1)
    
    
    def build_outputs(self):
        
        # STORE DISPARITIES
        with tf1.variable_scope('disparities'):
            
            self.disp_left_est  = [tf.expand_dims(d[:,:,:,0], 3) for d in self.disp_est]
            self.disp_right_est = [tf.expand_dims(d[:,:,:,1], 3) for d in self.disp_est]


        # GENERATE IMAGES
        with tf1.variable_scope('images'):
            self.left_est  = [self.generate_image_left(self.right_pyramid[i], self.disp_left_est[i])  for i in range(4)]
            self.right_est = [self.generate_image_right(self.left_pyramid[i], self.disp_right_est[i]) for i in range(4)]

        # LR CONSISTENCY
        with tf1.variable_scope('left-right'):
            self.right_to_left_disp = [self.generate_image_left(self.disp_right_est[i], self.disp_left_est[i])  for i in range(4)]
            self.left_to_right_disp = [self.generate_image_right(self.disp_left_est[i], self.disp_right_est[i]) for i in range(4)]

    
        # DISPARITY SMOOTHNESS
        with tf1.variable_scope('smoothness'):
            self.disp_left_smoothness  = self.get_disparity_smoothness(self.disp_left_est,  self.left_pyramid)
            self.disp_right_smoothness = self.get_disparity_smoothness(self.disp_right_est, self.right_pyramid)

        
    def build_losses(self, disp_est, left_pyramid, right_pyramid):
        self.disp_est = disp_est
        self.left_pyramid   = left_pyramid
        self.right_pyramid  = right_pyramid
        
        self.build_outputs()
        with tf1.variable_scope('losses', reuse=True):
            # IMAGE RECONSTRUCTION
            # L1
            self.l1_left = [tf.abs( self.left_est[i] - self.left_pyramid[i]) for i in range(4)]
            self.l1_reconstruction_loss_left  = [tf.reduce_mean(l) for l in self.l1_left]
            self.l1_right = [tf.abs(self.right_est[i] - self.right_pyramid[i]) for i in range(4)]
            self.l1_reconstruction_loss_right = [tf.reduce_mean(l) for l in self.l1_right]

            # SSIM
            self.ssim_left = [SSIM( self.left_est[i],  self.left_pyramid[i]) for i in range(4)]
            self.ssim_loss_left  = [tf.reduce_mean(s) for s in self.ssim_left]
            self.ssim_right = [SSIM(self.right_est[i], self.right_pyramid[i]) for i in range(4)]
            self.ssim_loss_right = [tf.reduce_mean(s) for s in self.ssim_right]

            # WEIGTHED SUM
            self.image_loss_right = [0.85 * self.ssim_loss_right[i] + (1 - 0.85) * self.l1_reconstruction_loss_right[i] for i in range(4)]
            self.image_loss_left  = [0.85 * self.ssim_loss_left[i]  + (1 - 0.85) * self.l1_reconstruction_loss_left[i]  for i in range(4)]
            self.image_loss = tf.add_n(self.image_loss_left + self.image_loss_right)

            # DISPARITY SMOOTHNESS
            self.disp_left_loss  = [tf.reduce_mean(tf.abs(self.disp_left_smoothness[i]))  / 2 ** i for i in range(4)]
            self.disp_right_loss = [tf.reduce_mean(tf.abs(self.disp_right_smoothness[i])) / 2 ** i for i in range(4)]
            self.disp_gradient_loss = tf.add_n(self.disp_left_loss + self.disp_right_loss)

            # LR CONSISTENCY
            self.lr_left_loss  = [tf.reduce_mean(tf.abs(self.right_to_left_disp[i] - self.disp_left_est[i]))  for i in range(4)]
            self.lr_right_loss = [tf.reduce_mean(tf.abs(self.left_to_right_disp[i] - self.disp_right_est[i])) for i in range(4)]
            self.lr_loss = tf.add_n(self.lr_left_loss + self.lr_right_loss)

            # TOTAL LOSS
            self.total_loss = self.image_loss + 0.1 * self.disp_gradient_loss + 1.0 * self.lr_loss
        
        return self.total_loss

In [118]:
paper = smoothness_losses()


In [119]:
np.random.seed(21)
num_disp    = np.random.randint(0, 40, size=(1, 512, 512, 2))
image_left  = np.random.randint(0, 40, size=(1, 512, 512, 3))
image_right = np.random.randint(0, 40, size=(1, 512, 512, 3))


disp  = tf.constant(num_disp,dtype=tf.float32)
img_L = tf.constant(image_left,  dtype=tf.float32)
img_R = tf.constant(image_right, dtype=tf.float32)

disp_pyramid  = paper.scale_pyramid(disp, 4)
img_L_pyramid = paper.scale_pyramid(img_L, 4)
img_R_pyramid = paper.scale_pyramid(img_R, 4)

loss = paper.build_losses(disp_pyramid, img_L_pyramid, img_R_pyramid)
print(loss.numpy())

loss_ = total_loss(img_L, img_R, disp)
print(loss_.numpy())

# print(len(img_L_pyramid), len(img_R_pyramid))
# print(img_L_pyramid[3].numpy().shape)
# print(disp[1].numpy().shape)
# loss_smooth_ours, dx_our, dd_our = disparity_smoothness_loss(disp[0], image[0])
# loss_smooth,dx, dd                 = paper.get_disparity_smoothness(disp, image)

# C_ap_l = paper.get_apperance_loss(pre_image, image)
# C_ap_l_ours = apperance_matching_loss(image[0], pre_image[0])
# print(C_ap_l[0].numpy(), C_ap_l[0].numpy())

# print((dx_our.numpy()==
#        dx.numpy()))
# print(loss_smooth.numpy(), loss_smooth_ours.numpy())
# with tf1.Session() as sess:
    
#     sess.run([loss_smooth_ours])
    

186.59431
[186.59433]


In [71]:
#testing apperance matching loss only
out = paper.SSIM_paper(img_L,img_R)
out_ = SSIM(img_L, img_R)
print((out.numpy()==out_.numpy()).all())

False


In [None]:
%reset_selective total_loss

In [None]:
out = np.zeros_like(num_disp)
out[:,:,:-1,:] = num_disp[:,:,:-1,:] - num_disp[:,:,1:,:]
print(out[:,:,-1:,0])

In [None]:
x_t, y_t = np.meshgrid(np.linspace(-1, 1, 5), np.linspace(-1, 1, 5))
theta = np.array([[0.7, -0.7, 0], [0.7, 0.7, 0]])
theta = np.array([[2, 0, 1], [0, 1, 0]])
grid = np.array([x_t, y_t, 1])

In [None]:
out = np.dot(theta, grid)

In [None]:
print(grid)
print(out)

In [None]:
import matplotlib.pyplot as plt
plt.scatter(x_t, y_t)
plt.axis([-3,3,-3,3])
plt.grid()

In [None]:
plt.scatter(out[0], out[1])
plt.axis([-3,3,-3,3])
plt.grid()

In [None]:
a = np.random.randint(0, 10, size =(3, 5))
i = tf.constant(a)
    index = [0,1]
    out = tf.gather(i, index,axis=-1)
    out = tf.gather(out, index, axis=0)

In [None]:
print(i)
print(out)

In [None]:
a = np.random.randint(0, 10, size =(2, 5, 10))
b = np.random.randn(2, 5, 10)
print(b.shape)
img = tf.constant(a)
img = tf.cast(img, tf.float32)
x_offset = tf.constant(b)
x_offset = tf.cast(x_offset, tf.float32)
_height = tf.shape(img)[1]
_width = tf.shape(img)[2]
_height_f  = tf.cast(_height, tf.float32)
_width_f  = tf.cast(_width,  tf.float32)
_num_batch = tf.shape(img)[0]

def _transform(input_images, x_offset):
    with tf1.variable_scope('transform'):
        # grid of (x_t, y_t, 1), eq (1) in ref [1]
        x_t, y_t = tf.meshgrid(tf.linspace(0.0,   _width_f - 1.0,  _width),
                               tf.linspace(0.0 , _height_f - 1.0 , _height))

        x_t_flat = tf.reshape(x_t, (1, -1))
        y_t_flat = tf.reshape(y_t, (1, -1))
        
        stack = tf.stack([_num_batch,1])

        x_t_flat = tf.tile(x_t_flat, tf.stack([_num_batch, 1]))
        y_t_flat = tf.tile(y_t_flat, tf.stack([_num_batch, 1]))

        x_t_flat = tf.reshape(x_t_flat, [-1])
        y_t_flat = tf.reshape(y_t_flat, [-1])
        
        print(x_t_flat.dtype)
        print(x_offset.dtype)

        #x_t_flat = x_t_flat + tf.reshape(x_offset, [-1]) * _width_f

#         input_transformed = _interpolate(input_images, x_t_flat, y_t_flat)

#         output = tf.reshape(
#             input_transformed, tf.stack([_num_batch, _height, _width, _num_channels]))
        return x_t_flat, y_t_flat, stack

In [None]:
x_t, y_t, stack = _transform(img, x_offset)

In [None]:
base = tf.tile(tf.expand_dims(tf.range(_num_batch)* (_height * _width), 1 ),[1,( _height * _width)])
base = tf.reshape(base, [-1])
base = tf.cast(base, tf.int32)
y_t  = tf.cast(y_t, tf.int32)
x_t  = tf.cast(x_t, tf.int32)
print(base)
print(x_t.numpy() + (base + y_t * 5))

In [None]:
a = np.random.randint(0, 5,  size =(3, 3))
a = [[0],[50]]
img = tf.constant(a)
#img = tf.reshape(img, [1,-1])
out = tf.tile(img,[2,1])
print(img)
print(out)

In [None]:
stack = tf.stack([_num_batch])
print(stack.numpy())

In [None]:
a = np.random.randint(0, 5,  size =(3, 3))
img = tf.constant(a)
img = tf.reshape(img,[-1])
print(tf.add_n(img))
print(img)

In [None]:
tf1.s