In [1]:
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from scipy import ndimage
import numpy as np


In [None]:
def conv2d(input_images, weight, stride = 1):
    return tf.nn.conv2d(input_images, weight ,strides=[1,stride, stride,1], padding='VALID',name="conv")
        

def conv_elu(input_layer, k, in_filter, ou_filter, stride, scope, activation = tf.nn.elu, reuse=False):
    
    with tf.compat.v1.variable_scope(scope, reuse = reuse):
        W = tf.compat.v1.get_variable("weight", [k, k, in_filter, ou_filter], initializer=tf1.glorot_uniform_initializer())
        b = tf.compat.v1.get_variable("biases", [ou_filter],initializer=tf1.glorot_uniform_initializer())
        
        #we need pyrimad which output is half of its input so, need to pad input.
        p = np.floor((k - 1) / 2).astype('int32')
        padding = tf.constant([[0,0],[p, p],[p, p],[0,0]])
        p_x = tf.pad(input_layer, padding)
        
        #padded input
        conv = conv2d(p_x, W, stride = stride)
        output = tf.nn.bias_add(conv, b)
        out = activation(output)
        
    return out


def upsampling(input_layer, factor):    
    return tf.keras.layers.UpSampling2D(size=(factor, factor))(input_layer)
    
             

def upconv(input_layer, k, in_filter, ou_filter, scope, reuse=False):
    
    with tf.compat.v1.variable_scope(scope, reuse = reuse):    
        #Upsampling
        upsample = upsampling(input_layer, 2)
        out = conv_elu(upsample, k, in_filter, ou_filter, 1, scope='conv_elu')
        return out


def conv_block(input_layer, k, in_filter, ou_filter, scope):
    
    c1 = conv_elu(input_layer, k,  in_filter, ou_filter,  1, scope=scope )
    c2 = conv_elu(c1,          k,  ou_filter, ou_filter,  2, scope=scope+'b')
    
    return c2
    
def get_disp(x, in_filter,scope):
    disp = 0.3 * conv_elu(x, 3, in_filter, 2, 1, scope = scope, activation = tf.nn.sigmoid)
    return disp


In [None]:
def init_placeholder():
    
    tf1.disable_eager_execution()
    tf1.reset_default_graph()
    
    with tf1.name_scope("Input_image"):
        input_layer = tf1.placeholder('float', shape = input_shape)
        
    return input_layer


def make_architecture(input_layers):
    
    with tf1.name_scope("ENCNN"):
    
        with tf1.name_scope("encoder"):
            conv1 = conv_block(input_layers, 7,  3,  32, 'conv1')#2
            conv2 = conv_block(conv1,        5, 32,  64, 'conv2')#4
            conv3 = conv_block(conv2,        3, 64, 128, 'conv3')#8
            conv4 = conv_block(conv3,        3, 128,256, 'conv4')#16
            conv5 = conv_block(conv4,        3, 256,512, 'conv5')#32
                conv6 = conv_block(conv5,        3, 512,512, 'conv6')#64
            conv7 = conv_block(conv6,        3, 512,512, 'conv7')#128
            
            
            


        with tf1.name_scope("decoder"):
            #upsampling 7
            upconv7 = upconv(conv7,     3,  512,  512, scope =  'upconv7')
            concat7 = tf.concat([upconv7, conv6], axis=-1, name = 'concat7')
            iconv7  = conv_elu(concat7, 3, 1024,  512, 1, scope= 'iconv7')

            #upsampling 6
            upconv6 = upconv(iconv7,    3,  512,  512, scope =  'upconv6')
            concat6 = tf.concat([upconv6, conv5], axis=-1, name = 'concat6')
            iconv6  = conv_elu(concat6, 3, 1024,  512, 1, scope= 'iconv6')

            #upsampling 5
            upconv5 = upconv(iconv6,    3,  512,  256, scope =  'upconv5')
            concat5 = tf.concat([upconv5, conv4], axis=-1, name= 'concat5')
            iconv5  = conv_elu(concat5, 3,  512,  256, 1, scope= 'iconv5')

            #upsampling 4
            upconv4 = upconv(iconv5,    3,   256,  128, scope = 'upconv4')
            concat4 = tf.concat([upconv4, conv3], axis=-1, name ='concat4')
            iconv4  = conv_elu(concat4, 3, 256,  128, 1, scope= 'iconv4')
            disp4   = get_disp(iconv4, 128, scope= 'disp4')
            updisp4 = upsampling(disp4, 2)

            #upsampling 3
            upconv3 = upconv(iconv4,    3,  128,  64, scope = 'upconv3')
            concat3 = tf.concat([upconv3, conv2, updisp4], axis=-1, name='concat3')
            iconv3  = conv_elu(concat3, 3, 130,  64, 1, scope= 'iconv3')
            disp3   = get_disp(iconv3,  64, scope = 'disp3')
            updisp3 = upsampling(disp3, 2)

            #upsampling 2
            upconv2 = upconv(iconv3,    3,  64,   32, scope = 'upconv2')
            concat2 = tf.concat([upconv2, conv1, updisp3], axis=-1, name='concat2')
            iconv2  = conv_elu(concat2, 3,  66,   32, 1, scope= 'iconv2')
            disp2   = get_disp(iconv2,  32, scope = 'disp2')
            updisp2 = upsampling(disp2, 2)

            #upsampling 1
            upconv1 = upconv(iconv2,    3,  32,   16, scope = 'upconv1')
            concat1 = tf.concat([upconv1, updisp2], axis=-1, name='convat1')
            iconv1  = conv_elu(concat1, 3,  18,   16, 1, scope= 'iconv1')
            disp1   = get_disp(iconv1,  16, scope = 'disp1')
        
    return disp1, disp2, disp3 ,disp4
        

In [None]:
def train(train_input_image):
    logdir = '/media/sansii/Software/san_projects/Major_project/Moncular_depth_estimation_data/'
    input_layer = init_placeholder()
    
    disp1, disp2, disp3 , disp4 = make_architecture(input_layer)
    
    writer = tf1.summary.FileWriter(logdir+'./graph' , graph=tf1.get_default_graph())
    
    

In [None]:
batch_size = 32
img_h = 256
img_w = 512
input_shape = [batch_size, img_h, img_w, 3]
train(0)

#Testing Section

In [None]:
tf1.disable_eager_execution()
tf1.reset_default_graph()
n_filters = 64
n_channels = 3
kernel_shape = [7, 7, n_channels,n_filters]
bias_shape = [n_filters]
W = tf.compat.v1.get_variable("weight", kernel_shape, initializer=tf1.glorot_uniform_initializer())
data_path = '/media/sansii/Software/san_projects/Major_project/KITTI_dataset/2015/testing/'
left = ndimage.imread(data_path+"image_2/000083_10.png")

left_shape = (1, left.shape[0], left.shape[1], 3)
input_layer  = tf.compat.v1.placeholder(tf.float32, left_shape,  name='image_left' )
p = np.floor((kernel_shape[0] - 1) / 2).astype('int32')
padding = tf.constant([[0,0],[p, p],[p, p],[0,0]])
p_x = tf.pad(input_layer, padding)
conv = conv2d(p_x, W, stride = 2)
conv = tf.nn.elu(conv)

In [None]:
left.shape

In [None]:
var = tf1.global_variables_initializer()
with tf1.Session() as sess:
    sess.run(var)
    left_ = left[np.newaxis,...]
    print(left_.shape)
    out = sess.run(conv, feed_dict={input_layer: left_})
   
    print(out.shape)



In [None]:
%reset_selective conv2d


In [25]:
class STN:
    def interpolate(self, img, x, y):
        #bilinear_interpolation

        #For corner pixel there is no either left or right / top or down pixels so padding is necessary
        img = tf.pad(img, paddings= ((0,0),(1,1),(1,1),(0,0)))
        
        x = tf.clip_by_value(x, 0.0, tf.cast(self.width,tf.float32)+1.0)

        #since we have padded we need to add plus 1 for our transformed coordinates
        x = x + 1
        y = y + 1

        #since the values are in fraction we need to take floor value which selects left pixels
        x_float   = tf.floor(x)
        y_float   = tf.floor(y)
        x_1_float = x_float + 1
        
        x_1_float = tf.minimum(x_1_float, tf.cast(self.width,tf.float32)+1.0 )
        
        #Since,the index are in integer we convert float into integer
        x_int = tf.cast(x_float, tf.int32)
        y_int = tf.cast(y_float, tf.int32)
        x_1_int = tf.cast(x_1_float, tf.int32)
        
        #we required total dimention for reshaping 
        dim_y  = self.width + 2 * 1#padding
        dim_xy = (self.width + 2) * (self.height + 2)

        #There are number of images with there individual coordinate space now we need to convert
        #individual coordinate space into a single coordinate space
        #eg: x = [ [0,1,2,3], [0,1,2,3], [0,1,2,3]] 
        # y= [[0,0,0,0],[1,1,1,1],[2,2,2,2]]into [0 ,1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        #Converting 2d spatial dimention into 1d vector
        
        base = tf.tile(tf.expand_dims(tf.range(self.num_batches),1) * dim_xy, [1, self.width * self.height])
        base = tf.reshape(base, [-1])
    
        x_l = x_int   + (base + y_int * dim_y)
        x_r = x_1_int + (base + y_int * dim_y)
        
        print(x_l.numpy())
        #Flattering input image
        img_flat = tf.reshape(img, [-1, self.num_channels]) 
        
        #tf.gather selects pixels of img from coordinate x_l and x_r
        #Therefore pixel_l and pixel_r contains only selected coordinates pixels from img
        pixel_l = tf.gather(img_flat, x_l)
        pixel_r = tf.gather(img_flat, x_r)
        
        #Now for bilinear interpolation each left and right pixel must be associated with its respective weights.
        weights_l = tf.expand_dims(x - x_float, 1)
        weights_r = tf.expand_dims(x_1_float - x, 1)
        
        print("Weight", weights_l.shape)
        print("pixel_l", pixel_l.shape)
        
        out = (weights_l * pixel_l) + (weights_r * pixel_r)

        return tf.reshape(out, [self.num_batches, self.height, self.width, self.num_channels])

    def bilinear_sampling(self, img, disparity):
        self.img = img
        self.width       = tf.shape(img)[2]
        self.height      = tf.shape(img)[1]
        self.num_batches = tf.shape(img)[0]
        self.num_channels= tf.shape(img)[3]

        width_f =  tf.cast(self.width , tf.float32)
        height_f = tf.cast(self.height, tf.float32)

        #Creating meshgrid which contains represents coordinates(x, y) of images
        x_grid, y_grid = tf.cast(tf.meshgrid(tf.range(self.width), 
                                     tf.range(self.height)), tf.float32)

        #Flatterning grids
        x_flat = tf.reshape(x_grid, [-1])
        y_flat = tf.reshape(y_grid, [-1])

        #Since, there are num_batches so we need to add up grids for all batches
        x_flat = tf.tile(x_flat, [self.num_batches])
        y_flat = tf.tile(y_flat, [self.num_batches])

        #Flatterning disparity size = (num_batches * weight * height)
        disparity_flat = tf.reshape(disparity, [-1])

        #Adding disparity to find / applying transformation
        x_transf = x_flat + (disparity_flat * width_f) #Since the out of sigmoid funtion in 0 -1 so we muliply with width

        #Transformed coordinates are in fraction since, there is no fraction pixel so we interplate
        out = self.interpolate(img, x_transf, y_flat)



        return out




In [26]:
stn = STN()
a = np.random.randint(0, 10 , size =(2,3,6,1))
b = np.random.randn(2,3,6)
img = tf.constant(a)
disparity = tf.constant(b)
img = tf.cast(img, tf.float32)
disparity = tf.cast(disparity, tf.float32)
out = stn.bilinear_sampling(img, disparity)




[ 9  9 12 12 12 14 17 24 17 24 17 24 27 25 32 25 32 28 49 51 49 49 54 56
 57 61 64 64 57 57 65 72 67 65 65 65]
Weight (36, 1)
pixel_l (36, 1)


In [12]:
x_grid, y_grid = tf.cast(tf.meshgrid(tf.range(3), 
                                 tf.range(3)), tf.float32)
print(x_grid)

tf.Tensor(
[[0. 1. 2.]
 [0. 1. 2.]
 [0. 1. 2.]], shape=(3, 3), dtype=float32)


In [None]:
def SSIM(image, pred_image, block_size):
    
    C1 = 0.01 ** 2
    C2 = 0.03 ** 2
    
    u_x = tf.nn.avg_pool2d(image      ,block_size, strides=1, padding='SAME')
    u_y = tf.nn.avg_pool2d(pred_image ,block_size, strides=1, padding='SAME')

    sigma_x  = tf.nn.avg_pool2d(image**2           ,block_size, strides=1, padding='SAME') - u_x**2
    sigma_y  = tf.nn.avg_pool2d(pred_image**2      ,block_size, strides=1, padding='SAME') - u_y**2
    sigma_xy = tf.nn.avg_pool2d(image * pred_image ,block_size, strides=1, padding='SAME') - u_x * u_y
    
    SSIM_num = ((2 * u_x * u_y + C1)   * (2 * sigma_x * sigma_y   + C2)) 
    SSIM_den = ((u_x**2 + u_y**2 + C1) * (sigma_x**2 + sigma_y**2 + C2))
    
    SSIM = SSIM_num / SSIM_den
    
    return tf.clip_by_value((1 - SSIM) / 2, 0, 1)

def apperance_matching_loss(image, pred_image):
    alpha = 0.85
    L1_error = tf.abs(image - pred_image)
    ssim_error = SSIM(image, pred_image, 3)
    
    C_ap = tf.reduce_mean((alpha * ssim_error + (1 - alpha) * L1_error))
    
    return apperance_matching_loss


def disparity_smoothness_loss(disp, image):
    
    disp_gradient_y , disp_gradient_x  = tf.image.image_gradients(disp)
    image_gradient_y, image_gradient_x = tf.image.image_gradients(image)

    im_dx = -tf.reduce_mean(tf.abs(image_gradient_x),axis=-1, keepdims=True)
    im_dy = -tf.reduce_mean(tf.abs(image_gradient_y),axis=-1, keepdims=True)
    
    loss_dx = tf.multiply(tf.abs(disp_gradient_x), tf.math.exp(im_dx))
    loss_dy = tf.multiply(tf.abs(disp_gradient_y), tf.math.exp(im_dy))
    
    disp_smoothness_loss = tf.reduce_mean((loss_dx + loss_dy))
    
    
    return disp_smoothness_loss


def left_right_consistency_loss(disp_left, disp_right):
    
     
        


def training_loss(image, pred_image):
    #Apperance Loss
    alpha = 0.85
    
    

In [None]:
t = tf.constant([[1., 2,  3], [4,  5,  6]])
t2 = tf.clip_by_value(t, clip_value_min=-1, clip_value_max=1)

exp = tf.math.exp(t)
with tf1.Session() as sess:
    e = sess.run(exp)
    print(e)
    
t2.numpy()

In [None]:
BATCH_SIZE = 1
IMAGE_HEIGHT = 5
IMAGE_WIDTH = 5
CHANNELS = 1
image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
  delta=1, dtype=tf.float32),
  shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
dx, dy = tf.image.image_gradients(image)
exp = 
with tf1.Session() as sess:
    d_y, d_x = sess.run([dx, dy])
    print(d_y)
   

In [None]:
dCost_dW.numpy()

#Comparision_Section

In [None]:
def disparity_smoothness_loss(disp, image):
    
    disp_gradient_y , disp_gradient_x  = tf.image.image_gradients(disp)
    image_gradient_y, image_gradient_x = tf.image.image_gradients(image)

    im_dx = -tf.reduce_mean(tf.abs(image_gradient_x),axis=-1, keepdims=True)
    im_dy = -tf.reduce_mean(tf.abs(image_gradient_y),axis=-1, keepdims=True)
    
    loss_dx = tf.multiply(tf.abs(disp_gradient_x), tf.math.exp(im_dx))
    loss_dy = tf.multiply(tf.abs(disp_gradient_y), tf.math.exp(im_dy))
    
    disp_smoothness_loss = tf.reduce_mean((loss_dx + loss_dy))
    
    
    return disp_smoothness_loss, loss_dx, loss_dy


class smoothness_losses:
    
    def gradient_x(self, img):
        gx = img[:,:,:-1,:] - img[:,:,1:,:]
        return tf.pad(gx,paddings = ((0,0),(0,0),(0,1),(0,0)))

    def gradient_y(self, img):
        gy = img[:,:-1,:,:] - img[:,1:,:,:]
        return tf.pad(gy,paddings=((0,0),(0,1),(0,0),(0,0)))

    def get_disparity_smoothness(self, disp, pyramid):
        disp_gradients_x = [self.gradient_x(d) for d in disp]
        disp_gradients_y = [self.gradient_y(d) for d in disp]

        image_gradients_x = [self.gradient_x(img) for img in pyramid]
        image_gradients_y = [self.gradient_y(img) for img in pyramid]

        weights_x = [tf.exp(-tf.reduce_mean(tf.abs(g), 3, keepdims=True)) for g in image_gradients_x]
        weights_y = [tf.exp(-tf.reduce_mean(tf.abs(g), 3, keepdims=True)) for g in image_gradients_y]

        smoothness_x = [tf.abs(disp_gradients_x[i]) * weights_x[i] for i in range(1)]
        smoothness_y = [tf.abs(disp_gradients_y[i]) * weights_y[i] for i in range(1)]
        return tf.reduce_mean((smoothness_x[0] + smoothness_y[0])) , smoothness_x[0], smoothness_y[0]

In [None]:

paper = smoothness_losses()

In [None]:

num_disp  = np.random.randint(0, 20, size=(1, 1, 5, 10, 1))
num_image = np.random.randint(0, 30, size=(1, 1, 5, 10, 3))
disp  = tf.constant(num_disp,dtype=tf.float32)
image = tf.constant(num_image, dtype=tf.float32)
loss_smooth_ours, dx_our, dd_our = disparity_smoothness_loss(disp[0], image[0])
loss_smooth,dx, dd                 = paper.get_disparity_smoothness(disp, image)


print((dx_our.numpy()==
       dx.numpy()))
print(loss_smooth.numpy(), loss_smooth_ours.numpy())
# with tf1.Session() as sess:
    
#     sess.run([loss_smooth_ours])
    

In [None]:
out = np.zeros_like(num_disp)
out[:,:,:-1,:] = num_disp[:,:,:-1,:] - num_disp[:,:,1:,:]
print(out[:,:,-1:,0])

In [None]:
x_t, y_t = np.meshgrid(np.linspace(-1, 1, 5), np.linspace(-1, 1, 5))
theta = np.array([[0.7, -0.7, 0], [0.7, 0.7, 0]])
theta = np.array([[2, 0, 1], [0, 1, 0]])
grid = np.array([x_t, y_t, 1])

In [None]:
out = np.dot(theta, grid)

In [None]:
print(grid)
print(out)

In [None]:
import matplotlib.pyplot as plt
plt.scatter(x_t, y_t)
plt.axis([-3,3,-3,3])
plt.grid()

In [None]:
plt.scatter(out[0], out[1])
plt.axis([-3,3,-3,3])
plt.grid()

In [None]:
a = np.random.randint(0, 10, size =(3, 5))
i = tf.constant(a)
    index = [0,1]
    out = tf.gather(i, index,axis=-1)
    out = tf.gather(out, index, axis=0)

In [None]:
print(i)
print(out)

In [None]:
a = np.random.randint(0, 10, size =(2, 5, 10))
b = np.random.randn(2, 5, 10)
print(b.shape)
img = tf.constant(a)
img = tf.cast(img, tf.float32)
x_offset = tf.constant(b)
x_offset = tf.cast(x_offset, tf.float32)
_height = tf.shape(img)[1]
_width = tf.shape(img)[2]
_height_f  = tf.cast(_height, tf.float32)
_width_f  = tf.cast(_width,  tf.float32)
_num_batch = tf.shape(img)[0]

def _transform(input_images, x_offset):
    with tf1.variable_scope('transform'):
        # grid of (x_t, y_t, 1), eq (1) in ref [1]
        x_t, y_t = tf.meshgrid(tf.linspace(0.0,   _width_f - 1.0,  _width),
                               tf.linspace(0.0 , _height_f - 1.0 , _height))

        x_t_flat = tf.reshape(x_t, (1, -1))
        y_t_flat = tf.reshape(y_t, (1, -1))
        
        stack = tf.stack([_num_batch,1])

        x_t_flat = tf.tile(x_t_flat, tf.stack([_num_batch, 1]))
        y_t_flat = tf.tile(y_t_flat, tf.stack([_num_batch, 1]))

        x_t_flat = tf.reshape(x_t_flat, [-1])
        y_t_flat = tf.reshape(y_t_flat, [-1])
        
        print(x_t_flat.dtype)
        print(x_offset.dtype)

        #x_t_flat = x_t_flat + tf.reshape(x_offset, [-1]) * _width_f

#         input_transformed = _interpolate(input_images, x_t_flat, y_t_flat)

#         output = tf.reshape(
#             input_transformed, tf.stack([_num_batch, _height, _width, _num_channels]))
        return x_t_flat, y_t_flat, stack

In [None]:
x_t, y_t, stack = _transform(img, x_offset)

In [None]:
base = tf.tile(tf.expand_dims(tf.range(_num_batch)* (_height * _width), 1 ),[1,( _height * _width)])
base = tf.reshape(base, [-1])
base = tf.cast(base, tf.int32)
y_t  = tf.cast(y_t, tf.int32)
x_t  = tf.cast(x_t, tf.int32)
print(base)
print(x_t.numpy() + (base + y_t * 5))

In [None]:
a = np.random.randint(0, 5,  size =(3, 3))
a = [[0],[50]]
img = tf.constant(a)
#img = tf.reshape(img, [1,-1])
out = tf.tile(img,[2,1])
print(img)
print(out)

In [None]:
stack = tf.stack([_num_batch])
print(stack.numpy())

In [None]:
a = np.random.randint(0, 5,  size =(3, 3))
img = tf.constant(a)
img = tf.reshape(img,[-1])