# Import and fake data

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Lambda, Activation, Conv2D, MaxPooling2D, ZeroPadding2D, Reshape, Concatenate
from tensorflow.keras.regularizers import l2
from optical_flow_warp_old import transformer_old
import numpy as np

# pwc_flow

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Lambda, Activation, Conv2D, MaxPooling2D, ZeroPadding2D, Reshape, Concatenate
from tensorflow.keras.regularizers import l2
from optical_flow_warp_old import transformer_old
import numpy as np

In [3]:
def feature_pyramid_flow(image):
    """Build a six-level convolutional feature pyramid on top of ``image``.

    Twelve 3x3 ``Conv2D`` layers are chained. They come in pairs that share a
    channel width (16, 32, 64, 96, 128, 192): the first layer of each pair
    uses stride 2, the second stride 1. Every layer uses ``'same'`` padding,
    a leaky-ReLU activation and an L2 kernel regularizer of 0.0004.

    Note that the layers are instantiated inside this function, so every call
    creates a new, independent set of layers.

    Parameters
    ----------
    image : tensor
        Input fed to the first convolution.

    Returns
    -------
    tuple
        Six tensors: the output of the stride-1 convolution of each pair,
        ordered from the first pair to the last.
    """
    levels = []
    x = image
    for channels in (16, 32, 64, 96, 128, 192):
        # Stride-2 convolution first, then a stride-1 convolution at the
        # same channel width.
        for stride in (2, 1):
            x = tf.keras.layers.Conv2D(
                channels,
                (3, 3),
                strides=stride,
                padding='same',
                activation=tf.nn.leaky_relu,
                kernel_regularizer=tf.keras.regularizers.L2(0.0004),
            )(x)
        levels.append(x)

    return tuple(levels)

In [4]:
def cost_volumn(feature1, feature2, d=4):
    """Compute a cost volume between two feature maps.

    ``feature2`` is zero-padded by ``d`` on both sides of axes 1 and 2. For
    every offset ``(i, j)`` with ``0 <= i, j <= 2 * d`` the window of the
    padded map that has the spatial size of ``feature1`` is multiplied
    element-wise with ``feature1`` and averaged over axis 3. The
    ``(2 * d + 1) ** 2`` single-channel results are concatenated along axis 3
    (``i`` is the outer loop, ``j`` the inner one).

    Parameters
    ----------
    feature1, feature2 : tensor
        Rank-4 tensors; axes 1 and 2 are treated as the spatial axes.
        # assumes both have the same shape - TODO confirm against callers
    d : int
        Maximum displacement searched in each spatial direction.

    Returns
    -------
    tensor
        Tensor with ``(2 * d + 1) ** 2`` channels on axis 3.
    """
    # Only the two spatial sizes are needed. Reading just those (instead of
    # int()-converting all four dimensions) keeps the function usable when the
    # batch or channel dimension is not statically known.
    static_shape = feature1.get_shape()
    H = tf.compat.dimension_value(static_shape[1])
    W = tf.compat.dimension_value(static_shape[2])
    if H is None or W is None:
        # Fall back to the run-time shape for spatial sizes that are unknown
        # at graph-construction time.
        dynamic_shape = tf.shape(feature1)
        H = dynamic_shape[1] if H is None else H
        W = dynamic_shape[2] if W is None else W

    feature2 = tf.pad(feature2, [[0, 0], [d, d], [d, d], [0, 0]], "CONSTANT")

    cv = [
        tf.math.reduce_mean(
            feature1 * feature2[:, i:(i + H), j:(j + W), :],
            axis=3,
            keepdims=True,
        )
        for i in range(2 * d + 1)
        for j in range(2 * d + 1)
    ]
    return tf.concat(cv, axis=3)

In [5]:
def optical_flow_decoder_dc(inputs, level):
    """Decode a two-channel flow map from ``inputs`` with a small conv stack.

    Five 3x3 leaky-ReLU convolutions with 128, 128, 96, 64 and 32 filters are
    applied. From the third one on, each convolution consumes the
    concatenation (axis 3) of the outputs of the two preceding convolutions.
    A final linear 3x3 convolution with 2 filters, fed with the concatenation
    of the last two hidden outputs, produces the flow.

    Parameters
    ----------
    inputs : tensor
        Input to the first convolution.
    level : int
        Not used by the body; kept for interface compatibility.

    Returns
    -------
    (flow, features) : tuple of tensors
        ``flow`` is the 2-channel prediction, ``features`` is the output of
        the 32-filter convolution.
    """
    def conv(filters, activation=tf.nn.leaky_relu):
        # All layers share kernel size, stride, padding and regularizer.
        return tf.keras.layers.Conv2D(
            filters,
            (3, 3),
            strides=1,
            padding='same',
            activation=activation,
            kernel_regularizer=tf.keras.regularizers.L2(0.0004),
        )

    older = conv(128)(inputs)
    newer = conv(128)(older)
    for filters in (96, 64, 32):
        # Slide the two-layer window forward by one layer.
        older, newer = newer, conv(filters)(tf.concat([older, newer], axis=3))

    flow = conv(2, activation=None)(tf.concat([older, newer], axis=3))

    return flow, newer

In [6]:
def context_net(inputs):
    """Apply a stack of dilated 3x3 convolutions and predict a 2-channel map.

    Six leaky-ReLU convolutions are chained with (filters, dilation) of
    (128, 1), (128, 2), (128, 4), (96, 8), (64, 16) and (32, 1), followed by
    a linear convolution with 2 filters and dilation 1. All layers use
    ``'same'`` padding, stride 1 and an L2 kernel regularizer of 0.0004.

    Parameters
    ----------
    inputs : tensor
        Input to the first convolution.

    Returns
    -------
    tensor
        Output of the final 2-filter convolution.
    """
    hidden_specs = (
        (128, 1),
        (128, 2),
        (128, 4),
        (96, 8),
        (64, 16),
        (32, 1),
    )

    x = inputs
    for filters, dilation in hidden_specs:
        x = tf.keras.layers.Conv2D(
            filters,
            (3, 3),
            padding='same',
            strides=1,
            activation=tf.nn.leaky_relu,
            kernel_regularizer=tf.keras.regularizers.L2(0.0004),
            dilation_rate=(dilation, dilation),
        )(x)

    # Final layer is linear (no activation).
    return tf.keras.layers.Conv2D(
        2,
        (3, 3),
        padding='same',
        strides=1,
        activation=None,
        kernel_regularizer=tf.keras.regularizers.L2(0.0004),
        dilation_rate=(1, 1),
    )(x)

# optical_flow_warp_old

In [7]:
# import tensorflow as tf
# import numpy as np


# def transformer_old(U, flo, out_size, name='SpatialTransformer', **kwargs):
#     """Backward warping layer

#     Implements a backward warping layer described in 
#     "Unsupervised Deep Learning for Optical Flow Estimation, Zhe Ren et al"

#     Parameters
#     ----------
#     U : float
#         The output of a convolutional net should have the
#         shape [num_batch, height, width, num_channels].
#     flo: float
#          The optical flow used to do the backward warping.
#          shape is [num_batch, height, width, 2]
#     out_size: tuple of two ints
#         The size of the output of the network (height, width)
#     """

#     def _repeat(x, n_repeats):
#         with tf.compat.v1.variable_scope('_repeat'):
#             rep = tf.transpose(
#                 tf.expand_dims(
#                     tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0])
#             rep = tf.cast(rep, 'int32')
#             x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
#             return tf.reshape(x, [-1])

#     def _interpolate(im, x, y, out_size):
#         # constants
#         num_batch = tf.shape(im)[0]
#         height = tf.shape(im)[1]
#         width = tf.shape(im)[2]
#         channels = tf.shape(im)[3]

#         x = tf.cast(x, 'float32')
#         y = tf.cast(y, 'float32')
#         height_f = tf.cast(height, 'float32')
#         width_f = tf.cast(width, 'float32')
#         out_height = out_size[0]
#         out_width = out_size[1]
#         zero = tf.zeros([], dtype='int32')
#         max_y = tf.cast(tf.shape(im)[1] - 1, 'int32')
#         max_x = tf.cast(tf.shape(im)[2] - 1, 'int32')

#         # scale indices from [-1, 1] to [0, width - 1] / [0, height - 1]
#         x = (x + 1.0) * (width_f - 1) / 2.0
#         y = (y + 1.0) * (height_f - 1) / 2.0

#         # do sampling
#         x0 = tf.cast(tf.floor(x), 'int32')
#         x1 = x0 + 1
#         y0 = tf.cast(tf.floor(y), 'int32')
#         y1 = y0 + 1

#         x0_c = tf.clip_by_value(x0, zero, max_x)
#         x1_c = tf.clip_by_value(x1, zero, max_x)
#         y0_c = tf.clip_by_value(y0, zero, max_y)
#         y1_c = tf.clip_by_value(y1, zero, max_y)

#         dim2 = width
#         dim1 = width * height
#         base = _repeat(tf.range(num_batch) * dim1, out_height * out_width)

#         base_y0 = base + y0_c * dim2
#         base_y1 = base + y1_c * dim2
#         idx_a = base_y0 + x0_c
#         idx_b = base_y1 + x0_c
#         idx_c = base_y0 + x1_c
#         idx_d = base_y1 + x1_c

#         # use indices to lookup pixels in the flat image and restore
#         # channels dim
#         im_flat = tf.reshape(im, tf.stack([-1, channels]))
#         im_flat = tf.cast(im_flat, 'float32')
#         Ia = tf.gather(im_flat, idx_a)
#         Ib = tf.gather(im_flat, idx_b)
#         Ic = tf.gather(im_flat, idx_c)
#         Id = tf.gather(im_flat, idx_d)

#         # and finally calculate interpolated values
#         x0_f = tf.cast(x0, 'float32')
#         x1_f = tf.cast(x1, 'float32')
#         y0_f = tf.cast(y0, 'float32')
#         y1_f = tf.cast(y1, 'float32')
#         wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1)
#         wb = tf.expand_dims(((x1_f - x) * (y - y0_f)), 1)
#         wc = tf.expand_dims(((x - x0_f) * (y1_f - y)), 1)
#         wd = tf.expand_dims(((x - x0_f) * (y - y0_f)), 1)
#         output = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id])
#         return output

#     def _meshgrid(height, width):
#         # This should be equivalent to:
#         #  x_t, y_t = np.meshgrid(np.linspace(-1, 1, width),
#         #                         np.linspace(-1, 1, height))
#         #  ones = np.ones(np.prod(x_t.shape))
#         #  grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
#         x_t = tf.matmul(
#             tf.ones(shape=tf.stack([int(height), 1])),
#             tf.transpose(
#                 tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
#         y_t = tf.matmul(
#             tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
#             tf.ones(shape=tf.stack([1, width])))

#         return x_t, y_t

#     def _transform(flo, input_dim, out_size):
#         with tf.compat.v1.variable_scope('_transform'):
#             num_batch = tf.shape(input_dim)[0]
#             height = tf.shape(input_dim)[1]
#             width = tf.shape(input_dim)[2]
#             num_channels = tf.shape(input_dim)[3]

#             # grid of (x_t, y_t, 1), eq (1) in ref [1]
#             height_f = tf.cast(height, 'float32')
#             width_f = tf.cast(width, 'float32')
#             out_height = out_size[0]
#             out_width = out_size[1]
#             x_t, y_t = _meshgrid(out_height, out_width)
#             x_t = tf.expand_dims(x_t, 0)
#             x_t = tf.tile(x_t, [num_batch, 1, 1])

#             y_t = tf.expand_dims(y_t, 0)
#             y_t = tf.tile(y_t, [num_batch, 1, 1])

#             x_s = x_t + flo[:, :, :, 0] / (
#                 (tf.cast(out_width, tf.float32) - 1.0) / 2.0)
#             y_s = y_t + flo[:, :, :, 1] / (
#                 (tf.cast(out_height, tf.float32) - 1.0) / 2.0)

#             x_s_flat = tf.reshape(x_s, [-1])
#             y_s_flat = tf.reshape(y_s, [-1])

#             input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat,
#                                              out_size)

#             output = tf.reshape(
#                 input_transformed,
#                 tf.stack([num_batch, out_height, out_width, num_channels]))
#             return output

#     with tf.compat.v1.variable_scope(name):
#         output = _transform(flo, U, out_size)
#         return output


# # def main(unused_argv):
# #     sess = tf.Session(config=tf.ConfigProto(
# #         allow_soft_placement=True, log_device_placement=False))

# #     image = tf.constant(
# #         [1, 2, 3, 4, 5, 6, 7, 8, 9], shape=[1, 3, 3, 1], dtype="float32")

# #     flo = np.zeros((1, 3, 3, 2))
# #     flo[0, 1, 1, 0] = 1.0
# #     #flo[0, 1, 1, 1] = 1.0
# #     flo = tf.constant(flo, dtype="float32")

# #     image2 = transformer_old(image, flo, [3, 3])

# #     print(image2.eval(session=sess))


# # if __name__ == '__main__':
# #     app.run()


# pwc_flow

In [8]:
def _pwc_refine_level(level, feature1, feature2, coarser_flow, H, W):
    """Refine the flow at pyramid ``level`` starting from a coarser flow.

    The coarser flow is bilinearly resized to
    ``[H // 2**level, W // 2**level]`` and multiplied by 2.0, ``feature2`` is
    warped with it through ``transformer_old``, a cost volume (``d=4``)
    between ``feature1`` and the warped features is decoded into a residual,
    and the residual is added to the resized flow.

    Returns ``(refined_flow, decoder_features)`` where ``decoder_features``
    is the second output of ``optical_flow_decoder_dc``.
    """
    size = [H // (2 ** level), W // (2 ** level)]
    upsampled = tf.compat.v1.image.resize_bilinear(coarser_flow, size) * 2.0
    warped = transformer_old(feature2, upsampled, size)
    cv = cost_volumn(feature1, warped, d=4)
    residual, decoder_features = optical_flow_decoder_dc(
        tf.concat([cv, feature1, upsampled], axis=3), level=level)
    return residual + upsampled, decoder_features


def construct_model_pwc_full(image1, image2, feature1, feature2):
    """Estimate flow coarse-to-fine from two six-level feature pyramids.

    Level 6 is decoded directly from the cost volume of the two coarsest
    feature maps. Levels 5 down to 2 are each refined from the previous level
    (resize, warp, cost volume, decode, add). The level-2 result is finally
    corrected by ``context_net``.

    Parameters
    ----------
    image1 : tensor
        Only its static height and width (axes 1 and 2) are read; they fix
        the sizes of all intermediate and returned flow maps.
    image2 : tensor
        Not used by the body; kept for interface compatibility.
    feature1, feature2 : sequence of six tensors
        Feature pyramids ordered from level 1 to level 6. Level 1 is unpacked
        but not used.
        # assumes level k has spatial size H // 2**k - TODO confirm

    Returns
    -------
    tuple of four tensors
        ``flow2 * 4`` resized to ``[H, W]``, ``flow3 * 4`` resized to
        ``[H // 2, W // 2]``, ``flow4 * 4`` resized to ``[H // 4, W // 4]``
        and ``flow5 * 4`` resized to ``[H // 8, W // 8]``.

    Raises
    ------
    TypeError
        If the height or width of ``image1`` is not statically known.
    """
    # Only the spatial size is needed. Converting the batch and channel
    # dimensions as well would fail for tensors whose batch size is unknown.
    H, W = (int(dim) for dim in image1.get_shape()[1:3])

    _, feature1_2, feature1_3, feature1_4, feature1_5, feature1_6 = feature1
    _, feature2_2, feature2_3, feature2_4, feature2_5, feature2_6 = feature2

    # Coarsest level: no previous flow, so no warping.
    cv6 = cost_volumn(feature1_6, feature2_6, d=4)
    flow6, _ = optical_flow_decoder_dc(cv6, level=6)

    flow5, _ = _pwc_refine_level(5, feature1_5, feature2_5, flow6, H, W)
    flow4, _ = _pwc_refine_level(4, feature1_4, feature2_4, flow5, H, W)
    flow3, _ = _pwc_refine_level(3, feature1_3, feature2_3, flow4, H, W)
    flow2_raw, f2 = _pwc_refine_level(2, feature1_2, feature2_2, flow3, H, W)

    # The context network predicts a correction on top of the raw level-2 flow.
    flow2 = context_net(tf.concat([flow2_raw, f2], axis=3)) + flow2_raw

    flow0_enlarge = tf.compat.v1.image.resize_bilinear(flow2 * 4.0, [H, W])
    flow1_enlarge = tf.compat.v1.image.resize_bilinear(flow3 * 4.0, [H // 2, W // 2])
    flow2_enlarge = tf.compat.v1.image.resize_bilinear(flow4 * 4.0, [H // 4, W // 4])
    flow3_enlarge = tf.compat.v1.image.resize_bilinear(flow5 * 4.0, [H // 8, W // 8])

    return flow0_enlarge, flow1_enlarge, flow2_enlarge, flow3_enlarge

# Test construct_model_pwc_full

In [9]:
from fake_parse import fake_parse
from dataloader_reimplement import generator_train

In [10]:
# Build the options object; it is handed to generator_train when the dataset
# is created.
opt = fake_parse()

In [11]:
# Wrap generator_train(opt) in a tf.data pipeline. Each yielded item is
# declared as six float32 tensors of shape (None, None, 3); items are grouped
# into batches of 4.
my_dataset = tf.data.Dataset.from_generator(
    lambda: generator_train(opt),
    output_signature=tuple(
        tf.TensorSpec(shape=(None, None, 3), dtype=tf.float32)
        for _ in range(6)
    ),
).batch(4)

In [12]:
# Smoke test: for every batch of my_dataset, build a feature pyramid for the
# first two tensors of the batch and run construct_model_pwc_full on them.
# NOTE(review): feature_pyramid_flow instantiates its Conv2D layers on every
# call, so the two pyramids do not share weights and each iteration creates a
# fresh set of layers - confirm this is intended.
for element in my_dataset:
    # Each batch unpacks into six tensors; only the first two are used below.
    left_image_o, right_image_o, next_left_image_o, next_right_image_o, proj_cam2pix, proj_pix2cam = element
    feature1 = feature_pyramid_flow(left_image_o)
#     print(feature1.get_shape())
    feature2 = feature_pyramid_flow(right_image_o)
#     print(feature2.get_shape())
    # Returns four flow maps; see construct_model_pwc_full for their sizes.
    flow0_enlarge, flow1_enlarge, flow2_enlarge, flow3_enlarge = construct_model_pwc_full(left_image_o, right_image_o, feature1, feature2)
    
    
    
    
#     print(cnv2.shape)
#     print(cnv4.shape)
#     print(cnv6.shape)
#     print(cnv8.shape)
#     print(cnv10.shape)
#     print(cnv12.shape)
#     print("_________")

#     def train(model, dataset, optimizer):
#     for step, (x1, x2, y) in enumerate(dataset):
#         with tf.GradientTape() as tape:
#             left, right = model([x1, x2])
#             loss = contrastive_loss(left, right, tf.cast(y, tf.float32))
#         gradients = tape.gradient(loss, model.trainable_variables)
#         optimizer.apply_gradients(zip(gradients, model.trainable_variables))