In [1]:
import tensorflow as tf
from fake_parse import fake_parse

In [2]:
# Build the options object consumed by the data loader below (it reads
# opt.data_dir, opt.train_file, opt.img_height, opt.img_width, opt.num_scales
# and opt.batch_size). fake_parse is a project-local helper; presumably it
# stands in for command-line parsing inside the notebook -- TODO confirm.
opt = fake_parse()

In [3]:
# Show the installed TensorFlow version; the cells below use TF 1.x graph-mode
# APIs (queues, tf.py_func, tf.contrib.slim).
tf.VERSION

'1.15.0'

In [4]:
def string_length_tf(t):
    """Return the byte length of string tensor `t` as a one-element list.

    The previous implementation wrapped Python's `len` in `tf.py_func`, which
    is deprecated, and produces a tensor of unknown static shape. The native
    `tf.strings.length` op (default unit is bytes, matching `len` on the bytes
    object py_func received) avoids that.

    Args:
        t: string tensor.

    Returns:
        A list holding a single int64 tensor, so existing callers that index
        the result with `[0]` keep working.
    """
    # tf.strings.length yields int32; cast to keep the original int64 dtype.
    return [tf.cast(tf.strings.length(t), tf.int64)]

In [5]:
def rescale_intrinsics(raw_cam_mat, opt, orig_height, orig_width):
    """Rescale a 3x3 intrinsics matrix from the original image size to the
    network input size.

    Horizontal terms (fx, cx) are multiplied by opt.img_width / orig_width and
    vertical terms (fy, cy) by opt.img_height / orig_height.

    Args:
        raw_cam_mat: tensor indexed as [row, col]; entries (0,0), (1,1), (0,2)
            and (1,2) are read.
        opt: options object providing `img_width` and `img_height`.
        orig_height: height the intrinsics currently refer to.
        orig_width: width the intrinsics currently refer to.

    Returns:
        Tensor stacking the three rows [fx', 0, cx'], [0, fy', cy'], [0, 0, 1].
    """
    focal_x = raw_cam_mat[0, 0]
    focal_y = raw_cam_mat[1, 1]
    center_x = raw_cam_mat[0, 2]
    center_y = raw_cam_mat[1, 2]

    top_row = tf.stack([
        focal_x * opt.img_width / orig_width,
        0,
        center_x * opt.img_width / orig_width,
    ])
    middle_row = tf.stack([
        0,
        focal_y * opt.img_height / orig_height,
        center_y * opt.img_height / orig_height,
    ])
    bottom_row = tf.constant([0., 0., 1.])

    return tf.stack([top_row, middle_row, bottom_row])

In [6]:
def get_multi_scale_intrinsics(raw_cam_mat, num_scales):
    """Build intrinsics (and their inverses) for a pyramid of image scales.

    For scale index s in [0, num_scales), fx, fy, cx and cy are each divided
    by 2**s.

    Args:
        raw_cam_mat: tensor indexed as [row, col] holding the full-resolution
            intrinsics.
        num_scales: number of pyramid levels (Python int).

    Returns:
        (proj_cam2pix, proj_pix2cam): the stacked per-scale matrices and their
        matrix inverses, both with static shape [num_scales, 3, 3].
    """
    per_scale_mats = []
    for level in range(num_scales):
        shrink = 2**level
        focal_x = raw_cam_mat[0, 0] / shrink
        focal_y = raw_cam_mat[1, 1] / shrink
        center_x = raw_cam_mat[0, 2] / shrink
        center_y = raw_cam_mat[1, 2] / shrink

        top_row = tf.stack([focal_x, 0, center_x])
        middle_row = tf.stack([0, focal_y, center_y])
        bottom_row = tf.constant([0., 0., 1.])
        per_scale_mats.append(tf.stack([top_row, middle_row, bottom_row]))

    cam2pix = tf.stack(per_scale_mats)
    pix2cam = tf.matrix_inverse(cam2pix)

    # Pin the static shapes so downstream batching sees fully defined tensors.
    expected_shape = [num_scales, 3, 3]
    cam2pix.set_shape(expected_shape)
    pix2cam.set_shape(expected_shape)
    return cam2pix, pix2cam

In [7]:
def make_intrinsics_matrix(fx, fy, cx, cy):
    """Assemble batched 3x3 intrinsics matrices from per-sample parameters.

    Args:
        fx, fy, cx, cy: batched tensors; the leading (batch) dimension of `fx`
            must be statically known because it is used as a tile count.

    Returns:
        Tensor whose entry for each sample is
        [[fx, 0, cx], [0, fy, cy], [0, 0, 1]], stacked along axis 1.
    """
    num_samples = fx.shape.as_list()[0]
    zero_col = tf.zeros_like(fx)

    top_rows = tf.stack([fx, zero_col, cx], axis=1)
    middle_rows = tf.stack([zero_col, fy, cy], axis=1)

    # One constant [0, 0, 1] row, repeated once per sample in the batch.
    homogeneous_row = tf.constant([0., 0., 1.], shape=[1, 3])
    bottom_rows = tf.tile(homogeneous_row, [num_samples, 1])

    return tf.stack([top_rows, middle_rows, bottom_rows], axis=1)

In [8]:
def data_augmentation(im, intrinsics, out_h, out_w):
    """Randomly upscale a batch and then randomly crop it to (out_h, out_w),
    adjusting the camera intrinsics to match both steps.

    Args:
        im: 4-D image batch; its static shape is unpacked into four values and
            the static height/width are used for scaling.
        intrinsics: batched intrinsics indexed as [:, row, col].
        out_h: crop height.
        out_w: crop width.

    Returns:
        (im, intrinsics) after scaling and cropping.
    """

    def _scale_randomly(images, cam):
        """Resize by independent x/y factors drawn uniformly from [1, 1.15)."""
        _, src_h, src_w, _ = images.get_shape().as_list()
        factors = tf.random.uniform([2], 1, 1.15)
        scale_x, scale_y = factors[0], factors[1]
        scaled_h = tf.cast(src_h * scale_y, dtype=tf.int32)
        scaled_w = tf.cast(src_w * scale_x, dtype=tf.int32)
        resized = tf.compat.v1.image.resize(
            images, [scaled_h, scaled_w],
            method=tf.compat.v1.image.ResizeMethod.AREA)
        # Focal lengths and principal point scale with the image.
        scaled_cam = make_intrinsics_matrix(
            cam[:, 0, 0] * scale_x,
            cam[:, 1, 1] * scale_y,
            cam[:, 0, 2] * scale_x,
            cam[:, 1, 2] * scale_y)
        return resized, scaled_cam

    def _crop_randomly(images, cam, crop_h, crop_w):
        """Cut a crop_h x crop_w window at a uniformly random offset."""
        # Dynamic shape: the size after random scaling is only known at run time.
        _, src_h, src_w, _ = tf.unstack(tf.shape(input=images))
        top = tf.random.uniform(
            [1], 0, src_h - crop_h + 1, dtype=tf.int32)[0]
        left = tf.random.uniform(
            [1], 0, src_w - crop_w + 1, dtype=tf.int32)[0]
        cropped = tf.image.crop_to_bounding_box(images, top, left, crop_h,
                                                crop_w)
        # Cropping leaves focal lengths alone and shifts the principal point.
        shifted_cam = make_intrinsics_matrix(
            cam[:, 0, 0],
            cam[:, 1, 1],
            cam[:, 0, 2] - tf.cast(left, dtype=tf.float32),
            cam[:, 1, 2] - tf.cast(top, dtype=tf.float32))
        return cropped, shifted_cam

    im, intrinsics = _scale_randomly(im, intrinsics)
    im, intrinsics = _crop_randomly(im, intrinsics, out_h, out_w)
    return im, intrinsics

In [9]:
class MonodepthDataloader(object):
    """Queue-based (TF1) input pipeline for stereo image pairs at two frames.

    Each line of `opt.train_file` is split on whitespace into five fields:
    left image, right image, next left image, next right image, and a camera
    intrinsics file. All five are prefixed with `opt.data_dir`.

    After construction, `self.data_batch` holds the shuffled batch tensors in
    this order: left, right, next left, next right, proj_cam2pix, proj_pix2cam.
    """

    def __init__(self, opt):
        """Build the input graph and store the batched tensors in `data_batch`.

        Args:
            opt: options object; this method reads `data_dir`, `train_file`,
                `img_height`, `img_width`, `num_scales` and `batch_size`.
        """
        self.data_path = opt.data_dir
        self.opt = opt
        filenames_file = opt.train_file

        # Read the file list one text line at a time, in file order.
        input_queue = tf.train.string_input_producer(
            [filenames_file], shuffle=False)
        line_reader = tf.TextLineReader()
        _, line = line_reader.read(input_queue)

        split_line = tf.string_split([line]).values

        # Build the paths of the four images and the intrinsics file.
        # string_join is called without a separator, so data_path is prepended
        # verbatim (it must already end with a path separator if one is needed).
        left_image_path = tf.string_join([self.data_path, split_line[0]])
        right_image_path = tf.string_join([self.data_path, split_line[1]])
        next_left_image_path = tf.string_join([self.data_path, split_line[2]])
        next_right_image_path = tf.string_join([self.data_path, split_line[3]])
        cam_intrinsic_path = tf.string_join([self.data_path, split_line[4]])

        # Only the left image reports its original size; that size is later
        # used to rescale the intrinsics.
        left_image_o, orig_height, orig_width = self.read_image(
            left_image_path, get_shape=True)
        right_image_o = self.read_image(right_image_path)
        next_left_image_o = self.read_image(next_left_image_path)
        next_right_image_o = self.read_image(next_right_image_path)

        # Random horizontal flip, decided once for all four images. When
        # flipping, the left/right roles are also swapped (left becomes the
        # mirrored right image and vice versa).
        # NOTE(review): the intrinsics are not mirrored alongside -- confirm
        # that this is intended.
        do_flip = tf.random_uniform([], 0, 1)
        left_image = tf.cond(do_flip > 0.5,
                             lambda: tf.image.flip_left_right(right_image_o),
                             lambda: left_image_o)
        right_image = tf.cond(do_flip > 0.5,
                              lambda: tf.image.flip_left_right(left_image_o),
                              lambda: right_image_o)
        next_left_image = tf.cond(
            do_flip > 0.5,
            lambda: tf.image.flip_left_right(next_right_image_o),
            lambda: next_left_image_o)
        next_right_image = tf.cond(
            do_flip > 0.5, lambda: tf.image.flip_left_right(next_left_image_o),
            lambda: next_right_image_o)

        # Random swap of the two frames: with do_flip_fb > 0.5 the "next" pair
        # takes the place of the current pair and vice versa.
        do_flip_fb = tf.random_uniform([], 0, 1)
        left_image, right_image, next_left_image, next_right_image = tf.cond(
            do_flip_fb > 0.5,
            lambda: (next_left_image, next_right_image, left_image, right_image),
            lambda: (left_image, right_image, next_left_image, next_right_image)
        )

        # Photometric augmentation -- currently disabled (kept for reference):
        #         do_augment  = tf.random_uniform([], 0, 0)
        #         image_list = [left_image, right_image, next_left_image, next_right_image]
        #         left_image, right_image, next_left_image, next_right_image = tf.cond(do_augment > 0.5,
        #                                                                              lambda: self.augment_image_list(image_list),
        #                                                                              lambda: image_list)

        # Declare three channels statically; height and width stay unspecified.
        left_image.set_shape([None, None, 3])
        right_image.set_shape([None, None, 3])
        next_left_image.set_shape([None, None, 3])
        next_right_image.set_shape([None, None, 3])

        # Parse the intrinsics: take the last newline-delimited entry of the
        # file, split it on whitespace, drop the first token, and read the
        # remaining numbers as a 3x4 matrix whose left 3x3 block is kept.
        raw_cam_contents = tf.read_file(cam_intrinsic_path)
        last_line = tf.string_split(
            [raw_cam_contents], delimiter="\n").values[-1]
        raw_cam_vec = tf.string_to_number(
            tf.string_split([last_line]).values[1:])
        raw_cam_mat = tf.reshape(raw_cam_vec, [3, 4])
        raw_cam_mat = raw_cam_mat[0:3, 0:3]
        # Images were resized in read_image, so rescale the intrinsics from the
        # original left-image size to (opt.img_height, opt.img_width).
        raw_cam_mat = rescale_intrinsics(raw_cam_mat, opt, orig_height,
                                         orig_width)

        # Scale-and-crop augmentation -- currently disabled (kept for reference):
        #         im_batch = tf.concat([tf.expand_dims(left_image, 0),
        #                          tf.expand_dims(right_image, 0),
        #                          tf.expand_dims(next_left_image, 0),
        #                          tf.expand_dims(next_right_image, 0)], axis=3)
        #         raw_cam_mat_batch = tf.expand_dims(raw_cam_mat, axis=0)
        #         im_batch, raw_cam_mat_batch = data_augmentation(im_batch, raw_cam_mat_batch, self.opt.img_height, self.opt.img_width)
        #         left_image, right_image, next_left_image, next_right_image = tf.split(im_batch[0,:,:,:], num_or_size_splits=4, axis=2)
        #         raw_cam_mat = raw_cam_mat_batch[0,:,:]

        proj_cam2pix, proj_pix2cam = get_multi_scale_intrinsics(raw_cam_mat,
                                                                opt.num_scales)

        # Rule of thumb:
        # capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size
        # NOTE(review): the code below uses a factor of 4 while passing 10 as
        # the fifth positional argument of shuffle_batch (num_threads in the
        # TF1 signature) -- confirm the capacity is what was intended.
        min_after_dequeue = 2048
        capacity = min_after_dequeue + 4 * opt.batch_size
        self.data_batch = tf.train.shuffle_batch([
            left_image, right_image, next_left_image, next_right_image,
            proj_cam2pix, proj_pix2cam
        ], opt.batch_size, capacity, min_after_dequeue, 10)

    def augment_image_pair(self, left_image, right_image):
        """Apply one shared random photometric augmentation to two images.

        Gamma (exponent in [0.8, 1.2)), brightness (factor in [0.5, 2.0)) and
        a per-channel colour factor (each in [0.8, 1.2)) are sampled once and
        applied to both images, which are then clipped to [0, 1].

        Not called from within this class.

        Returns:
            (left_image_aug, right_image_aug)
        """
        # randomly shift gamma
        random_gamma = tf.random_uniform([], 0.8, 1.2)
        left_image_aug = left_image**random_gamma
        right_image_aug = right_image**random_gamma

        # randomly shift brightness
        random_brightness = tf.random_uniform([], 0.5, 2.0)
        left_image_aug = left_image_aug * random_brightness
        right_image_aug = right_image_aug * random_brightness

        # randomly shift color: build an (H, W, 3) image whose channel i is the
        # constant random_colors[i], then multiply element-wise
        random_colors = tf.random_uniform([3], 0.8, 1.2)
        white = tf.ones([tf.shape(left_image)[0], tf.shape(left_image)[1]])
        color_image = tf.stack(
            [white * random_colors[i] for i in range(3)], axis=2)
        left_image_aug *= color_image
        right_image_aug *= color_image

        # saturate
        left_image_aug = tf.clip_by_value(left_image_aug, 0, 1)
        right_image_aug = tf.clip_by_value(right_image_aug, 0, 1)

        return left_image_aug, right_image_aug

    def augment_image_list(self, image_list):
        """List version of `augment_image_pair`.

        The same gamma, brightness and per-channel colour factors are applied
        to every image in `image_list`; the colour image is sized from the
        first list element. Only referenced from disabled (commented-out) code
        in `__init__`.

        Returns:
            A new list of augmented images, clipped to [0, 1].
        """
        # randomly shift gamma
        random_gamma = tf.random_uniform([], 0.8, 1.2)
        image_list = [img**random_gamma for img in image_list]

        # randomly shift brightness
        random_brightness = tf.random_uniform([], 0.5, 2.0)
        image_list = [img * random_brightness for img in image_list]

        # randomly shift color
        random_colors = tf.random_uniform([3], 0.8, 1.2)
        white = tf.ones(
            [tf.shape(image_list[0])[0], tf.shape(image_list[0])[1]])
        color_image = tf.stack(
            [white * random_colors[i] for i in range(3)], axis=2)
        image_list = [img * color_image for img in image_list]

        # saturate
        image_list = [tf.clip_by_value(img, 0, 1) for img in image_list]

        return image_list

    def read_image(self, image_path, get_shape=False):
        """Decode an image file, convert it to float32 and resize it.

        The decoder is chosen from the last three characters of the path:
        exactly 'jpg' selects the JPEG decoder, anything else the PNG decoder.

        Args:
            image_path: scalar string tensor with the file path.
            get_shape: when True, also return the pre-resize height and width.

        Returns:
            The image resized to (opt.img_height, opt.img_width) with AREA
            interpolation; if `get_shape` is True, a tuple
            (image, orig_height, orig_width) with both sizes as float32.
        """
        # tf.decode_image does not return the image size, this is an ugly workaround to handle both jpeg and png
        path_length = string_length_tf(image_path)[0]
        file_extension = tf.substr(image_path, path_length - 3, 3)
        file_cond = tf.equal(file_extension, 'jpg')

        image = tf.cond(
            file_cond, lambda: tf.image.decode_jpeg(tf.read_file(image_path)),
            lambda: tf.image.decode_png(tf.read_file(image_path)))
        # Capture the size before resizing; it is needed to rescale intrinsics.
        orig_height = tf.cast(tf.shape(image)[0], "float32")
        orig_width = tf.cast(tf.shape(image)[1], "float32")

        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_images(
            image, [self.opt.img_height, self.opt.img_width],
            tf.image.ResizeMethod.AREA)

        if get_shape:
            return image, orig_height, orig_width
        else:
            return image

In [10]:
# Constructing the loader adds the whole input pipeline (file-name queue,
# reader, decode/resize ops and the shuffle queue) to the default graph.
dataloader = MonodepthDataloader(opt)

Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TextLineDataset`.
Instructions for updating:
tf.py_func

In [11]:
# Reuse the loader built in the previous cell. Constructing a second
# MonodepthDataloader here would add a duplicate input pipeline (reader,
# decode ops and another shuffle queue with its own threads) to the graph.
image1, image_r, image2, image2_r, proj_cam2pix, proj_pix2cam = dataloader.data_batch

In [12]:
# Inspect the batched left-image tensor (static shape and dtype).
image1

<tf.Tensor 'shuffle_batch_1:0' shape=(4, 256, 832, 3) dtype=float32>

In [20]:
# Split every batched tensor along the batch axis into one piece. Each result
# is a list with a single element, indexed in a later cell.
split_image1 = tf.split(value=image1, num_or_size_splits=1, axis=0)
split_image2 = tf.split(value=image2, num_or_size_splits=1, axis=0)
split_cam2pix = tf.split(value=proj_cam2pix, num_or_size_splits=1, axis=0)
split_pix2cam = tf.split(value=proj_pix2cam, num_or_size_splits=1, axis=0)
split_image_r = tf.split(value=image_r, num_or_size_splits=1, axis=0)
split_image_r_next = tf.split(value=image2_r, num_or_size_splits=1, axis=0)

In [22]:
# Pick split number i of every tensor. Note that image1 and image2 are rebound
# here from the full batch to the selected split.
i = 0
image1 = split_image1[i]
image2 = split_image2[i]
image1r = split_image_r[i]
image2r = split_image_r_next[i]
cam2pix = split_cam2pix[i]
pix2cam = split_pix2cam[i]

In [25]:
import tensorflow.contrib.slim as slim
def feature_pyramid_flow(image, reuse):
    """Extract a six-level convolutional feature pyramid from `image`.

    The network is twelve 3x3 convolutions, scoped "cnv1" .. "cnv12" under
    'feature_net_flow', arranged as six stages. Each stage is a stride-2
    convolution followed by a stride-1 convolution with the same number of
    filters (16, 32, 64, 96, 128, 192).

    Args:
        image: input tensor fed to the first convolution.
        reuse: forwarded to every slim layer as its `reuse` argument.

    Returns:
        Tuple of the six stage outputs (cnv2, cnv4, cnv6, cnv8, cnv10, cnv12).
    """
    stage_filters = (16, 32, 64, 96, 128, 192)

    with tf.variable_scope('feature_net_flow'):
        with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
                weights_regularizer=slim.l2_regularizer(0.0004),
                activation_fn=leaky_relu,
                variables_collections=["flownet"],
                reuse=reuse):
            net = image
            stage_outputs = []
            for stage, filters in enumerate(stage_filters):
                # Odd-numbered layer downsamples, even-numbered layer refines.
                net = slim.conv2d(net, filters, [3, 3], stride=2,
                                  scope="cnv%d" % (2 * stage + 1))
                net = slim.conv2d(net, filters, [3, 3], stride=1,
                                  scope="cnv%d" % (2 * stage + 2))
                stage_outputs.append(net)

            return tuple(stage_outputs)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [27]:
def leaky_relu(_x, alpha=0.1):
    """Leaky ReLU: `_x` where it is positive, `alpha * _x` where it is negative.

    The negative branch relies on (_x - |_x|) / 2 being zero for non-negative
    inputs and equal to `_x` for negative ones.
    """
    return tf.nn.relu(_x) + alpha * (_x - abs(_x)) * 0.5

In [36]:
def construct_model_pwc_full(image1, image2, feature1, feature2):
    """Estimate optical flow coarse-to-fine from two feature pyramids.

    Starting from a cost volume at pyramid level 6, each finer level
    (5, 4, 3, 2) upsamples the previous flow, warps the second image's
    features with it, builds a cost volume against the first image's features,
    decodes a residual and adds it to the upsampled flow. The level-2 flow is
    further refined by `context_net`.

    Spatial sizes are computed with floor division throughout. The previous
    mix of `/` and `//` produced float sizes under Python 3, which the resize
    op does not accept as an integer size.

    Args:
        image1: first image; only its static height and width are used.
        image2: second image (unused here, kept for interface compatibility).
        feature1: six-level feature pyramid of image1, finest level first.
        feature2: six-level feature pyramid of image2, finest level first.

    Returns:
        Four flow tensors resized to (H, W), (H/2, W/2), (H/4, W/4) and
        (H/8, W/8), taken from levels 2, 3, 4 and 5 respectively, each
        multiplied by 4.0 before resizing.
    """
    with tf.variable_scope('flow_net'):
        # Static height and width as Python ints.
        _, H, W, _ = map(int, image1.get_shape()[0:4])

        # Level-1 features are not used by the decoder.
        _, feature1_2, feature1_3, feature1_4, feature1_5, feature1_6 = feature1
        _, feature2_2, feature2_3, feature2_4, feature2_5, feature2_6 = feature2

        def _refine(coarse_flow, feat1, feat2, level):
            """Run one refinement stage at `level`; return (flow, decoder features)."""
            height = H // (2**level)
            width = W // (2**level)
            # Flow vectors double in magnitude when resolution doubles.
            upsampled = tf.image.resize_bilinear(coarse_flow,
                                                 [height, width]) * 2.0
            warped = transformer_old(feat2, upsampled, [height, width])
            volume = cost_volumn(feat1, warped, d=4)
            residual, decoder_features = optical_flow_decoder_dc(
                tf.concat([volume, feat1, upsampled], axis=3), level=level)
            return residual + upsampled, decoder_features

        cv6 = cost_volumn(feature1_6, feature2_6, d=4)
        flow6, _ = optical_flow_decoder_dc(cv6, level=6)

        flow5, _ = _refine(flow6, feature1_5, feature2_5, level=5)
        flow4, _ = _refine(flow5, feature1_4, feature2_4, level=4)
        flow3, _ = _refine(flow4, feature1_3, feature2_3, level=3)
        flow2_raw, f2 = _refine(flow3, feature1_2, feature2_2, level=2)

        flow2 = context_net(tf.concat([flow2_raw, f2], axis=3)) + flow2_raw

        flow0_enlarge = tf.image.resize_bilinear(flow2 * 4.0, [H, W])
        flow1_enlarge = tf.image.resize_bilinear(flow3 * 4.0, [H // 2, W // 2])
        flow2_enlarge = tf.image.resize_bilinear(flow4 * 4.0, [H // 4, W // 4])
        flow3_enlarge = tf.image.resize_bilinear(flow5 * 4.0, [H // 8, W // 8])

        return flow0_enlarge, flow1_enlarge, flow2_enlarge, flow3_enlarge

In [37]:
def cost_volumn(feature1, feature2, d=4):
    """Correlate `feature1` with shifted copies of `feature2`.

    `feature2` is zero-padded by `d` pixels on each spatial side. For every
    displacement in a (2d+1) x (2d+1) window, the element-wise product of
    `feature1` and the shifted `feature2` is averaged over the channel axis.

    Args:
        feature1: 4-D feature tensor with fully known static shape.
        feature2: feature tensor to be shifted; sliced with the height and
            width of `feature1`.
        d: maximum displacement in pixels along each spatial axis.

    Returns:
        Tensor with (2d+1)**2 channels, one per displacement, ordered
        row-major over (vertical shift, horizontal shift).
    """
    _, H, W, _ = map(int, feature1.get_shape()[0:4])
    padded = tf.pad(feature2, [[0, 0], [d, d], [d, d], [0, 0]], "CONSTANT")
    window = 2 * d + 1
    correlations = []
    for dy in range(window):
        for dx in range(window):
            shifted = padded[:, dy:(dy + H), dx:(dx + W), :]
            # `keepdims` replaces the deprecated `keep_dims` keyword.
            correlations.append(
                tf.reduce_mean(feature1 * shifted, axis=3, keepdims=True))
    return tf.concat(correlations, axis=3)

In [43]:
# tf.AUTO_REUSE creates the 'feature_net_flow' variables on the first call and
# shares them on every later call. Passing reuse=True to the very first call
# only works if the variables were already created by some earlier execution,
# so it fails after a kernel restart.
feature1 = feature_pyramid_flow(image1, reuse=tf.AUTO_REUSE)
feature2 = feature_pyramid_flow(image2, reuse=tf.AUTO_REUSE)

In [44]:
# Unpack the six pyramid levels of each image (finest first).
feature1_1, feature1_2, feature1_3, feature1_4, feature1_5, feature1_6 = feature1
feature2_1, feature2_2, feature2_3, feature2_4, feature2_5, feature2_6 = feature2

# Inline copy of cost_volumn(feature1_6, feature2_6, d=4), expanded at cell
# level so that the per-displacement list `cv` can be inspected afterwards.
# NOTE(review): feature1 / feature2 are rebound below from the pyramid tuples
# to single level-6 tensors, so this cell cannot be re-run without first
# re-running the feature extraction cell.
feature1 = feature1_6
feature2 = feature2_6
d=4
batch_size, H, W, feature_num = map(int, feature1.get_shape()[0:4])
# Zero-pad the second feature map by d pixels on each spatial side.
feature2 = tf.pad(feature2, [[0, 0], [d, d], [d, d], [0, 0]], "CONSTANT")
cv = []
# One channel-averaged correlation map per (i, j) displacement in the window.
for i in range(2 * d + 1):
    for j in range(2 * d + 1):
        cv.append(
            tf.reduce_mean(
                feature1 * feature2[:, i:(i + H), j:(j + W), :],
                axis=3,
                keep_dims=True))
ret = tf.concat(cv, axis=3)

In [45]:
# Display the list of per-displacement correlation tensors built above
# ((2*d + 1)**2 entries).
cv

[<tf.Tensor 'Mean_162:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_163:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_164:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_165:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_166:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_167:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_168:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_169:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_170:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_171:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_172:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_173:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_174:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_175:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_176:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_177:0' shape=(4, 4, 13, 1) dtype=float32>,
 <tf.Tensor 'Mean_178:0'