In [3]:
from __future__ import absolute_import, division, print_function

import os
import argparse
import time
import datetime
import sys
from tensorflow.python import pywrap_tensorflow
import tensorflow as tf

In [4]:
def average_gradients(tower_grads):

    average_grads = []
    for grad_and_vars in zip(*tower_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)

    return average_grads

In [5]:
class BtsDataloader(object):
    """bts dataloader"""

    def __init__(self, data_path, gt_path, filenames_file, params, mode,
                 do_rotate=False, degree=5.0, do_kb_crop=False):

        self.data_path = data_path
        self.gt_path = gt_path
        self.params = params
        self.mode = mode

        self.do_rotate = do_rotate
        self.degree = degree

        self.do_kb_crop = do_kb_crop

        with open(filenames_file, 'r') as f:
            filenames = f.readlines()

        if mode == 'train':
            assert not self.params.batch_size % self.params.num_gpus
            mini_batch_size = int(self.params.batch_size / self.params.num_gpus)

            self.loader = tf.data.Dataset.from_tensor_slices(filenames)
            self.loader = self.loader.apply(tf.contrib.data.shuffle_and_repeat(len(filenames)))
            self.loader = self.loader.map(self.parse_function_train, num_parallel_calls=params.num_threads)
            self.loader = self.loader.map(self.train_preprocess, num_parallel_calls=params.num_threads)
            self.loader = self.loader.batch(mini_batch_size)
            self.loader = self.loader.prefetch(mini_batch_size)

        else:
            self.loader = tf.data.Dataset.from_tensor_slices(filenames)
            self.loader = self.loader.map(self.parse_function_test, num_parallel_calls=1)
            self.loader = self.loader.map(self.test_preprocess, num_parallel_calls=1)
            self.loader = self.loader.batch(1)
            self.loader = self.loader.prefetch(1)

    def parse_function_test(self, line):
        split_line = tf.string_split([line]).values
        image_path = tf.string_join([self.data_path, split_line[0]])

        if self.params.dataset == 'nyu':
            image = tf.image.decode_jpeg(tf.read_file(image_path))
        else:
            image = tf.image.decode_png(tf.read_file(image_path))

        image = tf.image.convert_image_dtype(image, tf.float32)
        focal = tf.string_to_number(split_line[2])

        if self.do_kb_crop is True:
            height = tf.shape(image)[0]
            width = tf.shape(image)[1]
            top_margin = tf.to_int32(height - 352)
            left_margin = tf.to_int32((width - 1216) / 2)
            image = image[top_margin:top_margin + 352, left_margin:left_margin + 1216, :]

        return image, focal

    def test_preprocess(self, image, focal):

        image.set_shape([None, None, 3])
        
        image *= 255.0
        image = self.mean_image_subtraction(image, [123.68, 116.78, 103.94])

        if self.params.encoder == 'densenet161_bts' or self.params.encoder == 'densenet121_bts':
            image *= 0.017

        return image, focal

    def parse_function_train(self, line):
        split_line = tf.string_split([line]).values
        image_path = tf.string_join([self.data_path, split_line[0]])
        depth_gt_path = tf.string_join([self.gt_path, tf.string_strip(split_line[1])])

        if self.params.dataset == 'nyu':
            image = tf.image.decode_jpeg(tf.read_file(image_path))
        else:
            image = tf.image.decode_png(tf.read_file(image_path))

        depth_gt = tf.image.decode_png(tf.read_file(depth_gt_path), channels=0, dtype=tf.uint16)

        if self.params.dataset == 'nyu':
            depth_gt = tf.cast(depth_gt, tf.float32) / 1000.0
        else:
            depth_gt = tf.cast(depth_gt, tf.float32) / 256.0

        image = tf.image.convert_image_dtype(image, tf.float32)
        focal = tf.string_to_number(split_line[2])

        # To avoid blank boundaries due to pixel registration
        if self.params.dataset == 'nyu':
            depth_gt = depth_gt[45:472, 43:608, :]
            image = image[45:472, 43:608, :]

        if self.do_kb_crop is True:
            print('Cropping training images as kitti benchmark images')
            height = tf.shape(image)[0]
            width = tf.shape(image)[1]
            top_margin = tf.to_int32(height - 352)
            left_margin = tf.to_int32((width - 1216) / 2)
            depth_gt = depth_gt[top_margin:top_margin + 352, left_margin:left_margin + 1216, :]
            image = image[top_margin:top_margin + 352, left_margin:left_margin + 1216, :]

        if self.do_rotate is True:
            random_angle = tf.random_uniform([], - self.degree * 3.141592 / 180, self.degree * 3.141592 / 180)
            image = tf.contrib.image.rotate(image, random_angle, interpolation='BILINEAR')
            depth_gt = tf.contrib.image.rotate(depth_gt, random_angle, interpolation='NEAREST')

        print('Do random cropping from fixed size input')
        image, depth_gt = self.random_crop_fixed_size(image, depth_gt)

        return image, depth_gt, focal

    def train_preprocess(self, image, depth_gt, focal):
        # Random flipping
        do_flip = tf.random_uniform([], 0, 1)
        image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(image), lambda: image)
        depth_gt = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(depth_gt), lambda: depth_gt)

        # Random gamma, brightness, color augmentation
        do_augment = tf.random_uniform([], 0, 1)
        image = tf.cond(do_augment > 0.5, lambda: self.augment_image(image), lambda: image)

        image.set_shape([self.params.height, self.params.width, 3])
        depth_gt.set_shape([self.params.height, self.params.width, 1])
        
        image *= 255.0
        image = self.mean_image_subtraction(image, [123.68, 116.78, 103.94])

        if self.params.encoder == 'densenet161_bts' or self.params.encoder == 'densenet121_bts':
            image *= 0.017
        
        return image, depth_gt, focal

    def random_crop_fixed_size(self, image, depth_gt):
        image_depth = tf.concat([image, depth_gt], 2)
        image_depth_cropped = tf.random_crop(image_depth, [self.params.height, self.params.width, 4])

        image_cropped = image_depth_cropped[:, :, 0:3]
        depth_gt_cropped = tf.expand_dims(image_depth_cropped[:, :, 3], 2)

        return image_cropped, depth_gt_cropped

    def augment_image(self, image):
        # gamma augmentation
        gamma = tf.random_uniform([], 0.9, 1.1)
        image_aug = image ** gamma

        # brightness augmentation
        if self.params.dataset == 'nyu':
            brightness = tf.random_uniform([], 0.75, 1.25)
        else:
            brightness = tf.random_uniform([], 0.9, 1.1)
        image_aug = image_aug * brightness

        # color augmentation
        colors = tf.random_uniform([3], 0.9, 1.1)
        white = tf.ones([tf.shape(image)[0], tf.shape(image)[1]])
        color_image = tf.stack([white * colors[i] for i in range(3)], axis=2)
        image_aug *= color_image

        # clip
        if self.params.encoder == 'densenet161_bts' or self.params.encoder == 'densenet121_bts':
            image_aug = tf.clip_by_value(image_aug,  0, 1)
        else:
            image_aug = tf.clip_by_value(image_aug, 0, 255)

        return image_aug

    @staticmethod
    def mean_image_subtraction(image, means):
        """Subtracts the given means from each image channel.
        For example:
          means = [123.68, 116.779, 103.939]
          image = mean_image_subtraction(image, means)
        Note that the rank of `image` must be known.
        Args:
          image: a tensor of size [height, width, C].
          means: a C-vector of values to subtract from each channel.
        Returns:
          the centered image.
        Raises:
          ValueError: If the rank of `image` is unknown, if `image` has a rank other
            than three or if the number of channels in `image` doesn't match the
            number of values in `means`.
        """

        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        num_channels = image.get_shape().as_list()[-1]
        if len(means) != num_channels:
            raise ValueError('len(means) must match the number of channels')

        channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
        for i in range(num_channels):
            channels[i] -= means[i]
        return tf.concat(axis=2, values=channels)

In [6]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

In [7]:
def convert_arg_line_to_args(arg_line):
    for arg in arg_line.split():
        if not arg.strip():
            continue
        yield arg

In [8]:
mode='train'
model_name='bts_nyu_test'
encoder='densenet161_bts'
dataset='nyu'
data_path='../dataset/nyu_depth_v2/sync/'
gt_path ='../dataset/nyu_depth_v2/sync/'
filenames_file ='../train_test_inputs/nyudepthv2_train_files_with_gt_small.txt'
batch_size=1
num_epochs=1
learning_rate=1e-4
end_learning_rate=-1
num_gpus=1
num_threads=1
input_height=416
input_width=544
max_depth=10
do_random_rotate=True
degree=2.5
log_directory ='./models/'
pretrained_model= './models/densenet161_imagenet/model'
fix_first_conv_blocks = True
checkpoint_path =''

In [9]:
from collections import namedtuple
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import sys

In [10]:
sys.path.append("./custom_layer/")

In [11]:
import _local_planar_guidance_grad

In [12]:
lpg = tf.load_op_library('custom_layer/build/liblpg.so')

In [13]:
bts_parameters = namedtuple('parameters', 'encoder, '
                                          'height, width, '
                                          'max_depth, '
                                          'batch_size, '
                                          'dataset, '
                                          'num_gpus, '
                                          'num_threads, '
                                          'num_epochs, ')

In [14]:
class BtsModel(object):

    def __init__(self, params, mode, image, depth_gt, focal, reuse_variables=None, model_index=0, bn_training=False):
        self.params = params
        self.mode = mode
        self.max_depth = self.params.max_depth

        self.input_image = image
        self.depth_gt = depth_gt
        self.focal = tf.cast(focal, tf.float32)
        self.model_collection = ['model_' + str(model_index)]

        self.reuse_variables = reuse_variables
        self.bn_training = bn_training
        self.is_training = True if mode == 'train' else False

        self.build_model(net_input=self.input_image, reuse=self.reuse_variables)

        if self.mode == 'test':
            return

        self.build_losses()
        self.build_summaries()

    def upsample_nn(self, x, ratio):
        s = tf.shape(x)
        h = s[1]
        w = s[2]
        return tf.image.resize_nearest_neighbor(x, [h * ratio, w * ratio], align_corners=True)
    
    def downsample_nn(self, x, ratio):
        s = tf.shape(x)
        h = tf.cast(s[1] / ratio, tf.int32)
        w = tf.cast(s[2] / ratio, tf.int32)
        return tf.image.resize_nearest_neighbor(x, [h, w], align_corners=True)
    
    def conv(self, x, num_out_layers, kernel_size, stride, activation_fn=tf.nn.elu, normalizer_fn=None):
        p = np.floor((kernel_size - 1) / 2).astype(np.int32)
        p_x = tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]])
        return slim.conv2d(p_x, num_out_layers, kernel_size, stride, 'VALID', activation_fn=activation_fn, normalizer_fn=normalizer_fn)

    def atrous_conv(self, x, num_out_layers, kernel_size, rate, apply_bn_first=True):
        pk = np.floor((kernel_size - 1) / 2).astype(np.int32)
        pr = rate - 1
        p = pk + pr
        out = tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]])

        if apply_bn_first is True:
            out = slim.batch_norm(out)

        out = tf.nn.relu(out)
        out = slim.conv2d(out, num_out_layers * 2, 1, 1, 'VALID')
        out = slim.batch_norm(out)
        out = tf.nn.relu(out)
        out = slim.conv2d(out, num_out_layers, kernel_size=kernel_size, stride=1, rate=rate, padding='VALID',
                          activation_fn=None, normalizer_fn=None)

        return out

    def upconv(self, x, num_out_layers, kernel_size, scale, activation_fn=tf.nn.elu, normalizer_fn=None):
        upsample = self.upsample_nn(x, scale)
        conv = self.conv(upsample, num_out_layers, kernel_size, 1, activation_fn=activation_fn, normalizer_fn=normalizer_fn)
        return conv

    @slim.add_arg_scope
    def denseconv(self, x, num_filters, kernel_size, stride=1, dilation_rate=1, dropout_rate=None, scope=None):
        with tf.variable_scope(scope, 'xx', [x]) as sc:
            out = slim.batch_norm(x, is_training=False)
            out = tf.nn.relu(out)
            out = slim.conv2d(out, num_filters, kernel_size, rate=dilation_rate, activation_fn=None)
            if dropout_rate:
                out = tf.nn.dropout(out)
            return out

    @slim.add_arg_scope
    def denseconv_block(self, x, num_filters, dilation_rate=1, scope=None):
        with tf.variable_scope(scope, 'conv_blockx', [x]) as sc:
            out = self.denseconv(x, num_filters * 4, 1, scope='x1')
            out = self.denseconv(out, num_filters, 3, dilation_rate=dilation_rate, scope='x2')
            out = tf.concat([x, out], axis=3)
            return out

    @slim.add_arg_scope
    def dense_block(self, x, num_layers, num_filters, growth_rate, dilation_rate=1, grow_num_filters=True, scope=None):
        with tf.variable_scope(scope, 'dense_blockx', [x]) as sc:
            out = x
            for i in range(num_layers):
                branch = i + 1
                out = self.denseconv_block(out, growth_rate, dilation_rate=dilation_rate,
                                           scope='conv_block' + str(branch))
                if grow_num_filters:
                    num_filters += growth_rate
            return out, num_filters

    @slim.add_arg_scope
    def transition_block(self, x, num_filters, compression=1.0, do_pooling=True, scope=None):
        num_filters = int(num_filters * compression)
        with tf.variable_scope(scope, 'transition_blockx', [x]) as sc:
            out = self.denseconv(x, num_filters, 1, scope='blk')
            if do_pooling:
                out = slim.avg_pool2d(out, 2)
            return out, num_filters

    @slim.add_arg_scope
    def reduction_1x1(self, net, num_filters, is_final=False):
        while num_filters >= 4:
            if num_filters < 8:
                if is_final:
                    net = self.conv(net, 1, 1, 1, activation_fn=tf.nn.sigmoid)
                else:
                    net = self.conv(net, 3, 1, 1, activation_fn=None)
                    theta = tf.nn.sigmoid(net[:, :, :, 0]) * 3.1415926535 / 6
                    phi = tf.nn.sigmoid(net[:, :, :, 1]) * 3.1415926535 * 2
                    dist = tf.nn.sigmoid(net[:, :, :, 2]) * self.max_depth
                    n1 = tf.expand_dims(tf.multiply(tf.math.sin(theta), tf.math.cos(phi)), 3)
                    n2 = tf.expand_dims(tf.multiply(tf.math.sin(theta), tf.math.sin(phi)), 3)
                    n3 = tf.expand_dims(tf.math.cos(theta), 3)
                    n4 = tf.expand_dims(dist, 3)
                    net = tf.concat([n1, n2, n3, n4], axis=3)
                break
            else:
                net = self.conv(net, num_filters, 1, 1)

            num_filters = num_filters / 2

        return net

    def get_depth(self, x):
        depth = self.max_depth * self.conv(x, 1, 3, 1, tf.nn.sigmoid, normalizer_fn=None)
        if self.params.dataset == 'kitti':
            focal_expanded = tf.expand_dims(self.focal, 1)
            focal_expanded = tf.expand_dims(focal_expanded, 1)
            focal_expanded = tf.expand_dims(focal_expanded, 1)
            depth = depth * focal_expanded / 715.0873 # Average focal length in KITTI Eigen training set
        return depth
    
    def densenet(self, inputs, reduction=None, growth_rate=None, num_filters=None, num_layers=None, dropout_rate=None,
                 is_training=True, reuse=None, scope=None):

        assert reduction is not None
        assert growth_rate is not None
        assert num_filters is not None
        assert num_layers is not None

        compression = 1.0 - reduction
        num_dense_blocks = len(num_layers)

        batch_norm_params = {'is_training': False,
                             'scale': True,
                             'decay': 0.99,
                             'epsilon': 1.1e-5,
                             'fused': True, }

        with tf.variable_scope(scope, 'densenetxxx', [inputs], reuse=reuse) as sc:
            with slim.arg_scope([slim.dropout], is_training=is_training),\
                 slim.arg_scope([slim.batch_norm], **batch_norm_params),\
                 slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(1e-4), activation_fn=None, biases_initializer=None):

                skips = []

                net = inputs

                # Initial convolution
                net = slim.conv2d(net, num_filters, 7, stride=2, scope='conv1')  # H/2
                net = slim.batch_norm(net, is_training=False)
                net = tf.nn.relu(net)

                skips.append(net)

                net = slim.max_pool2d(net, 3, stride=2, padding='SAME')  # H/4
                skips.append(net)

                # Blocks
                for i in range(num_dense_blocks - 1):  # i:0 H/8, i:1 H/16, i:2 H/32
                    do_pooling = True
                    dilation_rate = 1

                    net, num_filters = self.dense_block(net, num_layers[i], num_filters, growth_rate,
                                                        dilation_rate=dilation_rate, scope='dense_block' + str(i + 1))

                    # Add transition_block
                    net, num_filters = self.transition_block(net, num_filters, compression=compression,
                                                             do_pooling=do_pooling,
                                                             scope='transition_block' + str(i + 1))
                    if i < num_dense_blocks - 2:
                        skips.append(net)

                net, num_filters = self.dense_block(net, num_layers[-1], num_filters, growth_rate,
                                                    scope='dense_block' + str(num_dense_blocks))

                with tf.variable_scope('final_block', [inputs]):
                    net = slim.batch_norm(net, is_training=False)
                    net = tf.nn.relu(net)

                return net, skips

    @slim.add_arg_scope
    def bts(self, dense_features, skips, num_filters=256):
        batch_norm_params = {'is_training': self.bn_training,
                             'scale': True,
                             'decay': 0.99,
                             'epsilon': 1.1e-5,
                             'fused': True, }

        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            conv = self.conv
            atrous_conv = self.atrous_conv
            upconv = self.upconv

            upconv5 = upconv(dense_features, num_filters, 3, 2)  # H/16
            upconv5 = slim.batch_norm(upconv5)
            concat5 = tf.concat([upconv5, skips[3]], 3)
            iconv5 = conv(concat5, num_filters, 3, 1)

            num_filters = num_filters / 2

            upconv4 = upconv(iconv5, num_filters, 3, 2)  # H/8
            upconv4 = slim.batch_norm(upconv4)
            concat4 = tf.concat([upconv4, skips[2]], 3)
            iconv4 = conv(concat4, num_filters, 3, 1)
            iconv4 = slim.batch_norm(iconv4)

            daspp_3 = atrous_conv(iconv4, num_filters / 2, kernel_size=3, rate=3, apply_bn_first=False)
            concat4_2 = tf.concat([concat4, daspp_3], 3)
            daspp_6 = atrous_conv(concat4_2, num_filters / 2, kernel_size=3, rate=6)
            concat4_3 = tf.concat([concat4_2, daspp_6], 3)
            daspp_12 = atrous_conv(concat4_3, num_filters / 2, kernel_size=3, rate=12)
            concat4_4 = tf.concat([concat4_3, daspp_12], 3)
            daspp_18 = atrous_conv(concat4_4, num_filters / 2, kernel_size=3, rate=18)
            concat4_5 = tf.concat([concat4_4, daspp_18], 3)
            daspp_24 = atrous_conv(concat4_5, num_filters / 2, kernel_size=3, rate=24)
            concat4_daspp = tf.concat([iconv4, daspp_3, daspp_6, daspp_12, daspp_18, daspp_24], 3)
            daspp_feat = conv(concat4_daspp, num_filters / 2, 3, 1)

            plane_eq_8x8 = self.reduction_1x1(daspp_feat, num_filters / 2)
            plane_normal_8x8 = tf.nn.l2_normalize(plane_eq_8x8[:, :, :, 0:3], axis=3)
            plane_dist_8x8 = plane_eq_8x8[:, :, :, 3]
            plane_eq_8x8 = tf.concat([plane_normal_8x8, tf.expand_dims(plane_dist_8x8, 3)], 3)
            depth_8x8 = lpg.local_planar_guidance(plane_eq_8x8, upratio=8, focal=self.focal)
            depth_8x8_scaled = tf.expand_dims(depth_8x8, 3) / self.max_depth
            depth_8x8_scaled_ds = self.downsample_nn(depth_8x8_scaled, 4)

            num_filters = num_filters / 2

            upconv3 = upconv(daspp_feat, num_filters, 3, 2)  # H/4
            upconv3 = slim.batch_norm(upconv3)
            concat3 = tf.concat([upconv3, skips[1], depth_8x8_scaled_ds], 3)
            iconv3 = conv(concat3, num_filters, 3, 1)

            plane_eq_4x4 = self.reduction_1x1(iconv3, num_filters / 2)
            plane_normal_4x4 = tf.nn.l2_normalize(plane_eq_4x4[:, :, :, 0:3], axis=3)
            plane_dist_4x4 = plane_eq_4x4[:, :, :, 3]
            plane_eq_4x4 = tf.concat([plane_normal_4x4, tf.expand_dims(plane_dist_4x4, 3)], 3)
            depth_4x4 = lpg.local_planar_guidance(plane_eq_4x4, upratio=4, focal=self.focal)
            depth_4x4_scaled = tf.expand_dims(depth_4x4, 3) / self.max_depth
            depth_4x4_scaled_ds = self.downsample_nn(depth_4x4_scaled, 2)

            num_filters = num_filters / 2

            upconv2 = upconv(iconv3, num_filters, 3, 2)  # H/2
            upconv2 = slim.batch_norm(upconv2)
            concat2 = tf.concat([upconv2, skips[0], depth_4x4_scaled_ds], 3)
            iconv2 = conv(concat2, num_filters, 3, 1)

            plane_eq_2x2 = self.reduction_1x1(iconv2, num_filters / 2)
            plane_normal_2x2 = tf.nn.l2_normalize(plane_eq_2x2[:, :, :, 0:3], axis=3)
            plane_dist_2x2 = plane_eq_2x2[:, :, :, 3]
            plane_eq_2x2 = tf.concat([plane_normal_2x2, tf.expand_dims(plane_dist_2x2, 3)], 3)
            depth_2x2 = lpg.local_planar_guidance(plane_eq_2x2, upratio=2, focal=self.focal)
            depth_2x2_scaled = tf.expand_dims(depth_2x2, 3) / self.max_depth

            num_filters = num_filters / 2

            upconv1 = upconv(iconv2, num_filters, 3, 2)  # H
            reduc1x1 = self.reduction_1x1(upconv1, num_filters, is_final=True)
            concat1 = tf.concat([upconv1, reduc1x1, depth_2x2_scaled, depth_4x4_scaled, depth_8x8_scaled], 3)
            iconv1 = conv(concat1, num_filters, 3, 1)

            self.depth_est = self.get_depth(iconv1)
            self.lpg2x2 = depth_2x2_scaled
            self.lpg4x4 = depth_4x4_scaled
            self.lpg8x8 = depth_8x8_scaled
            self.reduc1x1 = reduc1x1

            print("==================================")
            print(" upconv5 in/out: {} / {}".format(dense_features.shape[-1], upconv5.shape[-1]))
            print("  iconv5 in/out: {} / {}".format(concat5.shape[-1], iconv5.shape[-1]))
            print(" upconv4 in/out: {} / {}".format(iconv5.shape[-1], upconv4.shape[-1]))
            print("  iconv4 in/out: {} / {}".format(concat4.shape[-1], iconv4.shape[-1]))
            print("    aspp in/out: {} / {}".format(concat4_daspp.shape[-1], daspp_feat.shape[-1]))
            print("reduc8x8 in/out: {} / {}".format(daspp_feat.shape[-1], plane_eq_8x8.shape[-1]))
            print("  lpg8x8 in/out: {} / {}".format(plane_eq_8x8.shape[-1], 1))
            print(" upconv3 in/out: {} / {}".format(daspp_feat.shape[-1], upconv3.shape[-1]))
            print("  iconv3 in/out: {} / {}".format(concat3.shape[-1], iconv3.shape[-1]))
            print("reduc4x4 in/out: {} / {}".format(iconv3.shape[-1], plane_eq_4x4.shape[-1]))
            print("  lpg4x4 in/out: {} / {}".format(plane_eq_4x4.shape[-1], 1))
            print(" upconv2 in/out: {} / {}".format(iconv3.shape[-1], upconv2.shape[-1]))
            print("  iconv2 in/out: {} / {}".format(concat2.shape[-1], iconv2.shape[-1]))
            print("reduc2x2 in/out: {} / {}".format(iconv2.shape[-1], plane_eq_2x2.shape[-1]))
            print("  lpg2x2 in/out: {} / {}".format(plane_eq_2x2.shape[-1], 1))
            print(" upconv1 in/out: {} / {}".format(iconv2.shape[-1], upconv1.shape[-1]))
            print("reduc1x1 in/out: {} / {}".format(upconv1.shape[-1], reduc1x1.shape[-1]))
            print("  iconv1 in/out: {} / {}".format(concat1.shape[-1], iconv1.shape[-1]))
            print("   depth in/out: {} / {}".format(iconv1.shape[-1], self.depth_est.shape[-1]))
            print("==================================")

    def build_resnet101_bts(self, net_input, reuse):
        batch_norm_params = {
            'is_training': False,
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'fused': True,  # Use fused batch norm if possible.
        }
        with tf.variable_scope('encoder'):
            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=slim.l2_regularizer(1e-4),
                                weights_initializer=slim.variance_scaling_initializer(),
                                activation_fn=tf.nn.relu,
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params),\
                 slim.arg_scope([slim.batch_norm], **batch_norm_params),\
                 slim.arg_scope([slim.max_pool2d], padding='SAME'):

                dense_features, skips, endpoints = resnet_v1_101(net_input, global_pool=False, spatial_squeeze=False,
                                                                 is_training=self.is_training, reuse=reuse, scope='resnet101')

        with tf.variable_scope('decoder'):
            with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=tf.nn.elu):
                self.bts(dense_features, skips, num_filters=512)

    def build_resnet50_bts(self, net_input, reuse):
        batch_norm_params = {
            'is_training': False,
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'fused': True,  # Use fused batch norm if possible.
        }
        with tf.variable_scope('encoder'):
            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=slim.l2_regularizer(1e-4),
                                weights_initializer=slim.variance_scaling_initializer(),
                                activation_fn=tf.nn.relu,
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params), \
                 slim.arg_scope([slim.batch_norm], **batch_norm_params), \
                 slim.arg_scope([slim.max_pool2d], padding='SAME'):

                dense_features, skips, endpoints = resnet_v1_50(net_input, global_pool=False, spatial_squeeze=False,
                                                                is_training=self.is_training, reuse=reuse, scope='resnet50')

        with tf.variable_scope('decoder'):
            with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=tf.nn.elu):
                self.bts(dense_features, skips, num_filters=256)

    def build_densenet121_bts(self, net_input, reuse):
        with tf.variable_scope('encoder'):
            dense_features, skips = self.densenet(net_input, reduction=0.5, growth_rate=32,
                                                  num_filters=self.num_filters, num_layers=[6, 12, 24, 16],
                                                  is_training=self.is_training, reuse=reuse, scope='densenet121')

        with tf.variable_scope('decoder'):
            with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=tf.nn.elu):
                self.bts(dense_features, skips, num_filters=256)

    def build_densenet161_bts(self, net_input, reuse):
        with tf.variable_scope('encoder'):
            dense_features, skips = self.densenet(net_input, reduction=0.5, growth_rate=48,
                                                  num_filters=self.num_filters, num_layers=[6, 12, 36, 24],
                                                  is_training=self.is_training, reuse=reuse, scope='densenet161')

        with tf.variable_scope('decoder'):
            with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=tf.nn.elu):
                self.bts(dense_features, skips, num_filters=512)

    def build_model(self, net_input, reuse):
        with tf.variable_scope('model', reuse=reuse):
            if self.params.encoder == 'densenet161_bts':
                self.num_filters = 96
                self.build_densenet161_bts(net_input=net_input, reuse=reuse)
            elif self.params.encoder == 'densenet121_bts':
                self.num_filters = 64
                self.build_densenet121_bts(net_input=net_input, reuse=reuse)
            elif self.params.encoder == 'resnet101_bts':
                self.build_resnet101_bts(net_input=net_input, reuse=reuse)
            elif self.params.encoder == 'resnet50_bts':
                self.build_resnet50_bts(net_input=net_input, reuse=reuse)
            else:
                return None

    def build_losses(self):
        with tf.variable_scope('losses', reuse=self.reuse_variables):

            if self.params.dataset == 'nyu':
                self.mask = self.depth_gt > 0.1
            else:
                self.mask = self.depth_gt > 1.0

            depth_gt_masked = tf.boolean_mask(self.depth_gt, self.mask)
            depth_est_masked = tf.boolean_mask(self.depth_est, self.mask)

            d = tf.log(depth_est_masked) - tf.log(depth_gt_masked)  # Best

            self.silog_loss = tf.sqrt(tf.reduce_mean(d ** 2) - 0.85 * (tf.reduce_mean(d) ** 2)) * 10.0
            self.total_loss = self.silog_loss

    def build_summaries(self):
        with tf.device('/cpu:0'):
            tf.summary.scalar('silog_loss', self.silog_loss, collections=self.model_collection)
            depth_gt = tf.where(self.depth_gt < 1e-3, self.depth_gt * 0 + 1e3, self.depth_gt)
            tf.summary.image('depth_gt', 1 / depth_gt, max_outputs=4, collections=self.model_collection)
            tf.summary.image('depth_est', 1 / self.depth_est, max_outputs=4, collections=self.model_collection)
            tf.summary.image('reduc1x1', 1 / self.reduc1x1, max_outputs=4, collections=self.model_collection)
            tf.summary.image('lpg2x2', 1 / self.lpg2x2, max_outputs=4, collections=self.model_collection)
            tf.summary.image('lpg4x4', 1 / self.lpg4x4, max_outputs=4, collections=self.model_collection)
            tf.summary.image('lpg8x8', 1 / self.lpg8x8, max_outputs=4, collections=self.model_collection)
            tf.summary.image('image', self.input_image, max_outputs=4, collections=self.model_collection)
            

In [15]:
def get_num_lines(file_path):
    f = open(file_path, 'r')
    lines = f.readlines()
    f.close()
    return len(lines)

In [16]:
def get_tensors_in_checkpoint_file(file_name, all_tensors=True, tensor_name=None):
    varlist = []
    var_value = []
    reader = pywrap_tensorflow.NewCheckpointReader(file_name)
    if all_tensors:
      var_to_shape_map = reader.get_variable_to_shape_map()
      for key in sorted(var_to_shape_map):
        varlist.append(key)
        var_value.append(reader.get_tensor(key))
    else:
        varlist.append(tensor_name)
        var_value.append(reader.get_tensor(tensor_name))
    return (varlist, var_value)

In [17]:
def build_tensors_in_checkpoint_file(loaded_tensors):
    full_var_list = list()
    var_check = set()
    # Loop all loaded tensors
    for i, tensor_name in enumerate(loaded_tensors[0]):
        # Extract tensor
        try:
            tensor_aux = tf.get_default_graph().get_tensor_by_name(tensor_name+":0")
        except:
            print(tensor_name + ' is in pretrained model but not in current training model')
        if tensor_aux not in var_check:
            full_var_list.append(tensor_aux)
            var_check.add(tensor_aux)
    return full_var_list


In [18]:
model_filename = model_name + '.py'
command = 'mkdir ' + log_directory + '/' + model_name
os.system(command)

256

In [19]:
custom_layer_path = log_directory + '/' + model_name + '/' + 'custom_layer'
command = 'mkdir ' + custom_layer_path
os.system(command)

256

In [20]:
command = 'cp ' + './custom_layer/* ' + custom_layer_path + '/'
os.system(command)

256

In [21]:
sys.argv

['/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py',
 '-f',
 '/root/.local/share/jupyter/runtime/kernel-bac5534f-ba0c-4753-bb17-567bd9a23109.json']

In [22]:
args_out_path = log_directory + '/' + model_name + '/' + sys.argv[1]
command = 'cp ' + sys.argv[1] + ' ' + args_out_path
os.system(command)

256

In [23]:
model_out_path = log_directory + '/' + model_name + '/' + model_filename
command = 'cp bts.py ' + model_out_path
os.system(command)

0

In [24]:
global_step = tf.Variable(0, trainable=False)
num_training_samples = get_num_lines(filenames_file)
steps_per_epoch = np.ceil(num_training_samples / batch_size).astype(np.int32)
num_total_steps = num_epochs * steps_per_epoch
print(num_training_samples,steps_per_epoch,num_total_steps)

Instructions for updating:
Colocations handled automatically by placer.
100 100 100


In [25]:
start_learning_rate = learning_rate
end_learning_rate = start_learning_rate * 0.1
learning_rate = tf.train.polynomial_decay(start_learning_rate, global_step, num_total_steps, end_learning_rate, 0.9)
opt_step = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)

print("Total number of samples: {}".format(num_training_samples))
print("Total number of steps: {}".format(num_total_steps))

Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Total number of samples: 100
Total number of steps: 100


In [26]:
if fix_first_conv_blocks or fix_first_conv_block:
    if fix_first_conv_blocks:
        print('Fixing first two conv blocks')
    else:
        print('Fixing first conv block')

Fixing first two conv blocks


In [27]:
params = bts_parameters(
        encoder=encoder,
        height=input_height,
        width=input_width,
        batch_size=batch_size,
        dataset=dataset,
        max_depth=max_depth,
        num_gpus=num_gpus,
        num_threads=num_threads,
        num_epochs=num_epochs)

In [28]:
dataloader = BtsDataloader(data_path, gt_path, filenames_file,params, mode,
                           do_rotate=do_random_rotate, degree=degree,
                           do_kb_crop=False)

Instructions for updating:
Use `tf.data.experimental.shuffle_and_repeat(...)`.
Do random cropping from fixed size input


In [29]:
dataloader_iter = dataloader.loader.make_initializable_iterator()
iter_init_op = dataloader_iter.initializer

In [30]:
tower_grads = []
tower_losses = []
reuse_variables = None

with tf.variable_scope(tf.get_variable_scope()):
    for i in range(num_gpus):
        with tf.device('/gpu:%d' % i):
            image, depth_gt, focal = dataloader_iter.get_next()
            model = BtsModel(params, mode, image, depth_gt, focal=focal,
                             reuse_variables=reuse_variables, model_index=i, bn_training=False)

            loss = model.total_loss
            tower_losses.append(loss)

            reuse_variables = True

            if fix_first_conv_blocks or fix_first_conv_block:
                trainable_vars = tf.trainable_variables()
                if encoder == 'resnet101_bts' or encoder == 'resnet50_bts':
                    first_conv_name = encoder.replace('_bts', '') + '/conv1'
                    if fix_first_conv_blocks:
                        g_vars = [var for var in
                                  trainable_vars if (first_conv_name or 'block1' or 'block2') not in var.name]
                    else:
                        g_vars = [var for var in
                                  trainable_vars if (first_conv_name or 'block1') not in var.name]
                else:
                    if fix_first_conv_blocks:
                        g_vars = [var for var in
                                  trainable_vars if ('conv1' or 'dense_block1' or 'dense_block2' or 'transition_block1' or 'transition_block2') not in var.name]
                    else:
                        g_vars = [var for var in
                                  trainable_vars if ('dense_block1' or 'transition_block1') not in var.name]
            else:
                g_vars = None

            grads = opt_step.compute_gradients(loss, var_list=g_vars)

            tower_grads.append(grads)

 upconv5 in/out: 2208 / 512
  iconv5 in/out: 896 / 512
 upconv4 in/out: 512 / 256
  iconv4 in/out: 448 / 256
    aspp in/out: 896 / 128
reduc8x8 in/out: 128 / 4
  lpg8x8 in/out: 4 / 1
 upconv3 in/out: 128 / 128
  iconv3 in/out: 225 / 128
reduc4x4 in/out: 128 / 4
  lpg4x4 in/out: 4 / 1
 upconv2 in/out: 128 / 64
  iconv2 in/out: 161 / 64
reduc2x2 in/out: 64 / 4
  lpg2x2 in/out: 4 / 1
 upconv1 in/out: 64 / 32
reduc1x1 in/out: 32 / 1
  iconv1 in/out: 36 / 32
   depth in/out: 32 / 1
Instructions for updating:
Use tf.cast instead.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [31]:
with tf.variable_scope(tf.get_variable_scope()):
    with tf.device('/gpu:%d' % (num_gpus - 1)):
        grads = average_gradients(tower_grads)
        apply_gradient_op = opt_step.apply_gradients(grads, global_step=global_step)
        total_loss = tf.reduce_mean(tower_losses)

In [32]:
tf.summary.scalar('learning_rate', learning_rate, ['model_0'])
tf.summary.scalar('total_loss', total_loss, ['model_0'])
summary_op = tf.summary.merge_all('model_0')

In [33]:
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

summary_writer = tf.summary.FileWriter(log_directory + '/' + model_name, sess.graph)
train_saver = tf.train.Saver(max_to_keep=200)

total_num_parameters = 0
for variable in tf.trainable_variables():
    total_num_parameters += np.array(variable.get_shape().as_list()).prod()

print("Total number of trainable parameters: {}".format(total_num_parameters))

Total number of trainable parameters: 47006235


In [34]:
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())

coordinator = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)

if pretrained_model != '':
    vars_to_restore = get_tensors_in_checkpoint_file(file_name=pretrained_model)
    tensors_to_load = build_tensors_in_checkpoint_file(vars_to_restore)
    loader = tf.train.Saver(tensors_to_load)
    loader.restore(sess, pretrained_model)

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
model/encoder/densenet161/logits/biases is in pretrained model but not in current training model
model/encoder/densenet161/logits/weights is in pretrained model but not in current training model
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./models/densenet161_imagenet/model


In [35]:
start_step = global_step.eval(session=sess)
start_time = time.time()
duration = 0
should_init_iter_op = False
if mode == 'train':
    should_init_iter_op = True
for step in range(start_step, num_total_steps):
    before_op_time = time.time()
    if step % steps_per_epoch == 0 or should_init_iter_op is True:
        sess.run(iter_init_op)
        should_init_iter_op = False

    _, lr, loss_value = sess.run([apply_gradient_op, learning_rate, total_loss])

    print('step: {}/{}, lr: {:.12f}, loss: {:.12f}'.format(step, num_total_steps, lr, loss_value))

    duration += time.time() - before_op_time
    if step and step % 100 == 0:
        examples_per_sec = params.batch_size / duration * 100
        duration = 0
        time_sofar = (time.time() - start_time) / 3600
        training_time_left = (num_total_steps / step - 1.0) * time_sofar
        print('%s:' % model_name)
        print_string = 'examples/s: {:4.2f} | loss: {:.5f} | time elapsed: {:.2f}h | time left: {:.2f}h'
        print(print_string.format(examples_per_sec, loss_value, time_sofar, training_time_left))
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, global_step=step)
        summary_writer.flush()

    if step and step % 500 == 0:
        train_saver.save(sess, log_directory + '/' + model_name + '/model', global_step=step)

step: 0/100, lr: 0.000100000005, loss: 4.742500305176
step: 1/100, lr: 0.000099189594, loss: 7.766571044922
step: 2/100, lr: 0.000098378368, loss: 2.032992362976
step: 3/100, lr: 0.000097566313, loss: 2.815767288208
step: 4/100, lr: 0.000096753429, loss: 3.986785173416
step: 5/100, lr: 0.000095939686, loss: 4.646549224854
step: 6/100, lr: 0.000095125099, loss: 3.015668392181
step: 7/100, lr: 0.000094309624, loss: 6.179144382477
step: 8/100, lr: 0.000093493290, loss: 4.419931888580
step: 9/100, lr: 0.000092676069, loss: 3.328349113464
step: 10/100, lr: 0.000091857932, loss: 2.569943904877
step: 11/100, lr: 0.000091038892, loss: 4.058022975922
step: 12/100, lr: 0.000090218949, loss: 4.076035022736
step: 13/100, lr: 0.000089398047, loss: 2.677053213120
step: 14/100, lr: 0.000088576213, loss: 2.092865705490
step: 15/100, lr: 0.000087753433, loss: 3.993928432465
step: 16/100, lr: 0.000086929678, loss: 1.601414084435
step: 17/100, lr: 0.000086104934, loss: 3.369278669357
step: 18/100, lr: 0.

In [36]:
train_saver.save(sess, log_directory + '/' + model_name + '/model', global_step=num_total_steps)
print('%s training finished' % model_name)
print(datetime.datetime.now())

bts_nyu_test training finished
2022-05-23 06:10:01.086563
