In [1]:
import os
import sys
import itertools
from time import time
os.environ['TF_XLA_FLAGS'] = "--tf_xla_auto_jit=fusible"
#os.environ['TF_XLA_FLAGS'] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit"
from tqdm.notebook import tqdm
sys.path.append('..')
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.set_visible_devices(physical_devices, 'GPU')

tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
import horovod.tensorflow as hvd
hvd.init()

from mask_rcnn.hyperparameters import dataset_params
from mask_rcnn.hyperparameters import mask_rcnn_params
from mask_rcnn import dataset_utils
from simple_model import load_weights

from mask_rcnn.models import resnet
from mask_rcnn.models import fpn

train_file_pattern = '/workspace/shared_workspace/data/coco/tf_record/train*'
MODELS = dict()
batch_size = 1

data_params = dataset_params.get_data_params()
params = mask_rcnn_params.default_config().values()

data_params['batch_size'] = batch_size
params['finetune_bn'] = False
params['train_batch_size'] = batch_size
params['l2_weight_decay'] = 1e-4
params['init_learning_rate'] = 1e-4 * batch_size
params['warmup_learning_rate'] = 1e-3 * batch_size
params['warmup_steps'] = 500
params['learning_rate_steps'] = [30000,40000]
params['learning_rate_levels'] = [1e-4 * batch_size, 1e-5 * batch_size]
params['momentum'] = 0.9
params['use_batched_nms'] = True

In [2]:
train_input_fn = dataset_utils.FastDataLoader(train_file_pattern, data_params)
train_tdf = train_input_fn(data_params)

[MaskRCNN] INFO    : Using Dataset Sharding with Horovod


In [3]:
tdf_iter = train_tdf.make_initializable_iterator()
features, labels = tdf_iter.get_next()


Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_initializable_iterator(dataset)`.


In [4]:
train_input_fn_0 = dataset_utils.FastDataLoader(train_file_pattern, data_params)
train_tdf_0 = train_input_fn(data_params)

train_input_fn_1 = dataset_utils.FastDataLoader(train_file_pattern, data_params)
train_tdf_1 = train_input_fn(data_params)

def map0(features, labels):
    features['images'] = features['images'][:,:,:672,:]
    return features, labels
    
def map1(features, labels):
    features['images'] = features['images'][:,:,672:,:]
    return features, labels

train_tdf_0 = train_tdf_0.map(map0, num_parallel_calls=tf.data.experimental.AUTOTUNE) \
                       .prefetch(tf.data.experimental.AUTOTUNE)

train_tdf_1 = train_tdf_1.map(map1, num_parallel_calls=tf.data.experimental.AUTOTUNE) \
                       .prefetch(tf.data.experimental.AUTOTUNE)

[MaskRCNN] INFO    : Using Dataset Sharding with Horovod
[MaskRCNN] INFO    : Using Dataset Sharding with Horovod


In [5]:
tdf_iter_0 = train_tdf_0.make_initializable_iterator()
features_0, labels_0 = tdf_iter_0.get_next()

tdf_iter_1 = train_tdf_1.make_initializable_iterator()
features_1, labels_1 = tdf_iter_0.get_next()

In [16]:
def backbone(features_0, features_1,  params, labels=None, is_training=True):
    
    with tf.device('/gpu:0'):
        MODELS["backbone_0"] = resnet.Resnet_Model(
            "resnet50",
            data_format='channels_last',
            trainable=is_training,
            finetune_bn=params['finetune_bn']
        )
        MODELS["FPN_0"] = fpn.FPNNetwork(params['min_level'], params['max_level'], trainable=is_training)
        backbone_feats_0 = MODELS["backbone_0"](
            features_0['images'],
            training=is_training,
        )
        fpn_feats_0 = MODELS["FPN_0"](backbone_feats_0, training=is_training)
    
    with tf.device('/gpu:6'):
        MODELS["backbone_1"] = resnet.Resnet_Model(
            "resnet50",
            data_format='channels_last',
            trainable=is_training,
            finetune_bn=params['finetune_bn']
        )
        MODELS["FPN_1"] = fpn.FPNNetwork(params['min_level'], params['max_level'], trainable=is_training)
        backbone_feats_1 = MODELS["backbone_1"](
            features_1['images'],
            training=is_training,
        )
        fpn_feats_1 = MODELS["FPN_1"](backbone_feats_1, training=is_training)
    
    with tf.device('/gpu:0'):
        fpn_feats_0 = {i: tf.concat([j,k], axis=2) for (i,j),k \
             in zip(fpn_feats_0.items(), fpn_feats_1.values())}
    
    with tf.device('/gpu:1'):
        fpn_feats_1 = {i: tf.concat([j,k], axis=2) for (i,j),k \
             in zip(fpn_feats_0.items(), fpn_feats_1.values())}
    
    '''MODELS["backbone"] = resnet.Resnet_Model(
            "resnet50",
            data_format='channels_last',
            trainable=is_training,
            finetune_bn=params['finetune_bn']
        )
    MODELS["FPN"] = fpn.FPNNetwork(params['min_level'], params['max_level'], trainable=is_training)
        
    backbone_feats = MODELS["backbone"](
            features['images'],
            training=is_training,
        )
    fpn_feats = MODELS["FPN"](backbone_feats, training=is_training)'''
    return fpn_feats_0, fpn_feats_1

In [17]:
fpn_feats = backbone(features_0, features_1, params, labels_0)

In [18]:
var_list = load_weights.build_assigment_map('resnet50/')
checkpoint_file = tf.train.latest_checkpoint('/model/resnet/resnet-nhwc-2018-02-07/')
_init_op, _init_feed_dict = load_weights.assign_from_checkpoint(checkpoint_file, var_list)

In [19]:
var_initializer = tf.global_variables_initializer()
progressbar = tqdm(range(10000))
with tf.Session() as sess:
    sess.run(_init_op, _init_feed_dict)
    sess.run(tdf_iter.initializer)
    sess.run(tdf_iter_0.initializer)
    sess.run(tdf_iter_1.initializer)
    sess.run(var_initializer)
    for i in progressbar:
        result = sess.run(fpn_feats)

HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))

[GPU 00] Restoring pretrained weights (265 Tensors) from: /model/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603



In [20]:
fpn_feats_0

NameError: name 'fpn_feats_0' is not defined

In [10]:
fpn_feats_1

{5: <tf.Tensor 'fpn_1/post_hoc_d5/BiasAdd:0' shape=(6, 26, 21, 256) dtype=float32>,
 4: <tf.Tensor 'fpn_1/post_hoc_d4/BiasAdd:0' shape=(6, 52, 42, 256) dtype=float32>,
 3: <tf.Tensor 'fpn_1/post_hoc_d3/BiasAdd:0' shape=(6, 104, 84, 256) dtype=float32>,
 2: <tf.Tensor 'fpn_1/post_hoc_d2/BiasAdd:0' shape=(6, 208, 168, 256) dtype=float32>,
 6: <tf.Tensor 'fpn_1/p6/MaxPool:0' shape=(6, 13, 11, 256) dtype=float32>}

In [19]:
fpn_feats

{5: <tf.Tensor 'concat:0' shape=(6, 26, 42, 256) dtype=float32>,
 4: <tf.Tensor 'concat_1:0' shape=(6, 52, 84, 256) dtype=float32>,
 3: <tf.Tensor 'concat_2:0' shape=(6, 104, 168, 256) dtype=float32>,
 2: <tf.Tensor 'concat_3:0' shape=(6, 208, 336, 256) dtype=float32>,
 6: <tf.Tensor 'concat_4:0' shape=(6, 13, 22, 256) dtype=float32>}