In [1]:
from deeplab import common, model
from deeplab.utils import train_utils
from deeplab.core import utils

from tensorflow.keras.utils import OrderedEnqueuer
from tensorflow.contrib import slim as contrib_slim
from slim.nets import resnet_utils

import tensorflow as tf
print(tf.__version__)

from lib import dataloader

from albumentations import *
import cv2
import numpy as np

# %config IPCompleter.greedy=True

%load_ext autoreload
%autoreload 2

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('f', '', 'kernel')

1.12.3


## set parameters

In [2]:
crop_size = [256, 256]
outputs_to_num_classes = {'semantic': 3}

model_options = common.ModelOptions(
    outputs_to_num_classes,
    crop_size,
    output_stride=8
)._replace(
    add_image_level_feature=True,
    aspp_with_batch_norm=True,
    aspp_with_separable_conv=False,
    decoder_use_separable_conv=False,
    decoder_output_is_logits=True,
    logits_kernel_size=1,
    decoder_output_stride=[4,2],
    multi_grid=[1,2],
    atrous_rates=[2,4,6],
    model_variant='resnet_mod') 

In [3]:
for key, value in model_options._asdict().items():
    print(key, value)

outputs_to_num_classes {'semantic': 3}
crop_size [256, 256]
atrous_rates [2, 4, 6]
output_stride 8
preprocessed_images_dtype <dtype: 'float32'>
merge_method max
add_image_level_feature True
image_pooling_crop_size None
image_pooling_stride [1, 1]
aspp_with_batch_norm True
aspp_with_separable_conv False
multi_grid [1, 2]
decoder_output_stride [4, 2]
decoder_use_separable_conv False
logits_kernel_size 1
model_variant resnet_mod
depth_multiplier 1.0
divisible_by None
prediction_with_upsampled_logits True
dense_prediction_cell_config None
nas_architecture_options {'nas_stem_output_num_conv_filters': 20, 'nas_use_classification_head': False, 'nas_remove_os32_stride': False}
use_bounded_activation False
aspp_with_concat_projection True
aspp_with_squeeze_and_excitation False
aspp_convs_filters 256
decoder_use_sum_merge False
decoder_filters 256
decoder_output_is_logits True
image_se_uses_qsigmoid False
label_weights 1.0
sync_batch_norm_method None
batch_norm_decay 0.9997


## load sample dataset

In [4]:
PATH = '/home/lis-paul/data/dsb2018/dsb2018_sub1/'
X_trn, Y_trn, X_val, Y_val = dataloader.load_img_dir(PATH)

number of images: 292
- training:       248
- validation:      44


In [5]:
# ranf=list(np.random.ranf(8)+10*np.random.ranf(1))
AUG = Compose([
#         CoarseDropout(max_holes=64, max_height=8, max_width=8, min_holes=None, min_height=None, min_width=None, 
#                   fill_value=ranf, always_apply=False, p=.9),
        GaussNoise(var_limit=(0.0, 0.05), mean=0, p=.5),
        GaussianBlur(blur_limit=3, p=.5),
        Flip(p=0.5),
        ShiftScaleRotate(shift_limit=0, scale_limit=(0.2, 1), rotate_limit=15, 
                         interpolation=0, border_mode=cv2.BORDER_REFLECT_101, value=0, mask_value=0, p=1),
        ElasticTransform(alpha=100, sigma=10, alpha_affine=1, p=0.7, 
                         interpolation=0, border_mode=cv2.BORDER_REFLECT_101, value=0, mask_value=0),
        RandomCrop(256, 256, always_apply=True, p=1.0)
    ], p=0.9)

AUG_val = RandomCrop(256, 256, always_apply=True, p=1.0)

In [6]:
train_dl = dataloader.Dataloader(X_trn, Y_trn, batch_size=16, patch_size=(256,256), augmenter=AUG, shuffle=True)
val_dl   = dataloader.Dataloader(X_val, Y_val, batch_size=4, patch_size=(256,256), augmenter=AUG_val, shuffle=False)

In [7]:
# xx, yy = train_dl[0]

# for arr in [xx, yy[0], yy[1], yy[2]]:
#     dataloader.show_some_data(arr)

In [8]:
xx,yy = train_dl[0]
print(xx.dtype, yy[0].dtype, yy[1].dtype, yy[2].dtype)
print(xx.shape, yy[0].shape, yy[1].shape, yy[2].shape)


uint8 float64 float64 float64
(16, 256, 256) (16, 256, 256) (16, 256, 256) (16, 256, 256)


In [9]:
np.stack((yy[1], yy[2]), axis=-1).shape

(16, 256, 256, 2)

In [10]:
# train_generator = SegmentationMultiGenerator(datasets, folder) # My keras.utils.sequence object

def generator():
    multi_enqueuer = OrderedEnqueuer(train_dl, use_multiprocessing=True)
    multi_enqueuer.start(workers=8, max_queue_size=8)
    while True:
        xx, yy = next(multi_enqueuer.get())
        yield xx[...,np.newaxis], yy[0,...,np.newaxis], np.stack((yy[1], yy[2]),axis=-1)
        
        

dataset = tf.data.Dataset.from_generator(generator,
                                         output_types=(tf.uint8, tf.float32, tf.float32),
                                         output_shapes=(tf.TensorShape([None, 256, 256, 1]),
                                                        tf.TensorShape([None, 256, 256, 1]),
                                                        tf.TensorShape([None, 256, 256, 2]))
                                        )

itr = dataset.make_one_shot_iterator()
sample = itr.get_next()

## build network

In [11]:
# imgs = np.random.uniform.ipynb_checkpoints/size=(4, crop_size[0], crop_size[1], 1))
# imgs = tf.random_uniform((4, crop_size[0], crop_size[1], 1))
# print(xx.shape)
# logits = model.multi_scale_logits(sample[0],
#                        model_options,
#                        image_pyramid=[1.0],
#                        weight_decay=0.0001,
#                        is_training=True,
#                        fine_tune_batch_norm=False,
#                        nas_training_hyper_parameters=None)
# print(logits['semantic']['merged_logits'].shape)

logits = model._get_logits(
        sample[0],
        model_options=model_options,
        weight_decay=0.0001,
        reuse=tf.AUTO_REUSE,
        is_training=True,
        fine_tune_batch_norm=True)
print(logits)

{'semantic': <tf.Tensor 'semantic:0' shape=(?, 128, 128, 256) dtype=float32>}


In [12]:
# g = tf.get_default_graph()
# with g.as_default():
#     with tf.Session(graph=g) as sess:
#         sess.run(tf.global_variables_initializer())
#         output0 = sess.run(logits)
#         writer = tf.summary.FileWriter("output3/logit_resnet_v1_50_beta", sess.graph)
#         writer.close()

In [13]:
logits

{'semantic': <tf.Tensor 'semantic:0' shape=(?, 128, 128, 256) dtype=float32>}

In [14]:
arg_scope = resnet_utils.resnet_arg_scope(
        weight_decay=0.0001,
        batch_norm_decay=0.95,
        batch_norm_epsilon=1e-5,
        batch_norm_scale=True)

In [15]:
## upsample the output logits
# logits = utils.resize_bilinear(logits['semantic']['merged_logits'],
#                                crop_size,
#                                logits['semantic']['merged_logits'].dtype)
logits = tf.image.resize_bilinear(
          logits['semantic'],
          crop_size,
          align_corners=True)
print(logits.shape)

with tf.variable_scope("prob"):
#     with contrib_slim.arg_scope(arg_scope):
    prob = contrib_slim.conv2d(logits, 1, [1, 1], stride=1,
                             scope='logit_prob')
    prob_loss = tf.losses.sigmoid_cross_entropy(
                sample[1], 
                prob, 
                weights=1.0, 
                label_smoothing=0, 
                scope="bce_prob",
                loss_collection=tf.GraphKeys.LOSSES, 
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
            )

with tf.variable_scope("grad"):
    grad = contrib_slim.conv2d(logits, 2, [1, 1], stride=1,
                             scope='logit_grad')
    grad_loss = tf.losses.mean_squared_error(
                sample[2],
                grad,
                weights=1.0,
                scope="mse_grad",
                loss_collection=tf.GraphKeys.LOSSES,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
            ) 
print(prob.shape, grad.shape)

In [17]:
total_loss = tf.add(prob_loss , grad_loss*5)
tf.losses.add_loss(total_loss)

In [18]:
tf.losses.get_losses()

[<tf.Tensor 'prob/bce_prob/value:0' shape=() dtype=float32>,
 <tf.Tensor 'grad/mse_grad/value:0' shape=() dtype=float32>,
 <tf.Tensor 'Add:0' shape=() dtype=float32>]

In [19]:
g = tf.get_default_graph()
with g.as_default():
    with tf.Session(graph=g) as sess:
#         img = tf.random_uniform(
#                 (4, crop_size[0], crop_size[1], 1))
#         img = tf.convert_to_tensor(xx[...,np.newaxis], dtype=tf.uint8)
#         print(img.shape)
#         print(img.dtype)
#         outputs_to_scales_to_logits = model.multi_scale_logits(
#                                         inputs,
#                                         model_options,
#                                         image_pyramid=[1.0])

        sess.run(tf.global_variables_initializer())
#         outputs_to_scales_to_logits = sess.run(outputs_to_logits, feed_dict={'input_layer:0': img.eval(), 'training:0':True})
#         outputs = sess.run(outputs_scaled, feed_dict={'input_layer:0': img.eval(), 'training:0':True})
#         output0 = sess.run(logits)
        ploss = sess.run(prob_loss)
        gloss = sess.run(grad_loss)
        tloss = sess.run(total_loss)
        print(ploss.shape, gloss.shape, tloss.shape)
#         print((output0['semantic']['merged_logits']).shape)
        writer = tf.summary.FileWriter("output3/total", sess.graph)
    
#         tf.summary.scalar('yaw_total_loss', yaw_total_loss)
#         tf.summary.scalar('pitch_total_loss', pitch_total_loss)
#         tf.summary.scalar('roll_total_loss', roll_total_loss)
    
        writer.close()

() () ()


In [19]:
# Add name to graph node so we can add to summary.
output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
output_type_dict[model.MERGED_LOGITS_SCOPE] = tf.identity(
                        output_type_dict[model.MERGED_LOGITS_SCOPE], name=common.OUTPUT_TYPE)num_pixels

NameError: name 'outputs_to_scales_to_logits' is not defined

In [None]:
import six
for output, num_classes in six.iteritems(outputs_to_num_classes):
    print(output, num_classes)
    print(outputs_to_scales_to_logits[output])

## loss scope

print(samples[common.LABEL].shape)

print(model_options.label_weights)

print(FLAGS.upsample_logits)

(?, 513, 513, 1)

1.0

True

In [13]:
yy.shape

(3, 8, 256, 256)

In [14]:
# ##network output 
# for output, num_classes in six.iteritems(outputs_to_num_classes):
#     train_utils.add_softmax_cross_entropy_loss_for_each_scale(
#         outputs_to_scales_to_logits[output],
#         tf.convert_to_tensor(yy[0,...,np.newaxis], dtype=tf.uint8),
#         3,
#         255,
#         loss_weight=model_options.label_weights,
#         upsample_logits=True,
#         hard_example_mining_step=0,
#         top_k_percent_pixels=1.0,
#         scope=output)

In [15]:
output = 'semantic'
logits = outputs_to_scales_to_logits[output]['merged_logits']
labels = tf.convert_to_tensor(yy[0,...,np.newaxis], dtype=tf.uint8)
num_classes = 3
ignore_label = 255
gt_is_matting_map = False
loss_weight=model_options.label_weights
upsample_logits=True
hard_example_mining_step=0
top_k_percent_pixels=1.0
scope=output

In [16]:
print(loss_weight)

1.0


In [17]:
from deeplab.core import preprocess_utils, utils
scale='scale1'
loss_scope = None
if scope:
    loss_scope = '%s_%s' % (scope, scale)

if upsample_logits:
    # Label is not downsampled, and instead we upsample logits.
    logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
    scaled_labels = labels
else:
  # Label is downsampled to the same size as logits.
  # When gt_is_matting_map = true, label downsampling with nearest neighbor
  # method may introduce artifacts. However, to avoid ignore_label from
  # being interpolated with other labels, we still perform nearest neighbor
  # interpolation.
  # TODO(huizhongc): Change to bilinear interpolation by processing padded
  # and non-padded label separately

    scaled_labels = tf.image.resize_nearest_neighbor(
                        labels,
                        preprocess_utils.resolve_shape(logits, 4)[1:3],
                        align_corners=True)

scaled_labels = tf.reshape(scaled_labels, shape=[-1])
weights = utils.get_label_weight_mask(
    scaled_labels, ignore_label, num_classes, label_weights=loss_weight)
# Dimension of keep_mask is equal to the total number of pixels.
keep_mask = tf.cast(
    tf.not_equal(scaled_labels, ignore_label), dtype=tf.float32)
print(keep_mask)
train_labels = None
print(f'logits before:{logits}')
logits = tf.reshape(logits, shape=[-1, num_classes])
print(f'logits after:{logits}')

Tensor("Cast_1:0", shape=(524288,), dtype=float32)
logits before:Tensor("ResizeBilinear_2:0", shape=(8, 256, 256, 3), dtype=float32)
logits after:Tensor("Reshape_1:0", shape=(524288, 3), dtype=float32)


In [18]:
train_labels = tf.one_hot(
      scaled_labels, num_classes, on_value=1.0, off_value=0.0)

In [19]:
def _div_maybe_zero(total_loss, num_present):
  """Normalizes the total loss with the number of present pixels."""
  return tf.to_float(num_present > 0) * tf.math.divide(
      total_loss,
      tf.maximum(1e-5, num_present))

In [20]:
default_loss_scope = ('softmax_all_pixel_loss'
                      if top_k_percent_pixels == 1.0 else
                      'softmax_hard_example_mining')
with tf.name_scope(loss_scope, default_loss_scope,
                   [logits, train_labels, weights]):
  # Compute the loss for all pixels.
  pixel_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
      labels=tf.stop_gradient(
          train_labels, name='train_labels_stop_gradient'),
      logits=logits,
      name='pixel_losses')
  weighted_pixel_losses = tf.multiply(pixel_losses, weights)
  print(pixel_losses)
  if top_k_percent_pixels == 1.0:
    total_loss = tf.reduce_sum(weighted_pixel_losses)
    print(total_loss)
    num_present = tf.reduce_sum(keep_mask)
    print(num_present)
    loss = _div_maybe_zero(total_loss, num_present)
    print(loss)
    tf.losses.add_loss(loss)
  else:
    num_pixels = tf.to_float(tf.shape(logits)[0])
    # Compute the top_k_percent pixels based on current training step.
    if hard_example_mining_step == 0:
      # Directly focus on the top_k pixels.
      top_k_pixels = tf.to_int32(top_k_percent_pixels * num_pixels)
    else:
      # Gradually reduce the mining percent to top_k_percent_pixels.
      global_step = tf.to_float(tf.train.get_or_create_global_step())
      ratio = tf.minimum(1.0, global_step / hard_example_mining_step)
      top_k_pixels = tf.to_int32(
          (ratio * top_k_percent_pixels + (1.0 - ratio)) * num_pixels)
    top_k_losses, _ = tf.nn.top_k(weighted_pixel_losses,
                                  k=top_k_pixels,
                                  sorted=True,
                                  name='top_k_percent_pixels')
    
    total_loss = tf.reduce_sum(top_k_losses)
    num_present = tf.reduce_sum(
        tf.to_float(tf.not_equal(top_k_losses, 0.0)))
    loss = _div_maybe_zero(total_loss, num_present)
    tf.losses.add_loss(loss)

Tensor("semantic_scale1/pixel_losses/Reshape_2:0", shape=(524288,), dtype=float32)
Tensor("semantic_scale1/Sum:0", shape=(), dtype=float32)
Tensor("semantic_scale1/Sum_1:0", shape=(), dtype=float32)
Tensor("semantic_scale1/mul_1:0", shape=(), dtype=float32)


In [21]:
dir(tf.GraphKeys)

['ACTIVATIONS',
 'ASSET_FILEPATHS',
 'BIASES',
 'CONCATENATED_VARIABLES',
 'COND_CONTEXT',
 'EVAL_STEP',
 'GLOBAL_STEP',
 'GLOBAL_VARIABLES',
 'INIT_OP',
 'LOCAL_INIT_OP',
 'LOCAL_RESOURCES',
 'LOCAL_VARIABLES',
 'LOSSES',
 'METRIC_VARIABLES',
 'MODEL_VARIABLES',
 'MOVING_AVERAGE_VARIABLES',
 'QUEUE_RUNNERS',
 'READY_FOR_LOCAL_INIT_OP',
 'READY_OP',
 'REGULARIZATION_LOSSES',
 'RESOURCES',
 'SAVEABLE_OBJECTS',
 'SAVERS',
 'SUMMARIES',
 'SUMMARY_OP',
 'TABLE_INITIALIZERS',
 'TRAINABLE_RESOURCE_VARIABLES',
 'TRAINABLE_VARIABLES',
 'TRAIN_OP',
 'UPDATE_OPS',
 'VARIABLES',
 'WEIGHTS',
 'WHILE_CONTEXT',
 '_STREAMING_MODEL_PORTS',
 '_SUMMARY_COLLECTION',
 '_VARIABLE_COLLECTIONS',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__

In [22]:
tf.get_collection(tf.GraphKeys.UPDATE_OPS)

[]

In [23]:
tf.get_collection(tf.GraphKeys.LOSSES)

[<tf.Tensor 'semantic_scale1/mul_1:0' shape=() dtype=float32>]

In [27]:
g = tf.get_default_graph()
with g.as_default():
    with tf.Session(graph=g) as sess:
#         img = tf.random_uniform(
#                 (4, crop_size[0], crop_size[1], 1))
        img = tf.convert_to_tensor(xx[...,np.newaxis], dtype=tf.uint8)
        print(img.shape)
        print(img.dtype)
#         outputs_to_scales_to_logits = model.multi_scale_logits(
#                                         inputs,
#                                         model_options,
#                                         image_pyramid=[1.0])

        sess.run(tf.global_variables_initializer())
#         logits = sess.run(logits)
#         outputs_to_scales_to_logits = sess.run(outputs_to_logits, feed_dict={'input_layer:0': img.eval(), 'training:0':True})
        outputs = sess.run(logits, feed_dict={'input_layer:0': img.eval(), 'training:0':False})
#         loss = sess.run(loss)
#         output0 = sess.run(outputs_to_scales_to_logits)
#         print(output0)
        writer = tf.summary.FileWriter("output3/test3", sess.graph)
        writer.close()

(8, 256, 256, 1)
<dtype: 'uint8'>


TypeError: Cannot interpret feed_dict key as Tensor: The name 'input_layer:0' refers to a Tensor which does not exist. The operation, 'input_layer', does not exist in the graph.