In [1]:
import tensorflow as tf
from functools import partial
import matplotlib.pyplot as plt

try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("Device:", tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except:
    strategy = tf.distribute.get_strategy()
print("Number of replicas:", strategy.num_replicas_in_sync)

Number of replicas: 1


In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
GCS_PATH = "/data/datasets/saket/SeeingThroughFogData/train_clear_day/*.swedentfrecord"
BATCH_SIZE = 64
IMAGE_SIZE = [1024, 1024]

In [12]:
import os

import tensorflow as tf
from datasets import dataset_utils

slim = tf.contrib.slim


def get_split(split_name, dataset_dir, file_pattern, reader,
              split_to_sizes, items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.

    Returns:
      A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in split_to_sizes:
        raise ValueError('split name %s was not recognized.' % split_name)
    file_pattern = os.path.join(dataset_dir, file_pattern)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    # change
    keys_to_features= {
        #'key': tf.VarLenFeature(dtype=tf.int64),
        #'name': tf.FixedLenFeature((), tf.string),
        'image/cam_stereo_left_lut': tf.FixedLenFeature((), tf.string),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
        'image/object/class/text': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/angle': tf.VarLenFeature(dtype=tf.float32),
        'image/object/truncation': tf.VarLenFeature(dtype=tf.float32),
        'image/object/occlusion': tf.VarLenFeature(dtype=tf.int64),
        'image/object/object/bbox3d/height': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/width': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/length': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/x': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/y': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/z': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/alpha3d': tf.VarLenFeature(dtype=tf.float32),

        #'image/key': tf.FixedLenFeature((), tf.string),
        'image/shape/cam_stereo_left_lut': tf.FixedLenFeature([3], tf.int64),
        #'lidar/point_key': tf.VarLenFeature(dtype=tf.float32),
        #'lidar/shape': tf.VarLenFeature(dtype=tf.int64),
        #'gated/key': tf.FixedLenFeature((), tf.string),
        #'gated/shape': tf.VarLenFeature(dtype=tf.int64),
    }
    # change
    items_to_handlers = {
        #'key': slim.tfexample_decoder.Tensor('key'),
        #'name': slim.tfexample_decoder.Tensor('name'),
        'image': slim.tfexample_decoder.Image('image/cam_stereo_left_lut'),
        'shape': slim.tfexample_decoder.Tensor('image/shape/cam_stereo_left_lut'),
        'class/text': slim.tfexample_decoder.Tensor('image/object/class/text'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
                ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        #'bbox/angle': slim.tfexample_decoder.Tensor('image/object/bbox/angle'),
        #'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        #'object/truncation': slim.tfexample_decoder.Tensor('image/object/truncation'),
        #'object/occlusion': slim.tfexample_decoder.Tensor('image/object/occlusion'),
        #'object/bbox3d': slim.tfexample_decoder.BoundingBox(
        #    ['height', 'width', 'length', 'x','y','z'],'image/object/object/bbox3d/'),
        #'bbox3d/alpha3d': slim.tfexample_decoder.Image('image/object/object/bbox3d/alpha3d'),
        
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)
    print(decoder)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)
    # else:
    #     labels_to_names = create_readable_names_for_imagenet_labels()
    #     dataset_utils.write_label_file(labels_to_names, dataset_dir)

    return slim.dataset.Dataset(
            data_sources=file_pattern,
            reader=reader,
            decoder=decoder,
            num_samples=split_to_sizes[split_name],
            items_to_descriptions=items_to_descriptions,
            num_classes=num_classes,
            labels_to_names=labels_to_names)


In [13]:
num_classes = 8
file_pattern = '*.swedentfrecord'
split_name = "train_clear_day"
dataset_dir = "/data/datasets/saket/SeeingThroughFogData/train_clear_day"
reader = tf.TFRecordReader
split_to_sizes = {
    'train_clear_day': 2183,
    #'validation': 50000,
}
items_to_descriptions = {
    'image_data': 'A color image of varying height and width.',
    'gated_data': 'Gated camera images of varying height and width.',
    'lidar_data': 'Lidar Data .bin files.',
    'image_shape': 'Shape of the image',
    'lidar_shape': 'Shape of the Lidar data',
    'gated_shape': 'Shape of the Gated camera image',
    'label': 'Common labels for image, gated, and lidar data',
    'name': 'Entry ID (Files name used for training)',
    'total_id': 'Total ID',
}


get_split(split_name, dataset_dir, file_pattern, reader,
              split_to_sizes, items_to_descriptions, num_classes)

<tensorflow.contrib.slim.python.slim.data.tfexample_decoder.TFExampleDecoder object at 0x7f7386be8510>


<tensorflow.contrib.slim.python.slim.data.dataset.Dataset at 0x7f7386be8e90>

In [11]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets import cifar10
from datasets import imagenet
#change
from datasets import fullseeingthroughfogdataset

from datasets import pascalvoc_2007
from datasets import pascalvoc_2012

# change
datasets_map = {
    'cifar10': cifar10,
    'imagenet': imagenet,
    'pascalvoc_2007': pascalvoc_2007,
    'pascalvoc_2012': pascalvoc_2012,
    'fullseeingthroughfogdataset': fullseeingthroughfogdataset,
}


def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):
    """Given a dataset name and a split_name returns a Dataset.

    Args:
        name: String, the name of the dataset.
        split_name: A train/test split name.
        dataset_dir: The directory where the dataset files are stored.
        file_pattern: The file pattern to use for matching the dataset source files.
        reader: The subclass of tf.ReaderBase. If left as `None`, then the default
            reader defined by each dataset is used.
    Returns:
        A `Dataset` class.
    Raises:
        ValueError: If the dataset `name` is unknown.
    """
    #split_name = "train_clear_day"
    #dataset_dir = "/data/datasets/saket/SeeingThroughFogData"

    #print(datasets_map[name].get_split(file_pattern))

    if name not in datasets_map:
        raise ValueError('Name of dataset unknown %s' % name)
    return datasets_map[name].get_split(split_name,
                                        dataset_dir,
                                        file_pattern,
                                        reader)

In [1]:
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generic training script that trains a SSD model using a given dataset."""
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

from datasets import dataset_factory
from deployment import model_deploy
from nets import nets_factory
from preprocessing import preprocessing_factory
import tf_utils

slim = tf.contrib.slim
#device= 'cpu'
DATA_FORMAT = 'NCHW'
print("Num GPUs Available: ", len( tf.config.experimental.list_physical_devices('GPU')))

# =========================================================================== #
# SSD Network flags.
# =========================================================================== #
tf.app.flags.DEFINE_float(
    'loss_alpha', 1., 'Alpha parameter in the loss function.')
tf.app.flags.DEFINE_float(
    'negative_ratio', 3., 'Negative ratio in the loss function.')
tf.app.flags.DEFINE_float(
    'match_threshold', 0.5, 'Matching threshold in the loss function.')

# =========================================================================== #
# General Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'train_dir', '/tmp/tfmodel/',
    'Directory where checkpoints and event logs are written to.')
tf.app.flags.DEFINE_integer('num_clones', 1,
                            'Number of model clones to deploy.')
tf.app.flags.DEFINE_boolean('clone_on_cpu', False,
                            'Use CPUs to deploy clones.')
tf.app.flags.DEFINE_integer(
    'num_readers', 4,
    'The number of parallel readers that read data from the dataset.')
tf.app.flags.DEFINE_integer(
    'num_preprocessing_threads', 4,
    'The number of threads used to create the batches.')

tf.app.flags.DEFINE_integer(
    'log_every_n_steps', 10,
    'The frequency with which logs are print.')
tf.app.flags.DEFINE_integer(
    'save_summaries_secs', 600,
    'The frequency with which summaries are saved, in seconds.')
tf.app.flags.DEFINE_integer(
    'save_interval_secs', 600,
    'The frequency with which the model is saved, in seconds.')
tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 0.8, 'GPU memory fraction to use.')

# =========================================================================== #
# Optimization Flags.
# =========================================================================== #
tf.app.flags.DEFINE_float(
    'weight_decay', 0.00004, 'The weight decay on the model weights.')
tf.app.flags.DEFINE_string(
    'optimizer', 'rmsprop',
    'The name of the optimizer, one of "adadelta", "adagrad", "adam",'
    '"ftrl", "momentum", "sgd" or "rmsprop".')
tf.app.flags.DEFINE_float(
    'adadelta_rho', 0.95,
    'The decay rate for adadelta.')
tf.app.flags.DEFINE_float(
    'adagrad_initial_accumulator_value', 0.1,
    'Starting value for the AdaGrad accumulators.')
tf.app.flags.DEFINE_float(
    'adam_beta1', 0.9,
    'The exponential decay rate for the 1st moment estimates.')
tf.app.flags.DEFINE_float(
    'adam_beta2', 0.999,
    'The exponential decay rate for the 2nd moment estimates.')
tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')
tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5,
                          'The learning rate power.')
tf.app.flags.DEFINE_float(
    'ftrl_initial_accumulator_value', 0.1,
    'Starting value for the FTRL accumulators.')
tf.app.flags.DEFINE_float(
    'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.')
tf.app.flags.DEFINE_float(
    'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')
tf.app.flags.DEFINE_float(
    'momentum', 0.9,
    'The momentum for the MomentumOptimizer and RMSPropOptimizer.')
tf.app.flags.DEFINE_float('rmsprop_momentum', 0.9, 'Momentum.')
tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')

# =========================================================================== #
# Learning Rate Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'learning_rate_decay_type',
    'exponential',
    'Specifies how the learning rate is decayed. One of "fixed", "exponential",'
    ' or "polynomial"')
tf.app.flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
tf.app.flags.DEFINE_float(
    'end_learning_rate', 0.0001,
    'The minimal end learning rate used by a polynomial decay learning rate.')
tf.app.flags.DEFINE_float(
    'label_smoothing', 0.0, 'The amount of label smoothing.')
tf.app.flags.DEFINE_float(
    'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.')
tf.app.flags.DEFINE_float(
    'num_epochs_per_decay', 2.0,
    'Number of epochs after which learning rate decays.')
tf.app.flags.DEFINE_float(
    'moving_average_decay', None,
    'The decay to use for the moving average.'
    'If left as None, then moving averages are not used.')

# =========================================================================== #
# Dataset Flags.
# =========================================================================== #
# change
tf.app.flags.DEFINE_string(
    'dataset_name', 'fullseeingthroughfogdataset', 'The name of the dataset to load.')
# change
tf.app.flags.DEFINE_integer(
    'num_classes', 8, 'Number of classes to use in the dataset.')
# change
tf.app.flags.DEFINE_string(
    'dataset_split_name', 'train_clear_day', 'The name of the train/test split.')
tf.app.flags.DEFINE_string(
    'dataset_dir', None, 'The directory where the dataset files are stored.')
tf.app.flags.DEFINE_integer(
    'labels_offset', 0,
    'An offset for the labels in the dataset. This flag is primarily used to '
    'evaluate the VGG and ResNet architectures which do not use a background '
    'class for the ImageNet dataset.')
tf.app.flags.DEFINE_string(
    'model_name', 'ssd_512_vgg', 'The name of the architecture to train.')
tf.app.flags.DEFINE_string(
    'preprocessing_name', None, 'The name of the preprocessing to use. If left '
    'as `None`, then the model_name flag is used.')
tf.app.flags.DEFINE_integer(
    'batch_size', 32, 'The number of samples in each batch.')
tf.app.flags.DEFINE_integer(
    'train_image_size', None, 'Train image size')
tf.app.flags.DEFINE_integer('max_number_of_steps', None,
                            'The maximum number of training steps.')

# =========================================================================== #
# Fine-Tuning Flags.
# =========================================================================== #
tf.app.flags.DEFINE_string(
    'checkpoint_path', None,
    'The path to a checkpoint from which to fine-tune.')
tf.app.flags.DEFINE_string(
    'checkpoint_model_scope', None,
    'Model scope in the checkpoint. None if the same as the trained model.')
tf.app.flags.DEFINE_string(
    'checkpoint_exclude_scopes', None,
    'Comma-separated list of scopes of variables to exclude when restoring '
    'from a checkpoint.')
tf.app.flags.DEFINE_string(
    'trainable_scopes', None,
    'Comma-separated list of scopes to filter the set of variables to train.'
    'By default, None would train all the variables.')
tf.app.flags.DEFINE_boolean(
    'ignore_missing_vars', False,
    'When restoring a checkpoint would ignore missing variables.')

FLAGS = tf.app.flags.FLAGS


# =========================================================================== #
# Main training routine.
# =========================================================================== #
def main(_):
    FLAGS.dataset_dir = "/data/datasets/saket/SeeingThroughFogData/train_clear_day"
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Config model_deploy. Keep TF Slim Models structure.
        # Useful if want to need multiple GPUs and/or servers in the future.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)
        # Create global_step.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.train_dir)
        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)
            # Get for SSD network: image, labels, bboxes.
            # change
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                             'class/text',
                                                             'object/bbox'])
            print("Image:",image)
            print("Shape:",shape)
            print("glabels:", glabels)
            print("gbboxes:",gbboxes)
            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT)
            print('***')
            print("Image:",image)
            #print("Shape:",shape)
            print("glabels:", glabels)
            print("gbboxes:",gbboxes)
            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            batch_shape = [1] + [len(ssd_anchors)] * 3
            print('##')
            print('gclasses:', gclasses)
            print('glocalization:', glocalisations)
            print('gscores:', gscores)
            # Training batches and queue.
            r = tf.train.batch(
                tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for all
            # GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple
            clones of network_fn."""
            # Dequeue batch.
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct SSD network.
            arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                          data_format=DATA_FORMAT)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                    ssd_net.net(b_image, is_training=True)
            # Add loss function.
            ssd_net.losses(logits, localisations,
                           b_gclasses, b_glocalisations, b_gscores,
                           match_threshold=FLAGS.match_threshold,
                           negative_ratio=FLAGS.negative_ratio,
                           alpha=FLAGS.loss_alpha,
                           label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))
        # Add summaries for losses and extra losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(FLAGS,
                                                             dataset.num_samples,
                                                             global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)


if __name__ == '__main__':
    tf.app.run()


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Num GPUs Available:  0
Instructions for updating:
Please switch to tf.train.create_global_step


W0323 12:28:24.464266 139898556168000 deprecation.py:323] From <ipython-input-1-a5c4cd3199c3>:205: create_global_step (from tensorflow.contrib.framework.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.create_global_step





W0323 12:28:24.474351 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/datasets/fullseeingthroughfogdataset_common.py:61: The name tf.FixedLenFeature is deprecated. Please use tf.io.FixedLenFeature instead.






W0323 12:28:24.476263 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/datasets/fullseeingthroughfogdataset_common.py:63: The name tf.VarLenFeature is deprecated. Please use tf.io.VarLenFeature instead.






W0323 12:28:24.478122 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/datasets/dataset_utils.py:111: The name tf.gfile.Exists is deprecated. Please use tf.io.gfile.exists instead.




# Training | Evaluation flags:
{'?': <absl.app.HelpFlag object at 0x7f3bf6541890>,
 'adadelta_rho': <absl.flags._flag.Flag object at 0x7f3bf653d310>,
 'adagrad_initial_accumulator_value': <absl.flags._flag.Flag object at 0x7f3bf653d3d0>,
 'adam_beta1': <absl.flags._flag.Flag object at 0x7f3bf653d490>,
 'adam_beta2': <absl.flags._flag.Flag object at 0x7f3bf653d550>,
 'alsologtostderr': <absl.flags._flag.BooleanFlag object at 0x7f3c02fdf450>,
 'batch_size': <absl.flags._flag.Flag object at 0x7f3bf6541590>,
 'checkpoint_exclude_scopes': <absl.flags._flag.Flag object at 0x7f3bf6541850>,
 'checkpoint_model_scope': <absl.flags._flag.Flag object at 0x7f3bf65417d0>,
 'checkpoint_path': <absl.flags._flag.Flag object at 0x7f3bf6541750>,
 'clone_on_cpu': <absl.flags._flag.BooleanFlag object at 0x7f3bf783c950>,
 'dataset_dir': <absl.flags._flag.Flag object at 0x7f3bf6541350>,
 'dataset_name': <absl.flags._flag.Flag object at 0x7f3bf6541150>,
 'dataset_split_name': <absl.flags._flag.Flag object at

W0323 12:28:24.596531 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/contrib/slim/python/slim/data/parallel_reader.py:246: string_input_producer (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.


W0323 12:28:24.604600 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/input.py:277: input_producer (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.


W0323 12:28:24.606816 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/input.py:189: limit_epochs (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


W0323 12:28:24.609724 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/input.py:198: QueueRunner.__init__ (from tensorflow.python.training.queue_runner_impl) is deprecated and will be removed in a future version.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


W0323 12:28:24.612179 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/input.py:198: add_queue_runner (from tensorflow.python.training.queue_runner_impl) is deprecated and will be removed in a future version.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecordDataset`.


W0323 12:28:24.620273 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/contrib/slim/python/slim/data/parallel_reader.py:95: TFRecordReader.__init__ (from tensorflow.python.ops.io_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecordDataset`.


Image: Tensor("fullseeingthroughfogdataset_data_provider/case/cond/Merge:0", shape=(?, ?, 3), dtype=uint8, device=/device:CPU:0)
Shape: Tensor("fullseeingthroughfogdataset_data_provider/Reshape_2:0", shape=(3,), dtype=int64, device=/device:CPU:0)
glabels: Tensor("fullseeingthroughfogdataset_data_provider/SparseToDense:0", shape=(?,), dtype=int64, device=/device:CPU:0)
gbboxes: Tensor("fullseeingthroughfogdataset_data_provider/transpose:0", shape=(?, 4), dtype=float32, device=/device:CPU:0)



W0323 12:28:24.700066 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/preprocessing/ssd_vgg_preprocessing.py:101: The name tf.summary.image is deprecated. Please use tf.compat.v1.summary.image instead.



Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.


W0323 12:28:24.704546 139898556168000 deprecation.py:323] From /home/saket/Dense/SSD/preprocessing/ssd_vgg_preprocessing.py:219: sample_distorted_bounding_box (from tensorflow.python.ops.image_ops_impl) is deprecated and will be removed in a future version.
Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


W0323 12:28:24.763853 139898556168000 deprecation.py:323] From /home/saket/Dense/SSD/tf_extended/math.py:38: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where





W0323 12:28:24.794939 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/preprocessing/tf_image.py:275: The name tf.image.resize_images is deprecated. Please use tf.image.resize instead.






W0323 12:28:24.823558 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/preprocessing/ssd_vgg_preprocessing.py:116: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



***
Image: Tensor("ssd_preprocessing_train/transpose:0", shape=(3, 512, 512), dtype=float32, device=/device:CPU:0)
glabels: Tensor("ssd_preprocessing_train/distorted_bounding_box_crop/bboxes_filter/boolean_mask/GatherV2:0", shape=(?,), dtype=int64, device=/device:CPU:0)
gbboxes: Tensor("ssd_preprocessing_train/random_flip_left_right/cond_1/Merge:0", shape=(?, 4), dtype=float32, device=/device:CPU:0)
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


W0323 12:28:24.953328 139898556168000 deprecation.py:323] From /home/saket/Dense/SSD/nets/ssd_common.py:77: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.





W0323 12:28:24.993057 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/nets/ssd_common.py:152: The name tf.log is deprecated. Please use tf.math.log instead.



##
gclasses: [<tf.Tensor 'bboxes_encode_block_0/while/Exit_1:0' shape=(64, 64, 4) dtype=int64>, <tf.Tensor 'bboxes_encode_block_1/while/Exit_1:0' shape=(32, 32, 6) dtype=int64>, <tf.Tensor 'bboxes_encode_block_2/while/Exit_1:0' shape=(16, 16, 6) dtype=int64>, <tf.Tensor 'bboxes_encode_block_3/while/Exit_1:0' shape=(8, 8, 6) dtype=int64>, <tf.Tensor 'bboxes_encode_block_4/while/Exit_1:0' shape=(4, 4, 6) dtype=int64>, <tf.Tensor 'bboxes_encode_block_5/while/Exit_1:0' shape=(2, 2, 4) dtype=int64>, <tf.Tensor 'bboxes_encode_block_6/while/Exit_1:0' shape=(1, 1, 4) dtype=int64>]
glocalization: [<tf.Tensor 'bboxes_encode_block_0/stack:0' shape=(64, 64, 4, 4) dtype=float32>, <tf.Tensor 'bboxes_encode_block_1/stack:0' shape=(32, 32, 6, 4) dtype=float32>, <tf.Tensor 'bboxes_encode_block_2/stack:0' shape=(16, 16, 6, 4) dtype=float32>, <tf.Tensor 'bboxes_encode_block_3/stack:0' shape=(8, 8, 6, 4) dtype=float32>, <tf.Tensor 'bboxes_encode_block_4/stack:0' shape=(4, 4, 6, 4) dtype=float32>, <tf.Tens

W0323 12:28:25.642459 139898556168000 deprecation.py:323] From <ipython-input-1-a5c4cd3199c3>:268: batch (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.batch(batch_size)` (or `padded_batch(...)` if `dynamic_pad=True`).





W0323 12:28:25.657458 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/deployment/model_deploy.py:194: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.






W0323 12:28:25.659054 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/deployment/model_deploy.py:194: The name tf.get_variable_scope is deprecated. Please use tf.compat.v1.get_variable_scope instead.



Instructions for updating:
Please use `layer.__call__` method instead.


W0323 12:28:25.663528 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1057: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.





W0323 12:28:26.242242 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/nets/ssd_vgg_512.py:579: The name tf.losses.compute_weighted_loss is deprecated. Please use tf.compat.v1.losses.compute_weighted_loss instead.






W0323 12:28:26.716893 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/nets/ssd_vgg_512.py:604: The name tf.add_to_collection is deprecated. Please use tf.compat.v1.add_to_collection instead.






W0323 12:28:26.939909 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/tf_utils.py:105: The name tf.train.exponential_decay is deprecated. Please use tf.compat.v1.train.exponential_decay instead.






W0323 12:28:26.946065 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/tf_utils.py:162: The name tf.train.RMSPropOptimizer is deprecated. Please use tf.compat.v1.train.RMSPropOptimizer instead.






W0323 12:28:26.949170 139898556168000 module_wrapper.py:139] From /home/saket/Dense/SSD/tf_utils.py:245: The name tf.trainable_variables is deprecated. Please use tf.compat.v1.trainable_variables instead.



Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


W0323 12:28:28.029665 139898556168000 deprecation.py:506] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/rmsprop.py:119: calling Ones.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession


W0323 12:28:28.889024 139898556168000 deprecation.py:323] From /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/contrib/slim/python/slim/learning.py:742: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession


INFO:tensorflow:Restoring parameters from /tmp/tfmodel/model.ckpt-0


I0323 12:28:29.239311 139898556168000 saver.py:1284] Restoring parameters from /tmp/tfmodel/model.ckpt-0


INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.NotFoundError'>, Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key ssd_512_vgg/block10/conv1x1/biases not found in checkpoint
	 [[node save/RestoreV2 (defined at /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]

Original stack trace for 'save/RestoreV2':
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instanc

I0323 12:28:29.743692 139898556168000 coordinator.py:224] Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.NotFoundError'>, Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key ssd_512_vgg/block10/conv1x1/biases not found in checkpoint
	 [[node save/RestoreV2 (defined at /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]

Original stack trace for 'save/RestoreV2':
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel_launcher.py", line 

NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key ssd_512_vgg/block10/conv1x1/biases not found in checkpoint
	 [[node save/RestoreV2 (defined at /home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]

Original stack trace for 'save/RestoreV2':
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
    self._run_once()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
    handle._run()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/ioloop.py", line 688, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/ioloop.py", line 741, in _run_callback
    ret = callback()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 814, in inner
    self.ctx_run(self.run)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 775, in run
    yielded = self.gen.send(value)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 358, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 538, in execute_request
    user_expressions, allow_stdin,
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 302, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 539, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2895, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2940, in _run_cell
    return runner(coro)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3166, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3357, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-a5c4cd3199c3>", line 408, in <module>
    tf.app.run()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/platform/app.py", line 40, in run
    _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/absl/app.py", line 303, in run
    _run_main(main, args)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/absl/app.py", line 251, in _run_main
    sys.exit(main(argv))
  File "<ipython-input-1-a5c4cd3199c3>", line 390, in main
    pad_step_number=False)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/saver.py", line 828, in __init__
    self.build()
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/saver.py", line 840, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/saver.py", line 878, in _build
    build_restore=build_restore)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/saver.py", line 508, in _build_internal
    restore_sequentially, reshape)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/saver.py", line 328, in _AddRestoreOps
    restore_sequentially)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/training/saver.py", line 575, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_io_ops.py", line 1696, in restore_v2
    name=name)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
    op_def=op_def)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
    attrs, op_def, compute_device)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
    op_def=op_def)
  File "/home/saket/anaconda3/envs/tensorflow/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
    self._traceback = tf_stack.extract_stack()


In [4]:
FILENAMES = tf.io.gfile.glob(GCS_PATH)
split_ind = int(0.9 * len(FILENAMES))
TRAINING_FILENAMES, VALID_FILENAMES = FILENAMES[:split_ind], FILENAMES[split_ind:]
print("Train TFRecord Files:", len(TRAINING_FILENAMES))
print("Validation TFRecord Files:", len(VALID_FILENAMES))

Train TFRecord Files: 491
Validation TFRecord Files: 55


In [5]:
def decode_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32)
    #image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image


In [6]:
def read_tfrecord(example, labeled):
    tfrecord_format = (
        {'image/cam_stereo_left_lut': tf.io.FixedLenFeature([],tf.string),
        'image/format': tf.io.FixedLenFeature([], tf.string, default_value='png'),
        'image/shape/cam_stereo_left_lut': tf.FixedLenFeature([3], tf.int64),
        'image/object/class/text': tf.io.VarLenFeature(dtype=tf.string),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32),
        }
    )
    example = tf.io.parse_single_example(example, tfrecord_format)
    image = decode_image(example["image/cam_stereo_left_lut"])
    if labeled:
        label = tf.cast(example["image/object/class/text"], tf.string)
        #colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
        bbox_y_min = tf.cast("image/object/bbox/y_min", tf.float32)
        bbox_x_min = tf.cast("image/object/bbox/x_min", tf.float32)  
        bbox_y_max = tf.cast("image/object/bbox/y_max", tf.float32)
        bbox_x_max = tf.cast("image/object/bbox/x_max", tf.float32)
        #bbox = [bbox_y_min,bbox_x_min,bbox_y_max, bbox_x_max]
        #bbox = example[bbox]
        #bbox = tf.cast(example[("image/object/bbox/y_min","image/object/bbox/x_min",
        #                       "image/object/bbox/y_max","image/object/bbox/x_max")],
        #               tf.int32)

        return image, label
    return image


In [7]:
def load_dataset(filenames, labeled=True):
    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False  # disable order, increase speed
    dataset = tf.data.TFRecordDataset(
        filenames
    )  # automatically interleaves reads from multiple files
    dataset = dataset.with_options(
        ignore_order
    )  # uses data as soon as it streams in, rather than in its original order
    dataset = dataset.map(
        partial(read_tfrecord, labeled=labeled), num_parallel_calls=AUTOTUNE
    )
    # returns a dataset of (image, label) pairs if labeled=True or just images if labeled=False
    return dataset


In [8]:
@tf.function
def get_dataset(filenames, labeled=True):
    dataset = load_dataset(filenames, labeled=labeled)
    dataset = dataset.shuffle(2048)
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset


In [9]:
train_dataset = get_dataset(TRAINING_FILENAMES)
valid_dataset = get_dataset(VALID_FILENAMES)
tf.enable_eager_execution
#image_batch, label_batch = next(iter(valid_dataset))

#len(train_dataset)





<function tensorflow.python.framework.ops.enable_eager_execution(config=None, device_policy=None, execution_mode=None)>

In [10]:
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=20, decay_rate=0.96, staircase=True
)

checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "melanoma_model.h5", save_best_only=True
)

early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10, restore_best_weights=True
)

In [23]:
def make_model():
   # base_model = tf.keras.applications.Xception(
    #    input_shape=(*IMAGE_SIZE, 3), include_top=False, weights="imagenet"
    #)

    #base_model.trainable = False

    inputs = tf.keras.layers.Input([*IMAGE_SIZE, 3])
    x = tf.keras.applications.xception.preprocess_input(inputs)
    #x = base_model(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(8, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.4)(x)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="binary_crossentropy",
    )

    return model

In [12]:
import numpy as np
import tensorflow as tf
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

INFO:tensorflow:Single-worker CollectiveAllReduceStrategy with local_devices = ('/device:CPU:0',), communication = CollectiveCommunication.AUTO


In [25]:
!pip install keras-efficientnet



In [20]:
#import tensorflow as tf
#from tf.keras.application import efficientnet
import keras

ImportError: Keras requires TensorFlow 2.2 or higher. Install TensorFlow via `pip install tensorflow`

In [15]:

train_filenames = tf.io.gfile.glob(f"/data/datasets/saket/SeeingThroughFogData/train_clear_day/*.swedentfrecord")
batch_size = 32
epochs = 1
steps_per_epoch = 50
AUTOTUNE = tf.data.experimental.AUTOTUNE

input_tensor = tf.keras.layers.Input(shape=(224, 224, 3), name="image")
model = tf.keras.applications.EfficientNetB0(
    input_tensor=input_tensor, weights=None, classes=91
)


model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)


model.fit(
    x=get_dataset(train_filenames, batch_size),
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    verbose=1,
)

AttributeError: module 'tensorflow.python.keras.api._v1.keras.applications' has no attribute 'EfficientNetB0'

In [26]:
with strategy.scope():
    model = make_model()

history = model.fit(
    valid_dataset,
    epochs=2,
    validation_data=valid_dataset
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on None steps
Epoch 1/2


InvalidArgumentError: Incompatible shapes: [64,1] vs. [703,2]
	 [[{{node training/Adam/gradients/gradients/loss_1/dense_1_loss/logistic_loss/mul_grad/BroadcastGradientArgs}}]]

In [78]:
def create_example(image, path, example):
    feature = {
        "image": image_feature(image),
        "path": bytes_feature(path),
        "area": float_feature(example["area"]),
        "bbox": float_feature_list(example["bbox"]),
        "category_id": int64_feature(example["category_id"]),
        "id": int64_feature(example["id"]),
        "image_id": int64_feature(example["image_id"]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))


def parse_tfrecord_fn(example):
    feature_description = {
       'image/format': tf.io.FixedLenFeature((), tf.string, default_value='png'),
        #'image/object/class/text': tf.io.FixedLenFeature((), tf.string),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/angle': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/truncation': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/occlusion': tf.io.VarLenFeature(dtype=tf.int64),
        'image/object/object/bbox3d/height': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/width': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/length': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/x': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/y': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/z': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox3d/alpha3d': tf.io.VarLenFeature(dtype=tf.float32),
        'image/shape': tf.io.FixedLenFeature([3], tf.int64),
        }

    example = tf.io.parse_single_example(example, feature_description)
    example["image"] = tf.io.decode_jpeg(example["image/format"], channels=3)
    #example["label"] = tf.sparse.to_denset(example["image/object/class/text"], tf.int32)
    example["bbox_y_min"] = tf.sparse.to_dense(example["image/object/bbox/ymin"])
    return example

In [79]:
raw_dataset = tf.data.TFRecordDataset("/data/datasets/saket/SeeingThroughFogData/train_clear_day/train_clear_day_000000.swedentfrecord")
parsed_dataset = raw_dataset.map(parse_tfrecord_fn)

for features in parsed_dataset.take(1):
    for key in features.keys():
        if key != "image":
            print(f"{key}: {features[key]}")

    print(f"Image shape: {features['image'].shape}")
    plt.figure(figsize=(7, 7))
    plt.imshow(features["image"].numpy())
    plt.show()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


InvalidArgumentError: Feature: image/shape (data type: int64) is required but could not be found.
	 [[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]