In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display

# Structure for detection task

1. Dataloaders:
    - Input images for predicton of detection task: 
         * preprocess_input(): process original images to image data
         * decoder(): convert image data to tensor
         * parser(): breaks the input into parts
    - Load pre-trained weights into YOLOv3 model:
        * (utils) download_weights()
        * load_weights_to_model()
 2. Modeling:
     - nn_block() or layers:
     - yolo_model():
 3. Ops:
     - decode_netout(): decode the prediction of detection model into boxes 
     - correct_boxes():
     - nms() and bbox_iou():
     - draw_boxes():
 4. Configs:
     - Image Input:
         * image_path:
         * num_classes:
         * input_size: for modeling
     - Pretrained weights:
         * pb_file: pretrain weights
 
 5. Tasks:
     - build_input():
     - build_model():
     - build_loss():
     - metrics:
 
 6. Common/Import Registry:
     - Include the components above
 
 7. Train:
     - train.py
 
 8. Demo

In [2]:
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

## Data Loader

Reads, Decodes and Parses the input data. Contains a decoder and Parser.

Decoder decodes a TF record and returns a dictionary of decoded tensors. 

In [1]:
import tensorflow_models as tfm
from official.vision.dataloaders.tf_example_decoder import TfExampleDecoder

pp = pprint.PrettyPrinter(indent=4) # Set Pretty Print Indentation
print(tf.__version__) # Check the version of tensorflow used

%matplotlib inline

ModuleNotFoundError: No module named 'tensorflow_models'

In [20]:
class YOLODecoder(decoder.Decoder): # inherits from decoder.Decoder provided by Tensorflow Model Garden
    def __init__(self):
        """Initializes the decoder.
        Defines mapping between the field name and value from an input. 
        E.g. We define two fields for image bytes and labels."""
        
        # defines a dictionary that contains mapping between feature keys and 
        # their corresponding feature types.
        self._keys_to_features = {
            # FixedLenFeature is used when the feature has a fixed length. It expexts
            # a single value or a tensor of a specified shape and data type.
            # 'image/encoded' is a fixed-length feature representing the encoded image data. 
            # The shape is specified as '()' since its a single string (the image data), and the data type is 
            # 'tf.string'.
            # VarLenFeature is used when the feature has a variable length. It expects a
            # sparse tensor representing a list of values of the specified data type. 
            # 'image/object/bbox/xmin', 'image/object/bbox/ymin', 'image/object/bbox/xmax', 
            # 'image/object/bbox/ymax', and 'image/object/class/label' are variable-length features.
            'image/encoded': tf.io.FixedLenFeature((), tf.string, default_value=''),
            #'image/class/label': tf.io.FixedLenFeature((), tf.int64, default_value = -1)
            # Since the number of bounding boxes and labels can vary from image to image, 
            # it's more efficient to use variable-length features. 
            'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
            'image/object/class/label': tf.io.VarLenFeature(tf.float32),
        }
        
    def decode(self, features): # Features in a dictionary containing decoded tensors
        
        # decode image
        
        # decodes the encoded image data ('image/encoded') using tf.image.decode_jpeg. decodes jpeg-encoded images into uint8 tensors. channels = 3 indicates RGB.
        image = tf.image.decode_jpeg(features['image/encoded'], channels = 3)
        
        # Decode the bounding boxes
        
        # decodes sparse tensors to dense tensors
        xmin = tf.sparse.to_dense(features['image/object/bbox/xmin'])
        ymin = tf.sparse.to_dense(features['image/object/bbox/ymin'])        
        xmax = tf.sparse.to_dense(features['image/object/bbox/xmax'])        
        ymax = tf.sparse.to_dense(features['image/object/bbox/ymax'])        
        labels = tf.sparse.to_dense(features['image/object/class/label'])        
        
        
        # Combine bounding box coordinates
        
        # stacks the decoded bounding box coordinates ('xmin', 'ymin', 'xmax', 'ymax') along the last axis
        # ('axis = -1') to form a tensor of shape '(num_boxes, 4)'.
        # Each row of this tensor represents a bounding box with coordinates '[xmin, ymin, xmax, ymax]'
        boxes = tf.stack([xmin, ymin, xmax, ymax], axis = -1)
        
        return image, boxes, labels

NameError: name 'decoder' is not defined