In [None]:
# Colab users need a newer OpenCV than the preinstalled one, so upgrade it first.
# The install line below is active as written; comment it out if your
# environment already provides a recent OpenCV.
!pip install opencv-python --upgrade

In [None]:
import os

if not 'models' in os.listdir():
  !git clone --depth 1 https://github.com/tensorflow/models

In [None]:
# The `%%bash` magic command inside a notebook lets you run a cell run like a shell interface
# Note: the `bash` command works only on Colab.
%%bash

# Change the directory to models/research
cd models/research/

# Compile the API's Protobuf files
protoc object_detection/protos/*.proto --python_out=.

# Copy the required Setup file
cp object_detection/packages/tf2/setup.py .

# Install the API using the setup.py file
python -m pip install .

In [None]:
# Pin protobuf to 3.20.0 to work around this error:
#   TypeError: Descriptors cannot be created directly

!pip install protobuf==3.20.0

In [None]:
# Restart the session at this point (presumably so the packages installed above
# are picked up), then continue from this cell.

import os
# Ask TensorFlow to use legacy Keras 2 for backwards compatibility.
# NOTE(review): this is set before the cell that imports TensorFlow; presumably
# that ordering is required for the flag to take effect — confirm.
os.environ["TF_USE_LEGACY_KERAS"] = "1"

In [None]:
from zipfile import ZipFile
import urllib
import requests

from io import BytesIO
from urllib.request import urlopen

import cv2

import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf

In [None]:
def download_file(url, save_name):
    """
    Download the resource at `url` and store it on disk as `save_name`.

    Args:
      url: Address fetched with an HTTP GET request.
      save_name: Destination path; the file is created or overwritten.

    Raises:
      requests.HTTPError: If the server answers with a 4xx/5xx status. Nothing
        is written in that case, so an error page is never saved in place of
        the expected file.
    """
    response = requests.get(url)

    # Fail loudly on HTTP errors instead of writing the error body to disk.
    response.raise_for_status()

    # The context manager guarantees the handle is flushed and closed.
    with open(save_name, 'wb') as out_file:
        out_file.write(response.content)

In [None]:
def unzip(zip_file=None):
    """
    Extract every member of a zip archive into the current working directory.

    Prints "Extracted all" on success. Prints "Invalid file" when no archive
    was given, or when the archive is missing, unreadable or not a valid zip
    file. Any other exception (including KeyboardInterrupt) propagates to the
    caller instead of being reported as an invalid file.

    Args:
      zip_file: Path of the archive to extract (anything `ZipFile` accepts).
    """
    # Imported locally so the function does not depend on an extra
    # notebook-level import.
    from zipfile import BadZipFile

    # Without an archive there is nothing to open; report it the same way a
    # failed open is reported.
    if zip_file is None:
        print("Invalid file")
        return

    try:
        with ZipFile(zip_file) as z:
            z.extractall("./")
            print("Extracted all")
    except (BadZipFile, OSError):
        # BadZipFile: content is not a zip archive.
        # OSError: missing file, permission problem or other I/O failure.
        print("Invalid file")

In [None]:
# Fetch the PASCAL VOC subset archive and store it next to the notebook.
download_file(
    url='https://www.dropbox.com/s/415tmokwg5xkw99/pascal_voc_subset_data.zip?dl=1',
    save_name='pascal_voc_subset_data.zip',
)

In [None]:
# Unpack the downloaded dataset archive into the working directory.
unzip('pascal_voc_subset_data.zip')

In [None]:
# Download and unpack the helper scripts unless `tfod_utils` already exists.
if not os.path.exists('tfod_utils'):
    download_file(
        url='https://www.dropbox.com/sh/i5uds5hgzc0phik/AADHAlWQk7VmTSFtoWj9pBZJa?dl=1',
        save_name='tfod_utils_pascal_voc_subset.zip',
    )
    unzip('tfod_utils_pascal_voc_subset.zip')

In [None]:
# Generate a LabelMap File

In [None]:
# Label map of the dataset: one `item` entry per class, each pairing a class
# name with an integer id (ids run from 1 to 4 here). The text is written to
# labelmap.pbtxt, which the TFRecord generation step receives via
# --labelmap_path.
pbtxt = '''
item {
    name: 'car',
    id: 1,
}

item {
    name: 'dog',
    id: 2,
}

item {
    name: 'person',
    id: 3,
}

item {
    name: 'tvmonitor',
    id: 4,
}
'''

# Write the label map to labelmap.pbtxt in the current working directory,
# overwriting any existing file of that name.
with open("labelmap.pbtxt", "w") as text_file:
    text_file.write(pbtxt)

In [None]:
# Generate TF Records

In [None]:
# Create val_data.csv for the validation data with the xml_to_csv.py helper.
# NOTE(review): presumably -x names the directory holding the XML annotations
# and -c the CSV file to write — confirm against tfod_utils/xml_to_csv.py.
!python ./tfod_utils/xml_to_csv.py -x val -c val_data.csv

In [None]:
# Create val_data.tfrecord for the validation data. The helper is given the CSV
# produced in the previous step, the `val` image directory and the label map
# written earlier (labelmap.pbtxt).
!python ./tfod_utils/generate_tfrecord.py --csv_input=val_data.csv --output_path=val_data.tfrecord --image_dir=val --labelmap_path=labelmap.pbtxt

In [None]:
# Visualize data from TF Records

In [None]:
# Drawing colour for each class name, as a 3-tuple of 0-255 channel values.
color_dict = dict(
    car=(255, 255, 0),
    dog=(0, 255, 0),
    person=(255, 125, 125),
    tvmonitor=(0, 255, 255),
)

In [None]:
# Method to parse the TF Record so we can visualize it.
def parse_record(data_record):
    """
    Parse one serialized example taken from a TFRecord dataset.

    The function calls `.numpy()` on the parsed tensors, so it has to run
    eagerly (not inside a `tf.function` / `Dataset.map`).

    Args:
      data_record: A single serialized record, e.g. one element obtained while
        iterating a `tf.data.TFRecordDataset`. This is the record content
        itself, not the path of the TFRecord file.

    Returns:
      A tuple `(fname, image_np, labels, x1norm, x2norm, y1norm, y2norm,
      num_bboxes, height, width)`:
        fname: Value of the 'image/filename' feature.
        image_np: Decoded image as a (height, width, 3) array.
        labels: Class ids from 'image/object/class/label', one per box.
        x1norm, x2norm: Values of 'image/object/bbox/xmin' and '.../xmax'.
        y1norm, y2norm: Values of 'image/object/bbox/ymin' and '.../ymax'.
        num_bboxes: Number of boxes (length of `labels`).
        height, width: Image size in pixels, taken from `image_np`.
    """

    # Features to pull out of the record. Per-object features are
    # variable-length because the number of boxes differs between images.
    feature = {'image/encoded': tf.io.FixedLenFeature([], tf.string),
               'image/object/class/label': tf.io.VarLenFeature(tf.int64),
               'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
               'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
               'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
               'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
               'image/filename': tf.io.FixedLenFeature([], tf.string)
               }

    # Parse the serialized record.
    parsed_example = tf.io.parse_single_example(data_record, feature)

    # Get the file name from the parsed entry.
    fname = parsed_example['image/filename'].numpy()

    # Decode the encoded image into a 3-channel array.
    # expand_animations=False keeps the result 3-D even for an animated GIF
    # (first frame only), so the height/width unpacking below always works.
    image_np = tf.image.decode_image(parsed_example['image/encoded'],
                                     channels=3,
                                     expand_animations=False).numpy()

    def _dense(key):
        # VarLenFeature yields a SparseTensor; densify it to a plain array.
        return tf.sparse.to_dense(parsed_example[key], default_value=0).numpy()

    labels = _dense('image/object/class/label')
    x1norm = _dense('image/object/bbox/xmin')
    x2norm = _dense('image/object/bbox/xmax')
    y1norm = _dense('image/object/bbox/ymin')
    y2norm = _dense('image/object/bbox/ymax')

    # Number of bounding boxes in the image.
    num_bboxes = len(labels)

    # The first two axes of the decoded image are height and width.
    height, width = image_np.shape[:2]

    # Return the parsed image attributes.
    return fname, image_np, labels, x1norm, x2norm, y1norm, y2norm, num_bboxes, height, width

In [None]:
# Method to Visualize the TF Records File
def view_records(file_path, class_labels, num_images=10):
    """
    Peek at the data using OpenCV and TensorFlow tools.

    Reads records from a TFRecord file, draws each bounding box with its class
    name on the image and shows one matplotlib figure per record.

    Args:
      file_path: Path to tfrecord file (usually has 'record' extension)
      class_labels: Dictionary of labels with name:number pairs (start with 1)
      num_images (default 10): Maximum number of records to display. Fewer
        figures are produced when the file holds fewer records.

    Raises:
      ValueError: If a record contains a label id that is not a value of
        `class_labels`.
      KeyError: If a class name has no entry in the notebook-level
        `color_dict`.
    """

    # Invert name -> id once instead of searching the dict for every box.
    # setdefault keeps the first name when an id occurs twice, which is what a
    # first-match search over the values would return.
    id_to_name = {}
    for name, class_id in class_labels.items():
        id_to_name.setdefault(class_id, name)

    # Read the TF Record Dataset
    dataset = tf.data.TFRecordDataset([file_path])

    # take() stops cleanly at the end of the file, and each record is parsed
    # exactly once so no record is skipped.
    for record in dataset.take(num_images):

        # Get attributes about the image after being parsed
        (_fname, image_np, labels, x1norm, x2norm, y1norm, y2norm,
         num_bboxes, height, width) = parse_record(record)

        # The decoded image is RGB. Drawing happens on a BGR copy (cvtColor
        # returns a new array, so image_np stays untouched); the channels are
        # flipped back for display below. The tuples in color_dict are
        # therefore applied as BGR values.
        canvas_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

        # Check to see if an image has bounding boxes
        if num_bboxes > 0:

            # Scale the box coordinates by the image width/height to get pixel
            # positions. tolist() yields plain Python ints, which every OpenCV
            # version accepts as point coordinates.
            x1 = (x1norm * width).astype(np.int64).tolist()
            x2 = (x2norm * width).astype(np.int64).tolist()
            y1 = (y1norm * height).astype(np.int64).tolist()
            y2 = (y2norm * height).astype(np.int64).tolist()

            # Loop over the bounding boxes so each one can be drawn
            for bbox_ind in range(num_bboxes):

                # Create a tuple of the bbox values
                bbox = (x1[bbox_ind], y1[bbox_ind], x2[bbox_ind], y2[bbox_ind])

                # Get the label name of the bounding box
                class_id = int(labels[bbox_ind])
                if class_id not in id_to_name:
                    raise ValueError(
                        f"Label id {class_id} is not present in class_labels")
                label_name = id_to_name[class_id]

                color = color_dict[label_name]

                # Draw the bounding box
                cv2.rectangle(canvas_bgr, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              color, 2)

                # Get text width and height
                ((text_width, text_height), _) = cv2.getTextSize(
                    label_name, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2)

                # Filled background strip above the box for the class name
                cv2.rectangle(canvas_bgr,
                              (bbox[0], bbox[1] - int(0.9 * text_height)),
                              (bbox[0] + int(0.4 * text_width), bbox[1]),
                              color, thickness=-1)

                # Write the class name for each bounding box
                cv2.putText(canvas_bgr, label_name,
                            (bbox[0], bbox[1] - int(0.3 * text_height)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)

        # Show the image; reversing the channel axis turns BGR back into the
        # RGB order matplotlib expects.
        plt.figure(figsize=(10, 10))
        plt.imshow(canvas_bgr[..., ::-1])
        plt.axis('off')
        plt.title(f"Height/Width: {height, width}, Num bboxes: {num_bboxes}")