In [None]:
!apt install cuda-11-8

In [None]:
!export CUDA_PATH=/usr/local/cuda-11.8/

In [None]:
# Sanity check: ask the NVIDIA driver which GPU(s) this runtime can see.
!nvidia-smi

In [None]:
!pip install --upgrade pip

In [None]:
# You need to update OpenCV if you are using Colab.
# Uncomment this line if you are using Colab.
!pip install opencv-python --upgrade

In [None]:
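# Optional (disabled): pin a specific cuDNN build.
# NOTE: this pin targets CUDA 11.2, while this notebook installs CUDA 11.8 above;
# adjust the version string before enabling it.
# !apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2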

In [None]:
import os

if not 'models' in os.listdir():
  !git clone --depth 1 https://github.com/tensorflow/models

In [None]:
# Install PyCocoTools
!pip install pycocotools

In [None]:
%%bash
# The `%%bash` cell magic runs the rest of this cell as a single bash script.
# It has to be the very first line of the cell: with comments placed above it,
# IPython does not recognise it as a cell magic.

# Abort at the first failing command, so a failed `cd` or `protoc` cannot end
# with `pip install .` running in the wrong directory.
set -e

# Change the directory to models/research
cd models/research/

# Compile the API's Protobuf files
protoc object_detection/protos/*.proto --python_out=.

# Copy the required Setup file
cp object_detection/packages/tf2/setup.py .

# Install the API using the setup.py file
python -m pip install .

In [None]:
!pip install -U tensorflow==2.13.0

In [None]:
# Test the API if everything was installed correctly
# (runs the TF2 model-builder test script shipped in the cloned repository).
!python models/research/object_detection/builders/model_builder_tf2_test.py

In [None]:
from zipfile import ZipFile
import urllib
import tarfile
import requests

import sys
import cv2

import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf

In [None]:
def download_file(url, save_name):
    """
    Download `url` and store the response body in the file `save_name`.

    Args:
    url: Address of the file to download.
    save_name: Local path the downloaded bytes are written to.

    Raises:
    requests.HTTPError: If the server answers with an error status. Nothing is
                        written in that case, so an error page is never saved
                        as if it were the requested file.
    """

    # Stream the body so a large archive is not held in memory all at once;
    # the timeout keeps a dead connection from hanging the notebook forever.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()

        # The context manager guarantees the file is flushed and closed
        with open(save_name, 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                out_file.write(chunk)

In [None]:
def unzip(zip_file=None):
    """
    Extract every member of the zip archive `zip_file` into the current
    working directory.

    A missing, unreadable or corrupt archive is reported with a printed
    message instead of an exception, so the notebook keeps running. Any other
    error (including a keyboard interrupt) is no longer swallowed.

    Args:
    zip_file: Path of the zip archive to extract.
    """
    # Imported here because the notebook's import cell only brings in ZipFile
    from zipfile import BadZipFile

    if zip_file is None:
        print("Invalid file: no archive path was given")
        return

    try:
        with ZipFile(zip_file) as z:
            z.extractall("./")
    except (BadZipFile, OSError) as error:
        # Say which archive failed and why, rather than a bare "Invalid file"
        print(f"Invalid file: {zip_file} ({error})")
    else:
        print("Extracted all")

In [None]:
# Fetch the archive that holds the PASCAL VOC subset used in this notebook.
download_file(
    'https://www.dropbox.com/s/415tmokwg5xkw99/pascal_voc_subset_data.zip?dl=1',
    'pascal_voc_subset_data.zip',
)

In [None]:
# Unpack the downloaded dataset archive into the current working directory.
unzip(zip_file='pascal_voc_subset_data.zip')

In [None]:
import os

In [None]:
# Download and unpack the helper scripts, unless a `tfod_utils` path is
# already present from an earlier run.
if not os.path.exists('tfod_utils'):
    download_file(
        'https://www.dropbox.com/sh/i5uds5hgzc0phik/AADHAlWQk7VmTSFtoWj9pBZJa?dl=1',
        'tfod_utils_pascal_voc_subset.zip',
    )
    unzip(zip_file='tfod_utils_pascal_voc_subset.zip')

In [None]:
# Build the label map for the dataset: one `item` entry per class, with the
# ids numbered from 1 in the order car, dog, person, tvmonitor.
pbtxt = '''
item {
    name: 'car',
    id: 1,
}

item {
    name: 'dog',
    id: 2,
}

item {
    name: 'person',
    id: 3,
}

item {
    name: 'tvmonitor',
    id: 4,
}
'''

# Persist the label map next to the notebook so later cells can refer to it
with open("labelmap.pbtxt", "w") as labelmap_file:
    labelmap_file.write(pbtxt)

In [None]:
# Generate TF Records
# create tf_record for train data
# Step 1: xml_to_csv.py is pointed at the `train` folder (-x) and writes train_data.csv (-c)
#         -- presumably it collects the XML annotations found there; confirm in tfod_utils.
# Step 2: generate_tfrecord.py receives that CSV, the image folder and the label map,
#         plus the path of the TF Record file to produce.
!python ./tfod_utils/xml_to_csv.py -x train -c train_data.csv
!python ./tfod_utils/generate_tfrecord.py --csv_input=train_data.csv --output_path=train_data.tfrecord --image_dir=train --labelmap_path=labelmap.pbtxt

In [None]:
# create tf_record for val data
# Same two steps as for the train split, applied to the `val` folder:
# first the CSV (val_data.csv), then the TF Record (val_data.tfrecord).
!python ./tfod_utils/xml_to_csv.py -x val -c val_data.csv
!python ./tfod_utils/generate_tfrecord.py --csv_input=val_data.csv --output_path=val_data.tfrecord --image_dir=val --labelmap_path=labelmap.pbtxt

In [None]:
# Drawing color used for each class name when boxes are rendered
color_dict = {
    'car': (255, 255, 0),
    'dog': (0, 255, 0),
    'person': (255, 125, 125),
    'tvmonitor': (0, 255, 255),
}

In [None]:
# Helper that decodes one TF Record entry so it can be visualized
def parse_record(data_record):
    """
    Decode a single serialized example taken from a TF Record dataset.

    Args:
    data_record:  One serialized example (an element of the TF Record
                  dataset), not a file path

    Returns:
    A tuple (fname, image_np, labels, x1norm, x2norm, y1norm, y2norm,
    num_bboxes, height, width):
      fname: value of the 'image/filename' field
      image_np: decoded 3-channel image as a NumPy array
      labels: class ids, one per bounding box
      x1norm, x2norm, y1norm, y2norm: xmin / xmax / ymin / ymax box values
          (presumably normalized to the image size -- the caller rescales
          them by width and height)
      num_bboxes: number of entries in `labels`
      height, width: first two dimensions of `image_np`
    """

    # Describe the fields to extract: two scalar strings, plus
    # variable-length lists for the labels and the four box coordinates
    feature = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/filename': tf.io.FixedLenFeature([], tf.string),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    }
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        feature[f'image/object/bbox/{corner}'] = tf.io.VarLenFeature(tf.float32)

    parsed_example = tf.io.parse_single_example(data_record, feature)

    def dense(key):
        # Variable-length fields arrive as sparse tensors; densify to NumPy
        return tf.sparse.to_dense(parsed_example[key], default_value=0).numpy()

    fname = parsed_example['image/filename'].numpy()

    # Decode the encoded bytes into a 3-channel array
    image_np = tf.image.decode_image(parsed_example['image/encoded'], channels=3).numpy()

    labels = dense('image/object/class/label')
    x1norm = dense('image/object/bbox/xmin')
    x2norm = dense('image/object/bbox/xmax')
    y1norm = dense('image/object/bbox/ymin')
    y2norm = dense('image/object/bbox/ymax')

    # One label per bounding box
    num_bboxes = len(labels)

    # Spatial size, taken from a single channel plane of the image
    height, width = image_np[:, :, 1].shape

    return (fname, image_np, labels, x1norm, x2norm, y1norm, y2norm,
            num_bboxes, height, width)

In [None]:
# Method to Visualize the TF Records File
def view_records(file_path, class_labels, num_images=10):
    """
    Peek at the data using OpenCV and TensorFlow tools.

    Each of the first `num_images` records is shown in its own matplotlib
    figure with its bounding boxes and class names drawn on top.

    Args:
      file_path: Path to tfrecord file (usually has 'record' extension)
      class_labels: Dictionary of labels with name:number pairs (start with 1)
      num_images (default 10): Maximum number of records to display. Fewer are
      shown when the file holds fewer records.
    """

    # Read the TF Record Dataset
    dataset = tf.data.TFRecordDataset([file_path])

    # Invert name:number into number:name once instead of rebuilding two lists
    # for every box. `setdefault` keeps the first name if an id is repeated.
    id_to_name = {}
    for name, label_id in class_labels.items():
        id_to_name.setdefault(label_id, name)

    # Iterate the dataset directly: every record is parsed exactly once (the
    # previous version parsed two records per image and threw the first away),
    # and the loop simply ends early when the file has fewer records.
    for shown, record in enumerate(dataset):
        if shown >= num_images:
            break

        # Get attributes about each image after being parsed
        (_fname, image_np, labels, x1norm, x2norm, y1norm, y2norm,
         num_bboxes, height, width) = parse_record(record)

        # The decoded image is RGB. OpenCV drawing assumes BGR, so work on a
        # BGR copy (cvtColor returns a new array; the original stays intact).
        image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

        # Multiply the width and height bounding box values to rescale them
        # Back to their original value
        x1 = (x1norm * width).astype(int)
        x2 = (x2norm * width).astype(int)
        y1 = (y1norm * height).astype(int)
        y2 = (y2norm * height).astype(int)

        # Loop over the number of the bounding boxes so each one can be drawn
        for bbox_ind in range(num_bboxes):

            # Plain Python ints: some OpenCV builds reject NumPy integer types
            left, top = int(x1[bbox_ind]), int(y1[bbox_ind])
            right, bottom = int(x2[bbox_ind]), int(y2[bbox_ind])

            # Get the Label Name of the bounding box; an id missing from
            # class_labels is shown as its number, drawn in white
            label_id = int(labels[bbox_ind])
            label_name = id_to_name.get(label_id, str(label_id))
            color = color_dict.get(label_name, (255, 255, 255))

            # Draw the bounding box
            cv2.rectangle(image_bgr, (left, top), (right, bottom), color, 2)

            # Get text width and height
            ((text_width, text_height), _) = cv2.getTextSize(label_name, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2)

            # Filled background strip for the label, just above the box
            cv2.rectangle(image_bgr, (left, top - int(0.9 * text_height)),
                          (left + int(0.4 * text_width), top), color, thickness=-1)

            # Write the class name for each bounding box
            cv2.putText(image_bgr, label_name,
                        (left, top - int(0.3 * text_height)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)

        # Show the image (flip the channel order back to RGB for matplotlib)
        plt.figure(figsize=(10, 10))
        plt.imshow(image_bgr[..., ::-1])
        plt.axis('off')
        plt.title(f"Height/Width: {height, width}, Num bboxes: {num_bboxes}")

In [None]:
# Create a class_labels dictionary same as the label map
class_labels =  {"car" : 1, "dog": 2, "person": 3, "tvmonitor": 4}


# We're visualizing the validation TF Record here; point data_path at
# "train_data.tfrecord" to inspect the training record file instead.
data_path = "val_data.tfrecord"

# Call the view_records
view_records(data_path, class_labels)

In [None]:
# Download and Train Faster RCNN (with ResNet101 backbone)

In [None]:
# Function to Download any model from Model Zoo from their URL
def download_model(model_name, url):
    """
    Download a `.tar.gz` model archive and extract it into a directory.

    Args:
    model_name: Name used for both the saved archive (`<model_name>.tar.gz`)
                and the directory the archive is extracted into.
    url: Address of the `.tar.gz` archive to download.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.request

    archive_path = model_name + '.tar.gz'

    # urlretrieve replaces the deprecated URLopener class
    urllib.request.urlretrieve(url, archive_path)

    # Extract the Model. The context manager closes the archive; the previous
    # `tar.close` (no parentheses) never actually called the method.
    # NOTE(review): extractall trusts the member paths inside the archive --
    # only use it on archives from a trusted source.
    with tarfile.open(archive_path) as tar:
        tar.extractall(model_name)

In [None]:
# Where the pre-trained checkpoint archive lives
model_url = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet101_v1_640x640_coco17_tpu-8.tar.gz'

# Local directory the archive is extracted into
model_directory = 'Faster_RCNN_Resnet_101'

# Archive file name up to its first dot:
# faster_rcnn_resnet101_v1_640x640_coco17_tpu-8
model_name = model_url.rsplit('/', 1)[-1].partition('.')[0]

# Download and extract Faster R-CNN
download_model(model_directory, model_url)

In [None]:
# --- Data --------------------------------------------------------------
# TF Record files used for training and for evaluation
train_record_path = 'train_data.tfrecord'
test_record_path = 'val_data.tfrecord'

# Label map of the dataset and the number of classes it defines
labelmap_path = 'labelmap.pbtxt'
num_classes = 4

# --- Pre-trained model -------------------------------------------------
# Pipeline file and checkpoint that came with the downloaded model
base_config_path = os.path.join(model_directory, model_name, 'pipeline.config')
fine_tune_checkpoint_path = os.path.join(model_directory, model_name, 'checkpoint', 'ckpt-0')

# Restore the weights as a detection model
fine_tune_checkpoint_type = 'detection'

# --- Optimisation ------------------------------------------------------
batch_size = 5

# Number of training steps.
# For a quick check of the training pipeline, use the shorter run below.
num_steps = 20000
# num_steps = 1000

# Initial learning rate
initial_lr = 0.0003

# Set this to True only if using TPU for training
bfloat = False

In [None]:
# Import the API's config_util file to read the config file and edit it
from object_detection.utils import config_util

In [None]:
def update_fine_tune_checkpoint_path(train_config, checkpoint_path):
    """
    Set the checkpoint that fine-tuning starts from. The config object is
    modified in place; nothing is returned.

    Args:
    train_config: train_pb2.TrainConfig object.
    checkpoint_path: path to the pre-trained model's checkpoint.
    """
    train_config.fine_tune_checkpoint = checkpoint_path

In [None]:
def update_fine_tune_checkpoint_type(train_config, fine_tune_checkpoint_type):
    """
    Set how the fine-tune checkpoint is restored. The config object is
    modified in place; nothing is returned.

    Args:
    train_config: train_pb2.TrainConfig object.
    fine_tune_checkpoint_type: determines the type of weights that are restored
                               from the pre-trained fine_tune_checkpoint.
                               Can be either of: "classification", "detection" or "full".
    """
    train_config.fine_tune_checkpoint_type = fine_tune_checkpoint_type

In [None]:
# Load the downloaded pipeline file as a dictionary of config sections
configs = config_util.get_configs_from_pipeline_file(base_config_path)

# Point the train and eval input readers at our TF Record files
config_util.update_input_reader_config(
    configs, 'train_input_config', 'tf_record_input_reader', 'input_path', train_record_path)
config_util.update_input_reader_config(
    configs, 'eval_input_config', 'tf_record_input_reader', 'input_path', test_record_path)

# Start from the pre-trained checkpoint and restore it as the chosen type
update_fine_tune_checkpoint_path(configs['train_config'], fine_tune_checkpoint_path)
update_fine_tune_checkpoint_type(configs['train_config'], fine_tune_checkpoint_type)

# Training schedule: batch size, then number of steps
config_util._update_batch_size(configs, batch_size)
config_util._update_train_steps(configs, num_steps)

# Dataset description: class count, then label map location
config_util._update_num_classes(configs['model'], num_classes)
config_util._update_label_map_path(configs, labelmap_path)

# bfloat16 switch, then the initial learning rate
config_util._update_use_bfloat16(configs, bfloat)
config_util._update_initial_learning_rate(configs, initial_lr)

# Turn the edited sections back into a single pipeline proto ...
configs_file = config_util.create_pipeline_proto_from_configs(configs)

# ... and save it into the current directory
config_util.save_pipeline_config(configs_file, './')

In [None]:
# Output directory for the run: the model's weights and the training logs
# are stored here during training.
model_dir = "training_of_model"

In [None]:
# This loads the TensorBoard extension in the notebook
%load_ext tensorboard

# Magic command to launch TensorBoard on the training output directory
%tensorboard --logdir {model_dir}

In [None]:
# Train Model

# Edited pipeline configuration file, used by the training command below
pipeline_config_path = "pipeline.config"

In [None]:
# Use the TFOD API Script to Initiate Training
# {pipeline_config_path} and {model_dir} are filled in from the Python
# variables defined in the cells above.
!python ./models/research/object_detection/model_main_tf2.py \
  --pipeline_config_path={pipeline_config_path} \
  --model_dir={model_dir} \
  --alsologtostderr

In [None]:
# Second run of the same script, this time also passing --checkpoint_dir
# (set to the training output directory).
# NOTE(review): with --checkpoint_dir set, model_main_tf2.py is expected to
# evaluate the saved checkpoints instead of training -- confirm against the script.
!python ./models/research/object_detection/model_main_tf2.py \
  --model_dir {model_dir} \
  --pipeline_config_path {pipeline_config_path} \
  --checkpoint_dir {model_dir}