# **Object Detection**

<a target="_blank" href="https://github.com/amaciag/CV-Experiments/blob/main/Shampoo_Object_Detection.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png"/>View source on GitHub</a>

Object detection is the supervised learning task of locating and identifying objects in an image or video. This tutorial shows how to fine-tune a pretrained TensorFlow object detection model on a shampoo image dataset, using transfer learning and data augmentation.

## **Resize output cell size**

In [None]:
# Set the default max height=300 for output cells

from google.colab import drive
from IPython.display import Javascript

def resize_output_cell(height=300):
  """Cap the height of the current Colab output cell.

  Args:
    height: Maximum output height in pixels. A value that cannot be
      converted to an int falls back to 300.
  """
  # This function is also registered as a 'pre_run_cell' callback. IPython may
  # call such callbacks with an execution-info object as the first positional
  # argument, which would land in `height` and make '%i' formatting raise.
  try:
    max_height = int(height)
  except (TypeError, ValueError):
    max_height = 300
  display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: %i})''' % max_height))

get_ipython().events.register('pre_run_cell', resize_output_cell)

## **Install and import appropriate modules and their dependencies**

In [None]:
# Clone tensorflow/models repo

!git clone https://github.com/tensorflow/models

In [None]:
# Install the object_detection package and its dependencies

%%bash

sudo apt install -y protobuf-compiler
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .
pip install opencv-python-headless==4.1.2.30

In [None]:
# Import object_detection module and other Python libraries

from __future__ import absolute_import, division, print_function, unicode_literals

import io
import json
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import requests
import tensorflow as tf

from google.colab import drive
from IPython.display import Javascript
from object_detection.utils import config_util, dataset_util, label_map_util
from object_detection.utils import visualization_utils as viz_utils
from PIL import Image

%matplotlib inline

## **Create reusable functions**

In [None]:
# Create a tf example to store image features
# TF example is used to structure a TF Record
def create_tf_example(data, timeout=30):
    """Build a tf.train.Example for one labeled image.

    Downloads the image at ``data['Labeled Data']``, divides every object's
    bounding box by the image width/height, and packs the image bytes, file
    name, boxes and class labels into the feature keys listed below.

    Class ids are looked up in the module-level ``label_map_dict``, which
    ``generate_tf_record`` sets before calling this function.

    Args:
        data: Dict with the keys 'Labeled Data' (image URL), 'External ID'
            (file name) and 'Label' -> 'objects'; each object has a 'title'
            and a 'bbox' with 'left', 'top', 'width' and 'height'.
        timeout: Seconds to wait for the image download before giving up.

    Returns:
        A ``tf.train.Example`` describing the image and its objects.

    Raises:
        requests.HTTPError: The image URL answered with an error status.
        requests.Timeout: The download did not finish within ``timeout``.
        KeyError: An object title is missing from ``label_map_dict``.
    """
    # Fail fast on a bad response instead of handing an error page to PIL,
    # and never block forever on an unresponsive host.
    response = requests.get(data['Labeled Data'], timeout=timeout)
    response.raise_for_status()
    encoded_jpg = response.content

    # Only the dimensions are needed; close the image handle right away.
    with Image.open(io.BytesIO(encoded_jpg)) as image:
        width, height = image.size

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for obj in data['Label']['objects']:
        bbox = obj['bbox']
        xmin = bbox['left']
        ymin = bbox['top']
        xmax = xmin + bbox['width']
        ymax = ymin + bbox['height']
        # Store box corners as fractions of the image size
        xmins.append(xmin / width)
        xmaxs.append(xmax / width)
        ymins.append(ymin / height)
        ymaxs.append(ymax / height)
        classes_text.append(obj['title'].encode('utf8'))
        classes.append(label_map_dict[obj['title']])

    filename = data['External ID'].encode('utf8')
    # NOTE(review): the format is hard-coded; assumes every image is a JPEG.
    image_format = b'jpg'
    tf_example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label': dataset_util.int64_list_feature(classes),
        }))
    return tf_example

# Generate a tf record by serializing a tf example into a binary format
# Binary data can be read more efficiently
def generate_tf_record(json_input, output_path, label_map):
    """Write one TFRecord file from a JSON list of labeled images.

    Loads the label map into the module-level ``label_map_dict`` (read by
    ``create_tf_example``), then serializes one tf.train.Example per entry
    of the JSON file into ``output_path``.

    Args:
        json_input: Path of the JSON file holding a list of labeled images.
        output_path: Path of the TFRecord file to create.
        label_map: Path of the label map passed to
            ``label_map_util.get_label_map_dict``.
    """
    global label_map_dict
    label_map_dict = label_map_util.get_label_map_dict(label_map)

    with tf.io.gfile.GFile(json_input, 'r') as json_file:
        dataset = json.load(json_file)

    # The context manager closes the writer even when building an example
    # fails part-way through the dataset.
    with tf.io.TFRecordWriter(output_path) as writer:
        for data in dataset:
            tf_example = create_tf_example(data)
            writer.write(tf_example.SerializeToString())
    print(f'Successfully created TF-records {output_path}')

# Update a model's training configuration
def update_pipeline_config(pipeline_config, args):
  """Apply the training settings in `args` to a pipeline configuration.

  The entries of `pipeline_config` are modified in place.

  Args:
    pipeline_config: Mapping with the entries 'model', 'train_config',
      'eval_input_config' and 'train_input_config'.
    args: Mapping with the keys 'num_classes', 'batch_size',
      'fine_tune_checkpoint_type', 'fine_tune_checkpoint_path',
      'label_map_path', 'eval_input_path' and 'train_input_path'.

  Returns:
    The same `pipeline_config` mapping that was passed in.
  """
  pipeline_config['model'].ssd.num_classes = args['num_classes']

  train_config = pipeline_config['train_config']
  train_config.batch_size = args['batch_size']
  train_config.fine_tune_checkpoint_type = args['fine_tune_checkpoint_type']
  train_config.fine_tune_checkpoint = args['fine_tune_checkpoint_path']

  # Both input readers share the label map; only the record path differs.
  # The first existing input path is overwritten, not appended to.
  for section, record_key in (('eval_input_config', 'eval_input_path'),
                              ('train_input_config', 'train_input_path')):
    input_config = pipeline_config[section]
    input_config.label_map_path = args['label_map_path']
    input_config.tf_record_input_reader.input_path[0] = args[record_key]

  return pipeline_config

# Get an image numpy array
def load_image_into_numpy_array(path):
    """Load an image as an RGB numpy array.

    Args:
        path: Anything ``PIL.Image.open`` accepts; this file passes an
            in-memory bytes buffer.

    Returns:
        A numpy array with three channels in RGB order on its last axis.
    """
    # Converting to RGB gives grayscale, palette and RGBA images the same
    # three-channel layout; the `with` releases the image handle afterwards.
    with Image.open(path) as image:
        return np.array(image.convert('RGB'))

# Get a single prediction
def get_prediction(image_url, timeout=30):
  """Download one image and run the loaded detection model on it.

  Relies on the module-level ``detect_fn``, which must be loaded before this
  function is called.

  Args:
    image_url: URL of the image to run detection on.
    timeout: Seconds to wait for the image download before giving up.

  Returns:
    A tuple ``(image_np, detections)``: the image as a numpy array, and a
    dict of numpy arrays with the batch dimension removed. The dict also
    holds 'num_detections' as an int and 'detection_classes' as int64.

  Raises:
    requests.HTTPError: The image URL answered with an error status.
    requests.Timeout: The download did not finish within ``timeout``.
  """
  # Fail fast on a bad response and never block forever on a dead host.
  response = requests.get(image_url, timeout=timeout)
  response.raise_for_status()
  image_np = load_image_into_numpy_array(io.BytesIO(response.content))

  # Add a leading batch axis before calling the model
  input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]
  detections = detect_fn(input_tensor)

  # All outputs are batched tensors.
  # Convert to numpy arrays, and take index [0] to remove the batch dimension.
  # We're only interested in the first num_detections.
  num_detections = int(detections.pop('num_detections'))
  detections = {
      key: value[0, :num_detections].numpy()
      for key, value in detections.items()
  }
  detections['num_detections'] = num_detections
  detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

  return image_np, detections


## **Create directories and download shampoo image data**

In [None]:
%%bash

if [ ! -d workspace ]; then
  mkdir -p workspace/{pretrained_models,trained_models,data}
fi

wget -O workspace/data/eval.json https://raw.githubusercontent.com/amaciag/CV-Experiments/main/data/eval.json
wget -O workspace/data/train.json https://raw.githubusercontent.com/amaciag/CV-Experiments/main/data/train.json

In [None]:
# Encode labels through mapping
# Image background is represented as 0

# One entry per shampoo brand. Values are written verbatim into the label map,
# so each name carries its own double quotes.
items = [
  {'id': '1', 'name': '"Loreal"'},
  {'id': '2', 'name': '"Pantene"'},
  {'id': '3', 'name': '"Head_&_Shoulders"'},
]

# Render every entry as an `item { ... }` block with two-space indented fields
expression = ''.join(
  'item {\n'
  + ''.join(f'  {field}: {text}\n' for field, text in item.items())
  + '}\n'
  for item in items
)

with open('workspace/data/shampoo_label_map.pbtxt', 'w') as label_map_file:
  label_map_file.write(expression)
print(expression)

In [None]:
# Convert images into binary data

# Build the training record first, then the evaluation record, both against
# the same label map
for split in ('train', 'eval'):
  generate_tf_record(
      f'workspace/data/{split}.json',
      f'workspace/data/{split}.record',
      'workspace/data/shampoo_label_map.pbtxt',
  )

## **Select and download a pretrained object detection model**

In [None]:
# Select a pretrained object detection model

# Display name -> download URL of the pretrained model archive
MODELS = dict([
    (
        "SSD MobileNet v2 320x320",
        "http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz",
    ),
    (
        "EfficientDet D0 512x512",
        "http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz",
    ),
    (
        "EfficientDet D2 768x768",
        "http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d2_coco17_tpu-32.tar.gz",
    ),
])

#@title Model Selection {display-mode: "form", run: "auto"}
selected_model = "EfficientDet D0 512x512" # @param ['EfficientDet D0 512x512','EfficientDet D2 768x768','SSD MobileNet v2 320x320']
# The archive file name is whatever follows the last '/' of the URL
file = MODELS[selected_model].rpartition('/')[2]

Visit <a target="_blank" href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md">TensorFlow 2 Detection Model Zoo</a> to explore other pretrained object detection models.

In [None]:
!wget {MODELS[selected_model]} --directory-prefix workspace/pretrained_models
!tar -xf workspace/pretrained_models/{file} -C workspace/pretrained_models

## **Tweak the model's training configuration**

In [None]:
# The extracted directory is named after the archive without its extension
model_dir = file.replace('.tar.gz', '')
pipeline_config_path = f'workspace/pretrained_models/{model_dir}/pipeline.config'
pipeline_config = config_util.get_configs_from_pipeline_file(pipeline_config_path)

# Settings applied on top of the pretrained model's pipeline configuration
args = dict(
    num_classes=3,
    batch_size=4,
    fine_tune_checkpoint_type='detection',
    fine_tune_checkpoint_path=f'workspace/pretrained_models/{model_dir}/checkpoint/ckpt-0',
    label_map_path='workspace/data/shampoo_label_map.pbtxt',
    eval_input_path='workspace/data/eval.record',
    train_input_path='workspace/data/train.record',
)

# Patch the configs, convert them back into a pipeline proto, and save the
# result into the training directory
pipeline_config = config_util.create_pipeline_proto_from_configs(
    update_pipeline_config(pipeline_config, args)
)
config_util.save_pipeline_config(pipeline_config, 'workspace/trained_models')

## **Train a pretrained model**

In [None]:
resize_output_cell(600)
train_dir = f'workspace/trained_models/{model_dir}'
%reload_ext tensorboard
%tensorboard --logdir={train_dir}/checkpoints

In [None]:
!python models/research/object_detection/model_main_tf2.py --pipeline_config_path=workspace/trained_models/pipeline.config \
  --model_dir={train_dir}/checkpoints --checkpoint_every_n=100 --num_train_steps=500 --alsologtostderr

## **Get and visualize predictions**

In [None]:
# Export a trained inference graph from checkpoints
# This stores neural network operations and trained parameters in a graph

!cp models/research/object_detection/exporter_main_v2.py .
!python exporter_main_v2.py --input_type image_tensor \
  --pipeline_config_path workspace/trained_models/pipeline.config  \
  --trained_checkpoint_dir {train_dir}/checkpoints \
  --output_directory {train_dir}

In [None]:
# Read in the trained model
# The export step above writes its output under `train_dir`; the loader is
# pointed at the 'saved_model' subdirectory of that output.
# NOTE(review): assumes the exporter creates 'saved_model' there — confirm.
saved_model_path = os.path.join(train_dir, 'saved_model')
# `detect_fn` is the callable that get_prediction() invokes on each image batch
detect_fn = tf.saved_model.load(saved_model_path)

In [None]:
# Predict and visualize results

resize_output_cell(800)

# Use a handle name other than `file`: that global holds the model archive
# name, and rebinding it here would break re-running the earlier cells.
with tf.io.gfile.GFile('workspace/data/eval.json', 'r') as eval_file:
  eval_data = json.load(eval_file)

category_index = label_map_util.create_category_index_from_labelmap('./workspace/data/shampoo_label_map.pbtxt', use_display_name=True)

for data in eval_data:

    url = data['Labeled Data']
    image_np, detections = get_prediction(url)
    # Draws boxes and labels directly onto image_np
    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np,
          detections['detection_boxes'],
          detections['detection_classes'],
          detections['detection_scores'],
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=200,
          min_score_thresh=.30,
          agnostic_mode=False)

    # Title the plot with the class of the detection whose best per-class
    # score is the highest.
    # NOTE(review): if 'detection_multiclass_scores' includes a background
    # column, this may disagree with 'detection_scores' — confirm.
    best_detection = np.argmax(np.max(detections['detection_multiclass_scores'], axis=1))
    shampoo = category_index[detections['detection_classes'][best_detection]]['name']
    plt.figure(figsize=(20,20))
    plt.title(shampoo)
    plt.imshow(image_np)
    # Show each figure as soon as it is ready instead of keeping every
    # 20x20 figure open until the loop ends.
    plt.show()