## Object Detection - Tensorflow Models
- Code adapted from the TensorFlow Object Detection API tutorial: https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/auto_examples/plot_object_detection_saved_model.html#sphx-glr-auto-examples-plot-object-detection-saved-model-py

In [1]:
#import statements
import numpy as np
from PIL import Image
%matplotlib inline

import cv2

from pathlib import Path

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import pathlib
import tensorflow as tf

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

import bbox_visualizer as bbv
#img_bbox = bbv.draw_rectangle(img_arr, [427, 347, 1278, 672])

import pandas as pd
import image_preprocessing_library as lib

### Model Name List

In [2]:
# Models to download. Each entry is a (model name, release date) tuple; the two
# parts are combined into the download URL by download_model().
# Only un-commented entries are downloaded; the commented-out entries are kept
# as a record of models already processed ("done") or known to fail.
model_name_list = [
    #('efficientdet_d0_coco17_tpu-32', '20200711'), # done
    #('efficientdet_d7_coco17_tpu-32', '20200711') #done
    #('ssd_mobilenet_v2_320x320_coco17_tpu-8', '20200711'), #done
    #('ssd_resnet50_v1_fpn_640x640_coco17_tpu-8', '20200711') #done
    #('faster_rcnn_resnet50_v1_640x640_coco17_tpu-8', '20200711'), #done
    ('faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8', '20200711')
    #('ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8', '20200711'), #done
    #('mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8', '20200711'), #some error, needs to be solved first, https://github.com/tensorflow/models/issues/9255
    
]

In [3]:
# Directory whose files are listed and opened as input images by load_images_batch().
dataset_path = Path("../../../dataset/object_detection/images")
# Directory where store_results() writes one "<model_name>.csv" file.
detection_result_path = Path("../experiment_results/final_results/")
# Column order of the result CSV: the first seven columns come from
# prepare_per_image_res(), the last two are appended by store_results().
df_columns = ["image_name", "label", "detection_score", "ymin", "xmin", "ymax", "xmax", "model_name", "processing_seq_name"]

In [4]:
# Model used for this run. It is used as a key into model_dir_path_dict (built
# from model_name_list), so it must match the name of an un-commented entry there.
# It also names the result CSV and fills the "model_name" column.
model_name = "faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8"
#model_name = "efficientdet_d0_coco17_tpu-32"

### Download the COCO Labels

In [5]:
def download_labels(filename):
    """Download a label-map file and return its local path.

    The file is fetched with ``tf.keras.utils.get_file`` from the
    ``research/object_detection/data`` folder of the tensorflow/models
    repository (master branch).

    Args:
        filename: Name of the label-map file inside that folder.

    Returns:
        The local file path as a string.
    """
    source_url = ('https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/'
                  + filename)
    local_file = tf.keras.utils.get_file(fname=filename,
                                         origin=source_url,
                                         untar=False)
    # Round-trip through pathlib before returning a plain string.
    return str(pathlib.Path(local_file))

# Name of the COCO label-map file to download.
LABEL_FILENAME = 'mscoco_label_map.pbtxt'
# Local path (string) of the downloaded label map, as returned by download_labels().
PATH_TO_LABELS = download_labels(LABEL_FILENAME)

In [6]:
# Show where the label map was stored locally.
PATH_TO_LABELS

'C:\\Users\\shubham\\.keras\\datasets\\mscoco_label_map.pbtxt'

### Load label map data (for plotting)

In [7]:
# Lookup built from the downloaded label map. It is indexed by class id in
# get_label_name() (each entry exposes a "name" field) and passed to the
# visualisation helper in visualise_results().
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS,
                                                                    use_display_name=True)

### Pre Processing Sequence

In [8]:
# Pre-processing sequences to evaluate.
# Key: sequence id. Value: ordered list of operation names; each name is looked
# up in lib.dispatcher and applied in order by apply_cv_transformations().
# The inference loop iterates over the keys of this dict, so commented-out
# sequences are skipped.
pre_processing_seq_dict = {
#     "seq_0" : [], # for raw seq
#     "seq_1" : ["gray"],
#     "seq_2" : ["hsv"],
#     "seq_3" : ["sharpen"],
#     "seq_4" : ["gray", "bilateral_blur", "threshold_mean"],
#     "seq_5" : ["gray", "bilateral_blur", "threshold_gaussian"],
#     "seq_6" : ["gray", "bilateral_blur", "threshold_otsu"],
#     "seq_7" : ["median_blur"],
    "seq_8" : ["gaussian_blur"],
    "seq_9" : ["bilateral_blur"],
    "seq_10" : ["fastnl_blur"],
    "seq_11" : ["gray", "bilateral_blur", "threshold_otsu", "opening"],
    "seq_12" : ["gray", "bilateral_blur", "threshold_otsu", "closing"],
    "seq_13" : ["opening"],
    "seq_14" : ["closing"],
    "seq_15" : ["gray", "sobel"],
    "seq_16" : ["gray", "laplacian"],
    "seq_17" : ["gray", "canny"]
}

### Download model from tensorflow.org/models
- More models can be found in the TensorFlow 2 Detection Model Zoo. To use a different model you will need the URL name of the specific model. This can be done as follows:
1. Right click on the Model name of the model you would like to use;
2. Click on Copy link address to copy the download link of the model;
3. Paste the link in a text editor of your choice. You should observe a link similar to download.tensorflow.org/models/object_detection/tf2/YYYYYYYY/XXXXXXXXX.tar.gz;
4. Copy the XXXXXXXXX part of the link; this is the model name;
5. Copy the YYYYYYYY part of the link; this is the model date. Add the pair as a `(model name, model date)` tuple to `model_name_list` above, and set `model_name` to the same model name.

In [9]:
# Download and extract model
def download_model(model_name, model_date):
    """Download and extract one model archive.

    The archive URL is built as ``<base>/<model_date>/<model_name>.tar.gz`` and
    fetched with ``tf.keras.utils.get_file(..., untar=True)``.

    Args:
        model_name: Model name as it appears in the download link.
        model_date: Date component of the download link.

    Returns:
        The path returned by ``get_file``, converted to a string.
    """
    archive_url = '{0}{1}/{2}'.format(
        'http://download.tensorflow.org/models/object_detection/tf2/',
        model_date,
        model_name + '.tar.gz')
    extracted_path = tf.keras.utils.get_file(fname=model_name,
                                             origin=archive_url,
                                             untar=True)
    return str(extracted_path)

def download_models(model_name_list):
    """Download every model in ``model_name_list``.

    Args:
        model_name_list: Iterable of entries whose element 0 is the model name
            and element 1 is the model date.

    Returns:
        Dict mapping each model name to the local path returned by
        ``download_model``.
    """
    local_paths = {}
    for entry in model_name_list:
        name, date = entry[0], entry[1]
        local_paths[name] = download_model(name, date)
        print("downloaded {0}".format(name))
    return local_paths


# Maps each model name in model_name_list to the local path returned by
# download_model(); use it to find where a given model was downloaded.
model_dir_path_dict = download_models(model_name_list)

downloaded faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8


### Load the downloaded models from directories

In [10]:
def load_images_batch(batch_size, seq_id):
    """Yield pre-processed images from ``dataset_path`` in batches.

    Every file listed in ``dataset_path`` is opened, converted to RGB, run
    through the pre-processing sequence ``seq_id`` and collected into a batch.

    Fix over the previous version: the final, partially filled batch is now
    yielded as well. Before, images left over when the file count was not a
    multiple of ``batch_size`` were silently dropped.

    Args:
        batch_size: Number of images per yielded batch (the last batch may be
            smaller).
        seq_id: Key into ``pre_processing_seq_dict`` selecting the operations.

    Yields:
        Lists of ``(image_name, processed_pil_image)`` tuples.
    """
    batch_images = []
    for position, image_name in enumerate(os.listdir(dataset_path), start=1):
        # The context manager releases the file handle as soon as the RGB copy
        # has been made; convert() returns a new, independent image.
        with Image.open(dataset_path/image_name) as img:
            #TO-DO : pre run this step of converting to RGB and remove from here
            rgb_im = img.convert('RGB')
        processed_img = apply_cv_transformations(seq_id, rgb_im)
        batch_images.append((image_name, processed_img))
        if position % batch_size == 0:
            yield batch_images
            batch_images = []
    # Flush whatever is left so no image is skipped.
    if batch_images:
        yield batch_images

def apply_cv_transformations(seq_id, pil_img):
    """Apply the pre-processing sequence ``seq_id`` to a PIL image.

    The image is converted to an OpenCV array, each operation of the sequence
    is looked up in ``lib.dispatcher`` and applied in order, and the result is
    converted back to a PIL image.

    Args:
        seq_id: Key into ``pre_processing_seq_dict``.
        pil_img: Input PIL image.

    Returns:
        The processed image as a PIL image.
    """
    current = convert_to_cv_img(pil_img)
    for op_name in get_seq_operations(seq_id):
        transform = lib.dispatcher[op_name]
        current = transform(current)
    return convert_to_pil_img(current)

def convert_to_pil_img(opencv_img):
    """Convert an OpenCV image array to a PIL image.

    Arrays of dtype ``float64`` are first cast to ``uint8`` with ``astype``
    (a plain cast, no rescaling). The array is then passed through
    ``cv2.COLOR_BGR2RGB`` and wrapped with ``Image.fromarray``.

    Args:
        opencv_img: Image array as produced by the OpenCV operations.

    Returns:
        A PIL image.
    """
    needs_cast = opencv_img.dtype == 'float64'
    bgr_array = opencv_img.astype(np.uint8) if needs_cast else opencv_img
    return Image.fromarray(cv2.cvtColor(bgr_array, cv2.COLOR_BGR2RGB))

def convert_to_cv_img(pil_img):
    """Convert a PIL image to an OpenCV array.

    The image is viewed as a numpy array and passed through
    ``cv2.COLOR_RGB2BGR``.

    Args:
        pil_img: Input PIL image.

    Returns:
        The array returned by ``cv2.cvtColor``.
    """
    return cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)

def get_seq_operations(seq_id):
    """Return the list of operation names registered for ``seq_id``.

    Raises:
        KeyError: If ``seq_id`` is not a key of ``pre_processing_seq_dict``.
    """
    operations = pre_processing_seq_dict[seq_id]
    return operations

def get_seq_name(seq_id):
    """Return a readable name for a pre-processing sequence.

    Args:
        seq_id: Key into ``pre_processing_seq_dict``.

    Returns:
        The operation names joined with ``" > "``, or a "not found" message
        when ``seq_id`` is not a known key.
    """
    if seq_id in pre_processing_seq_dict:
        return " > ".join(get_seq_operations(seq_id))
    return "not found for dataset id: " + seq_id

def load_model(model_name, model_dir):
    """Load a saved model from ``<model_dir>/saved_model`` and time the load.

    Prints the model directory, a progress message and the elapsed seconds.

    Args:
        model_name: Not used by this function.
        model_dir: Directory containing the ``saved_model`` sub-directory.

    Returns:
        The object returned by ``tf.saved_model.load``.
    """
    print(model_dir)
    saved_model_dir = model_dir + "/saved_model"

    print('Loading model...', end='')
    started_at = time.time()
    # Load saved model and build the detection function
    detect_fn = tf.saved_model.load(saved_model_dir)
    seconds_taken = time.time() - started_at

    print('Done! Took {} seconds'.format(seconds_taken))
    return detect_fn



def load_image_into_numpy_array(path):
    """Open the image at ``path`` and return it as a numpy array."""
    opened_image = Image.open(path)
    return np.array(opened_image)

def detect_objects(model, pil_images_tuple):
    """Run ``model`` on each image, one image per call.

    Args:
        model: Callable detection function; it is called with a tensor that
            has a leading batch axis added.
        pil_images_tuple: Iterable of ``(image_name, pil_image)`` entries.

    Returns:
        List of ``(image_name, detections)`` tuples, where ``detections`` is
        whatever ``model`` returned for that image.
    """
    results = []
    for entry in pil_images_tuple:
        image_name = entry[0]
        pixels = np.array(entry[1])
        # Convert to a tensor and prepend a batch axis before calling the model.
        batched_input = tf.convert_to_tensor(pixels)[tf.newaxis, ...]
        results.append((image_name, model(batched_input)))
    return results

def process_detections(detections_tuple_list):
    """Convert raw model outputs into plain numpy detections.

    For each ``(image_name, detections)`` entry, every output except
    ``num_detections`` is indexed with ``[0, :num_detections]`` (dropping the
    batch axis and keeping only the first ``num_detections`` rows) and
    converted with ``.numpy()``. ``num_detections`` is stored as an int and
    ``detection_classes`` is cast to ``np.int64``.

    Fix over the previous version: the input dicts are no longer mutated.
    Before, ``num_detections`` was popped from the caller's dict, so calling
    this function twice on the same list (e.g. re-running a cell) raised
    ``KeyError``.

    Args:
        detections_tuple_list: List of ``(image_name, detections)`` tuples as
            returned by ``detect_objects``.

    Returns:
        List of ``(image_name, detections_dict)`` tuples with numpy values.
    """
    processed_detections_list = []
    for detections_tuple in detections_tuple_list:
        raw_detections = detections_tuple[1]
        # Read, do not pop: the caller's dict must stay intact.
        num_detections = int(raw_detections['num_detections'])
        detections = {key: value[0, :num_detections].numpy()
                      for key, value in raw_detections.items()
                      if key != 'num_detections'}
        detections['num_detections'] = num_detections

        # detection_classes should be ints.
        detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
        processed_detections_list.append((detections_tuple[0], detections))
    return processed_detections_list

def visualise_results(detections, image_np, use_normalized_coordinates=True):
    """Draw the detections for a single image and show the result.

    Works on a copy of ``image_np``. The image size is printed before and
    after the visualisation call, then the annotated array is displayed with
    matplotlib.

    Args:
        detections: Dict with ``detection_boxes``, ``detection_classes`` and
            ``detection_scores`` for one image. Boxes are ordered
            (ymin, xmin, ymax, xmax).
        image_np: Image as a numpy array.
        use_normalized_coordinates: Forwarded unchanged to the visualisation
            helper.
    """
    def _print_size(prefix, array):
        # Size is read through PIL, which reports (width, height).
        width, height = Image.fromarray(array).size
        print(prefix, end='')
        print(width, height)

    annotated = image_np.copy()
    _print_size("before vis shape ", annotated)

    # The return value is not used; the helper is presumably drawing onto
    # `annotated` in place - TODO confirm against viz_utils.
    viz_utils.visualize_boxes_and_labels_on_image_array(
        annotated,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=use_normalized_coordinates,
        max_boxes_to_draw=200,
        min_score_thresh=.50,
        agnostic_mode=False)

    _print_size("after vis shape ", annotated)

    plt.imshow(annotated)
    plt.show()

In [11]:
def get_image_size(image_name, batch_img_tuple):
    """Return the ``.size`` of the first image in the batch named ``image_name``.

    Args:
        image_name: Name to search for.
        batch_img_tuple: List of ``(img_name, pil_img)`` tuples.

    Returns:
        The ``size`` attribute of the matching image, i.e. (width, height).

    Raises:
        IndexError: If no entry has that name.
    """
    matching_images = []
    for entry in batch_img_tuple:
        if entry[0] == image_name:
            matching_images.append(entry[1])
    return matching_images[0].size
    
def denormalise_boxes(img_size, boxes):
    """Scale box coordinates by the image dimensions.

    Each box is read as (ymin, xmin, ymax, xmax); the y values are multiplied
    by the image height and the x values by the image width.

    Args:
        img_size: ``(width, height)`` of the image.
        boxes: Sequence of 4-element boxes.

    Returns:
        ``np.ndarray`` holding the scaled boxes in the same order.
    """
    width, height = img_size
    scaled = [
        [box[0] * height, box[1] * width, box[2] * height, box[3] * width]
        for box in boxes
    ]
    return np.array(scaled)

def filter_detections(detections_list, batch_images, denorm = True, max_detections = 100):
    """Truncate each image's detections and optionally denormalise the boxes.

    No score threshold is applied: the first ``max_detections`` entries of
    scores, boxes and classes are kept as-is. The cap used to be a hard-coded
    100; it is now a parameter with the same default.

    Args:
        detections_list: ``[(image_name, {detection_scores, detection_boxes,
            detection_classes, ...})]``.
        batch_images: ``[(image_name, pil_img)]``; only consulted when
            ``denorm`` is true, to look up each image's size.
        denorm: When true, boxes are scaled to the image size with
            ``denormalise_boxes``.
        max_detections: Maximum number of detections kept per image.

    Returns:
        ``[(image_name, {detection_scores, detection_boxes,
        detection_classes})]``.
    """
    filtered_detection_list = []
    for detection_tuple in detections_list:
        image_name = detection_tuple[0]
        detections = detection_tuple[1]

        scores = detections["detection_scores"][:max_detections]
        # Boxes and classes are cut to the number of scores actually kept.
        take = len(scores)

        boxes = detections["detection_boxes"][:take]
        if denorm:
            img_size = get_image_size(image_name, batch_images)
            boxes = denormalise_boxes(img_size, boxes)

        classes = detections["detection_classes"][:take]

        filtered_detection_list.append((image_name, {
            "detection_scores": scores,
            "detection_boxes": boxes,
            "detection_classes": classes,
        }))
    return filtered_detection_list

In [12]:
def run_inference(model, images_tuple):
    """Run the full detection pipeline on a batch of images.

    Steps: ``detect_objects`` -> ``process_detections`` ->
    ``filter_detections`` (with its default arguments).

    Args:
        model: Detection function passed to ``detect_objects``.
        images_tuple: List of ``(image_name, pil_img)`` tuples.

    Returns:
        The list returned by ``filter_detections``.
    """
    raw_outputs = detect_objects(model, images_tuple)
    numpy_outputs = process_detections(raw_outputs)
    # The batch is passed again so box denormalisation can look up image sizes.
    return filter_detections(numpy_outputs, images_tuple)

In [13]:
def get_label_name(label_id):
    """Return the ``"name"`` stored in ``category_index`` for ``label_id``.

    Returns ``"NA"`` when ``label_id`` is not a key of ``category_index``.
    """
    if label_id in category_index:
        return category_index[label_id]["name"]
    return "NA"

def prepare_per_image_res(detection_res):
    """Turn one image's detections into flat rows.

    Args:
        detection_res: ``(image_name, {detection_scores, detection_boxes,
            detection_classes})``.

    Returns:
        One list per detection:
        ``[image_name, label, score, box[0], box[1], box[2], box[3]]`` with the
        score rounded to 2 decimals and the box values truncated with ``int``.
    """
    image_name = detection_res[0]
    per_image = detection_res[1]
    rows = []
    for idx, box in enumerate(per_image["detection_boxes"]):
        label = get_label_name(per_image["detection_classes"][idx])
        score = round(per_image["detection_scores"][idx], 2)
        coords = [int(box[pos]) for pos in range(4)]
        rows.append([image_name, label, score] + coords)
    return rows

def store_results(detection_results, pre_processing_seq_name):
    """Append a batch of detection results to ``<model_name>.csv``.

    Each detection becomes one row (see ``prepare_per_image_res``) with the
    global ``model_name`` and ``pre_processing_seq_name`` added as the last two
    columns. Rows are written to ``detection_result_path/<model_name>.csv``.

    Changes over the previous version:
    - ``DataFrame.append`` (removed in pandas 2.0) is no longer used.
    - The existing CSV is no longer re-read and rewritten for every batch; new
      rows are appended in place and the header is written only when the file
      is new or empty.

    Args:
        detection_results: List of ``(image_name, detections_dict)`` tuples.
        pre_processing_seq_name: Value stored in the ``processing_seq_name``
            column.
    """
    info = [model_name, pre_processing_seq_name]
    df_rows = [row + info
               for res in detection_results
               for row in prepare_per_image_res(res)]

    df = pd.DataFrame(df_rows, columns = df_columns)

    csv_path = detection_result_path/(model_name + ".csv")
    # Write the header only once, when the file is first created (or is empty).
    write_header = (not os.path.exists(csv_path)) or os.path.getsize(csv_path) == 0
    df.to_csv(csv_path, mode='a', header=write_header, index=False)

In [14]:
# Load the detection function for the selected model from its download directory.
# model_name must be a key of model_dir_path_dict, i.e. present in model_name_list.
model = load_model(model_name, model_dir_path_dict[model_name])

C:\Users\shubham\.keras\datasets\faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8
Loading model...Done! Took 101.96210312843323 seconds


In [15]:
# Main experiment loop: for every active pre-processing sequence, run the model
# over the dataset in batches of 50 images and store each batch's detections.
for seq_id in pre_processing_seq_dict.keys():
    print("processing for seq_id:{0} started.".format(seq_id))
    # Batch counter, used only for the progress printout.
    counter = 1
    for batch_images in load_images_batch(50, seq_id):
        print("{0}".format(counter), end="...")
        detection_result = run_inference(model, batch_images)
        # NOTE(review): the sequence id (not the readable name from
        # get_seq_name) is what ends up in the "processing_seq_name" column.
        store_results(detection_result, seq_id)
        counter = counter + 1
    print("----------------")
print("Inference completed for model:{0}".format(model_name))

processing for seq_id:seq_8 started.
1...2...3...4...5...6...7...8...9...10...11...12...13...14...15...16...17...18...19...20...----------------
processing for seq_id:seq_9 started.
1...2...3...4...5...6...7...8...9...10...11...12...13...14...15...16...17...18...19...20...----------------
processing for seq_id:seq_10 started.
1...2...3...4...5...6...7...8...9...10...11...12...13...14...15...16...17...18...19...20...----------------
processing for seq_id:seq_11 started.
1...2...3...4...5...6...7...8...9...10...11...12...13...14...15...16...17...18...19...20...----------------
processing for seq_id:seq_12 started.
1...2...3...4...5...6...7...8...9...10...11...12...13...14...15...16...17...18...19...20...----------------
processing for seq_id:seq_13 started.
1...2...3...4...5...6...7...8...9...10...11...12...13...14...15...16...17...18...19...20...----------------
processing for seq_id:seq_14 started.
1...2...3...4...5...6...7...8...9...10...11...12...13...14...15...16...17...18...19...20

# SEQ 7 completed, run from seq 8