### Imports

In [1]:
import os
import pathlib

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from PIL import Image
from IPython.display import display
from PIL import Image, ImageFont, ImageDraw, ImageEnhance

Import the object detection module.

In [2]:
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

In [3]:
import cv2

In [4]:
# Rebind the `tf` name inside `utils_ops` to the TF1 compatibility API
# (presumably because `utils_ops` still calls TF1-style functions — TODO confirm).
utils_ops.tf = tf.compat.v1

# Expose `tf.io.gfile` under the `tf.gfile` attribute name
# (presumably for code that still looks it up at the old location — TODO confirm).
tf.gfile = tf.io.gfile

# Model preparation 

In [18]:
def load_model(model_name):
    """Load a locally stored detection model and return its serving signature.

    The model is read from ``saved_models/<model_name>/saved_model`` relative
    to the current working directory; nothing is downloaded.

    Args:
        model_name: Name of the model directory under ``saved_models``.

    Returns:
        The ``serving_default`` signature of the loaded SavedModel.

    Raises:
        FileNotFoundError: If the expected model directory does not exist.
    """
    # To download a new model instead of using a local copy:
    #   base_url = 'http://download.tensorflow.org/models/object_detection/'
    #   model_dir = tf.keras.utils.get_file(
    #       fname=model_name, origin=base_url + model_name + '.tar.gz', untar=True)
    model_dir = pathlib.Path("saved_models") / model_name / "saved_model"

    # The path is relative, so a wrong working directory is the usual cause
    # of a missing model; report the resolved location to make that obvious.
    if not model_dir.is_dir():
        raise FileNotFoundError(
            "SavedModel directory not found: {}".format(model_dir.resolve()))

    saved_model = tf.saved_model.load(str(model_dir))
    return saved_model.signatures['serving_default']

## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [6]:
# Label map file that provides the display name used to label each detected box.

PATH_TO_LABELS = 'F:/Machine_learning/Online-study/computer-vision/person_detection_work/models/research/object_detection/data/mscoco_label_map.pbtxt'
# Category index built from the label map; it is handed to the visualization helper when boxes are drawn.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

To test the model on multiple images, use the list of test image paths defined below.

In [7]:
# To try the model on your own pictures, place additional .jpg files in this directory;
# every .jpg found there is collected (in sorted order) into TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('F:/Machine_learning/Online-study/computer-vision/person_detection_work/models/research/object_detection/test_images')
TEST_IMAGE_PATHS = sorted(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg"))

# Detection

#### Loading the saved object detection model

In [20]:
# Name of the model directory that `load_model` looks up under "saved_models".
model_name = 'ssd_mobilenet_v2_coco_2018_03_29'
# Load the serving signature once; the same object is reused for every inference call.
detection_model = load_model(model_name)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Add a wrapper function to call the model and clean up the outputs:

In [21]:
def run_inference_for_single_image(model, image):
    """Run the detection model on one image and return its outputs as NumPy arrays.

    Args:
        model: Callable that takes a batched image tensor and returns a dict
            of output tensors, including a ``'num_detections'`` entry.
        image: The image to analyse; it is converted with ``np.asarray``.

    Returns:
        A dict holding, for every model output, the first batch element cut
        down to the first ``num_detections`` entries, plus:
          * ``'num_detections'`` as a Python ``int``;
          * ``'detection_classes'`` cast to ``np.int64``;
          * ``'detection_masks_reframed'`` (``uint8``), only when the model
            returned ``'detection_masks'``.
    """
    image = np.asarray(image)

    # Convert to a tensor and prepend a batch axis, since the model is
    # called with a batch of images.
    batched_input = tf.convert_to_tensor(image)[tf.newaxis, ...]
    raw_outputs = model(batched_input)

    # Take the detection count out first so that it is not sliced like the
    # other outputs below.
    num_detections = int(raw_outputs.pop('num_detections'))

    output_dict = {}
    for name, tensor in raw_outputs.items():
        output_dict[name] = tensor[0, :num_detections].numpy()
    output_dict['num_detections'] = num_detections

    # Class ids are cast to integers.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)

    # Models without masks are done at this point.
    if 'detection_masks' not in output_dict:
        return output_dict

    # Reframe the per-box masks to the image size, then binarize at 0.5.
    height, width = image.shape[0], image.shape[1]
    image_masks = utils_ops.reframe_box_masks_to_image_masks(
        output_dict['detection_masks'], output_dict['detection_boxes'],
        height, width)
    binary_masks = tf.cast(image_masks > 0.5, tf.uint8)
    output_dict['detection_masks_reframed'] = binary_masks.numpy()
    return output_dict

Define a helper that runs detection on a single image and draws the results on it:

In [22]:
def show_inference(model, image_path):
    """Run detection on an image and draw the detections onto it.

    Note: despite its name, ``image_path`` is used directly as the image
    array (it is passed to inference and to the drawing helper), not as a
    filesystem path.

    Args:
        model: Detection callable forwarded to
            ``run_inference_for_single_image``.
        image_path: Image array to analyse. The same object is handed to the
            drawing helper and returned, so the caller's array is expected
            to carry the drawn boxes afterwards.

    Returns:
        Tuple ``(image, output_dict)``: the image object that was passed in
        and the detection results produced for it.
    """
    frame = image_path

    # Detect first, then overlay boxes, class labels and scores (and masks,
    # when the model provides them) on the same array.
    detections = run_inference_for_single_image(model, frame)
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        instance_masks=detections.get('detection_masks_reframed'),
        use_normalized_coordinates=True,
        line_thickness=8)

    return frame, detections

In [25]:
# Path of the input video file that is opened with cv2.VideoCapture for frame-by-frame detection.
vid = "F:/Machine_learning/WORK/video/ball-possesion5.mp4"

In [26]:
# Open the video and read its basic properties.
cap = cv2.VideoCapture(vid)

fps = cap.get(cv2.CAP_PROP_FPS)
# OpenCV version 2 used "CV_CAP_PROP_FPS"
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# A capture that failed to open reports 0 fps; avoid dividing by zero then.
duration = frame_count / fps if fps > 0 else 0.0

# Process the video frame by frame until it ends or Esc is pressed.
# try/finally guarantees the capture and the window are released even if
# inference or display raises part-way through.
try:
    while cap.isOpened():
        # success is a boolean and vimg contains the current video frame
        success, vimg = cap.read()
        if not success:
            break

        img, op_dict = show_inference(detection_model, vimg)

        # Keep only the boxes whose detection score exceeds 0.5.
        boxes = [
            op_dict["detection_boxes"][i]
            for i in range(op_dict["num_detections"])
            if op_dict["detection_scores"][i] > 0.5
        ]

        # Current playback position in seconds, shown as an overlay.
        seek_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

        vimg = cv2.resize(vimg, (1100, 620))
        vimg = cv2.putText(vimg, str(seek_time), (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)
        cv2.imshow("Frame", vimg)

        # Image arrays are indexed (rows, columns, channels): shape[0] is the
        # height and shape[1] is the width.
        im_height, im_width = vimg.shape[:2]
        for box in boxes:
            # Boxes are normalized (ymin, xmin, ymax, xmax); scale them to
            # pixel coordinates of the resized frame.
            ymin, xmin, ymax, xmax = box
            (left, right, top, bottom) = (xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height)
            # left border, top border, right border, bottom border

        # 27 is the Esc key.
        key = cv2.waitKey(1)
        if key == 27:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


#### Detecting people in an image with bounding boxes

In [32]:
# Run detection on a single still image and show the annotated result.
img = cv2.imread("rugby.png")
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded; fail here with a clear message rather than deep inside
# the inference code.
if img is None:
    raise FileNotFoundError("Could not read image 'rugby.png'")
img, op_dict = show_inference(detection_model, img)
cv2.imshow("asd", img)
# Wait for any key press before closing the window.
cv2.waitKey(0)
cv2.destroyAllWindows()

Before if .... dict_keys(['detection_scores', 'detection_classes', 'detection_boxes', 'num_detections'])
