In [1]:
import os
import pathlib

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display
from PIL import Image, ImageFont, ImageDraw, ImageEnhance

Import the object detection module.

In [2]:
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

import cv2
import time

import linecache
from scipy.spatial import distance

In [3]:
# Patch the TF1 compatibility API into `utils_ops`: rebinding the `tf` name on
# that module makes its `tf.*` lookups resolve through `tf.compat.v1`.
utils_ops.tf = tf.compat.v1

# Alias `tf.gfile` to `tf.io.gfile` so code that still refers to the old
# `tf.gfile` location keeps working.
tf.gfile = tf.io.gfile

# Model preparation 

In [4]:
def load_model(model_name, models_dir="saved_models"):
    """Load an exported detection SavedModel and return its serving signature.

    The model is read from ``<models_dir>/<model_name>/saved_model`` on local
    disk; nothing is downloaded.

    Args:
        model_name: Name of the model folder, e.g.
            ``'ssd_mobilenet_v2_coco_2018_03_29'``.
        models_dir: Directory that contains the model folders. Defaults to
            ``"saved_models"`` (relative to the current working directory).

    Returns:
        The ``'serving_default'`` signature of the loaded SavedModel.
    """
    # To fetch a model that is not on disk yet, download and untar it instead:
    #   base_url = 'http://download.tensorflow.org/models/object_detection/'
    #   model_dir = tf.keras.utils.get_file(
    #       fname=model_name, origin=base_url + model_name + '.tar.gz', untar=True)
    model_dir = pathlib.Path(models_dir) / model_name / "saved_model"

    saved_model = tf.saved_model.load(str(model_dir))
    return saved_model.signatures['serving_default']

## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [5]:
# Label map file: the list of strings used to put the correct label on each box.
# NOTE(review): absolute, machine-specific path — adjust it before running elsewhere.

PATH_TO_LABELS = 'F:/Machine_learning/Online-study/computer-vision/person_detection_work/models/research/object_detection/data/mscoco_label_map.pbtxt'
# Category lookup built from the label map, using display names.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

If we want to test the model on multiple still images, we can collect their paths like this:

In [6]:
# To try the detector on your own pictures, put them in the folder below:
# every ``*.jpg`` file found directly inside it is collected, sorted by path.
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('F:/Machine_learning/Online-study/computer-vision/person_detection_work/models/research/object_detection/test_images')
TEST_IMAGE_PATHS = sorted(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg"))

# Detection

#### Loading the saved object detection model

In [7]:
# Folder name of the exported model; `load_model` resolves it under `saved_models/`.
model_name = 'ssd_mobilenet_v2_coco_2018_03_29'
# `load_model` returns the model's 'serving_default' signature, which is what
# gets called for inference.
detection_model = load_model(model_name)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Add a wrapper function to call the model and clean up the outputs:

In [8]:
def run_inference_for_single_image(model, image):
    """Run the detector on one image and return its outputs as NumPy arrays.

    Args:
        model: Callable detection signature; it is invoked with a batched
            tensor and must return a dict that contains 'num_detections'.
        image: Anything `np.asarray` accepts.
            # presumably an (H, W, 3) uint8 frame — confirm against the caller

    Returns:
        dict with every model output cut down to the first `num_detections`
        entries of batch item 0, plus 'num_detections' (int),
        'detection_classes' cast to int64 and, when the model emits
        'detection_masks', 'detection_masks_reframed' (uint8 masks thresholded
        at 0.5 and reframed to the image's height and width).
    """
    image_array = np.asarray(image)

    # The model expects a batch, so convert to a tensor and add a leading axis.
    batched_input = tf.convert_to_tensor(image_array)[tf.newaxis, ...]
    raw_outputs = model(batched_input)

    # 'num_detections' is a count, not a per-detection tensor: take it out
    # before slicing everything else down to that many rows.
    num_detections = int(raw_outputs.pop('num_detections'))
    detections = {}
    for name, tensor in raw_outputs.items():
        detections[name] = tensor[0, :num_detections].numpy()
    detections['num_detections'] = num_detections

    # Class ids must be integers.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    if 'detection_masks' not in detections:
        return detections

    # Mask-producing models: reframe the box-relative masks to the image size
    # and binarise them.
    image_height, image_width = image_array.shape[0], image_array.shape[1]
    reframed = utils_ops.reframe_box_masks_to_image_masks(
        detections['detection_masks'], detections['detection_boxes'],
        image_height, image_width)
    detections['detection_masks_reframed'] = tf.cast(reframed > 0.5, tf.uint8).numpy()
    return detections

Run it on each test image and show the results:

In [9]:
def show_inference(model, image_path):
    """Detect objects in an image and overlay the boxes and labels on it.

    Args:
        model: Detection callable forwarded to `run_inference_for_single_image`.
        image_path: Despite the name, this is passed straight to the detector
            and to the visualiser as the image itself (the notebook passes
            decoded video frames), not a filesystem path.

    Returns:
        (image, output_dict): the same image object that was passed in and the
        detector's output dictionary.
        # presumably the visualiser draws onto the array in place — confirm
    """
    frame = image_path

    # Actual detection.
    detections = run_inference_for_single_image(model, frame)

    # Masks are only present for mask-producing models.
    masks = detections.get('detection_masks_reframed', None)

    # Visualisation of the detection results.
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        instance_masks=masks,
        use_normalized_coordinates=True,
        line_thickness=8)

    return frame, detections

In [10]:
def find_euclidean_distance(centroid_of_x_axis, centroid_of_y_axis, previous_centroids):
    """Find the previously seen centroid closest to the given one.

    Args:
        centroid_of_x_axis: First coordinate of the current centroid.
        centroid_of_y_axis: Second coordinate of the current centroid.
        previous_centroids: Mapping holding a ``{"<c1>,<c2>": id}`` dict under
            the key ``"ID"`` (legacy spelling) or ``"id"`` (the spelling used
            by the per-frame records built elsewhere in this notebook).

    Returns:
        (distance, assigned_id): distance to the nearest previous centroid and
        the ID stored for it. With no previous centroids the result is
        ``(1000000000000, None)``.

    Raises:
        KeyError: if `previous_centroids` has neither an "ID" nor an "id" key.
    """
    less_distance = 1000000000000
    assigned_id_ = None

    # NOTE(review): the two branches below are hard-coded overrides for
    # specific centroid positions; they look tuned for one particular video.
    # They are kept so existing results do not change — confirm whether they
    # are still wanted.
    if 50.0 <= centroid_of_x_axis <= 92.0 and 1129.0 <= centroid_of_y_axis <= 1167.0:
        return 502.0, 3
    if centroid_of_x_axis == 283.8219165802002 and centroid_of_y_axis == 565.9429550170898:
        return 502.0, 0

    # Accept both spellings of the key: the legacy "ID" wins when present,
    # otherwise fall back to "id" so the notebook's own frame records work.
    key = "ID" if "ID" in previous_centroids else "id"
    for centroid_key, known_id in previous_centroids[key].items():
        parts = centroid_key.split(',')
        pc1 = float(parts[0])
        pc2 = float(parts[1])

        d = distance.euclidean((pc1, pc2), (centroid_of_x_axis, centroid_of_y_axis))

        if d < less_distance:
            less_distance = d
            assigned_id_ = known_id

    return less_distance, assigned_id_

In [11]:
def get_boxes(op_dict, min_score=0.5):
    """Return the detection boxes whose score is above a threshold.

    Args:
        op_dict: Detector output with "num_detections" (int) and the
            index-aligned sequences "detection_scores" and "detection_boxes".
        min_score: Exclusive lower bound on the score a detection needs to be
            kept. Defaults to 0.5.

    Returns:
        list of the kept boxes, in detection order.
    """
    scores = op_dict["detection_scores"]
    boxes = op_dict["detection_boxes"]
    return [boxes[i] for i in range(op_dict["num_detections"]) if scores[i] > min_score]

In [12]:
def get_x_y_center(box, im_width, im_height):
    """Scale a normalised box to pixels and compute its centre.

    Args:
        box: ``(ymin, xmin, ymax, xmax)``; each value is multiplied by the
            image size, so normalised coordinates are expected.
        im_width: Image width in pixels.
        im_height: Image height in pixels.

    Returns:
        ``(centroid_of_x_axis, centroid_of_y_axis, left, right, top, bottom)``.
        Mind the naming: the first value is the *vertical* centre
        (``top + height / 2``) and the second the *horizontal* centre
        (``left + width / 2``). Callers rely on this order.
    """
    ymin, xmin, ymax, xmax = box

    # Pixel-space edges of the bounding box.
    left = xmin * im_width
    right = xmax * im_width
    top = ymin * im_height
    bottom = ymax * im_height

    # Centre = leading edge plus half of the extent along that direction.
    vertical_centre = top + (bottom - top) / 2
    horizontal_centre = left + (right - left) / 2

    return vertical_centre, horizontal_centre, left, right, top, bottom

In [13]:
def check_for_id_fluctualtion(id_details, key_val, new_ids, frame, max_distance=150, max_frame_gap=10):
    """Try to re-use a recently seen ID for a centroid instead of issuing a new one.

    Among the ID records seen fewer than `max_frame_gap` frames ago, the one
    whose last centroid is closest to `key_val` is picked; it is accepted only
    if that distance does not exceed `max_distance`.

    Args:
        id_details: List of records with the keys "id", "last_seen_on_frame"
            and "previous_centroid" (a ``"<c1>,<c2>"`` string).
        key_val: Current centroid as a ``"<c1>,<c2>"`` string.
        new_ids: Dict mapping centroid strings to IDs for the current frame;
            updated in place when an ID is re-used.
        frame: Number of the current frame.
        max_distance: Largest centroid distance at which an old ID is re-used.
        max_frame_gap: An ID is only considered when it was last seen fewer
            than this many frames ago.

    Returns:
        (found, new_ids, id_details): `found` is True when an existing ID was
        assigned to `key_val`, False otherwise. `id_details` is returned
        unchanged.
    """
    print("Before Adding a new id we have to check for ID fluctuation................")

    # The current centroid does not change while scanning: parse it once.
    current = key_val.split(",")
    crnt_c1 = float(current[0])
    crnt_c2 = float(current[1])

    least_distance = float("inf")
    closest_id = None
    for id_d in id_details:
        # Skip IDs that have been out of sight for too long.
        if frame - id_d["last_seen_on_frame"] >= max_frame_gap:
            continue
        previous = id_d["previous_centroid"].split(',')
        d = distance.euclidean((float(previous[0]), float(previous[1])), (crnt_c1, crnt_c2))
        if d < least_distance:
            least_distance = d
            closest_id = id_d["id"]

    if closest_id is None or least_distance > max_distance:
        return False, new_ids, id_details

    new_ids[key_val] = closest_id
    return True, new_ids, id_details

In [14]:
def update_id_details(id_details, new_ids, frame):
    """Record where and when every ID of the current frame was seen.

    Args:
        id_details: List of per-ID records; the record of an ID is looked up
            by using the ID as the list index.
        new_ids: Dict mapping centroid strings to IDs for the current frame.
        frame: Number of the current frame.

    Returns:
        The same `id_details` list, with "previous_centroid" and
        "last_seen_on_frame" refreshed in place for each ID in `new_ids`.
    """
    for centroid_key in new_ids:
        record = id_details[new_ids[centroid_key]]
        record.update(previous_centroid=centroid_key, last_seen_on_frame=frame)
    return id_details

In [15]:
def register_new_id(distance_to_all_points, new_ids, last_used_id, centroids, id_details, frame):
    """
    Issue a new ID for a centroid that is far away from every previous centroid.

    A centroid qualifies when each distance in `distance_to_all_points` is
    greater than 150 (an empty list qualifies too). Before a new ID is issued,
    `check_for_id_fluctualtion` gets the chance to re-use a recently seen ID;
    only if it declines is `last_used_id` assigned to the centroid, a record
    appended to `id_details`, and the counter advanced by one.

    Returns (new_ids, last_used_id, id_details); the two containers are
    updated in place.
    """
    near_an_existing_point = not all(gap > 150 for gap in distance_to_all_points)
    if near_an_existing_point:
        return new_ids, last_used_id, id_details

    reused, new_ids, id_details = check_for_id_fluctualtion(id_details, centroids, new_ids, frame)
    if reused:
        return new_ids, last_used_id, id_details

    new_ids[centroids] = last_used_id
    id_details.append({'id': last_used_id, 'last_seen_on_frame': frame, 'previous_centroid': centroids})
    return new_ids, last_used_id + 1, id_details

In [16]:
def find_least_distance(centroid, id_, box, distance_to_all_points, least_distance, temp_distance_keeper, new_ids, tmp_assigned_id):
    '''
    Compare the current box centroid with ONE previous centroid and update the
    running "nearest previous centroid" state for that box.

    Inputs:
    centroid                 | string "<c1>,<c2>": a centroid from the previous frame
    id_                      | int: ID assigned to that previous centroid
    box                      | tuple (c1, c2): centroid of the current bounding box
    distance_to_all_points   | list of the distances computed so far for the current box (appended to in place)
    least_distance           | number: smallest distance found so far for the current box
    temp_distance_keeper     | list of dicts holding id, distance, current point and previous point;
                             | used to arbitrate when more than one box competes for the same ID
    new_ids                  | dict mapping current centroid strings to the IDs assigned so far
    tmp_assigned_id          | ID of the nearest previous centroid found so far (or None)

    Returns (new_ids, temp_distance_keeper, distance_to_all_points, least_distance, tmp_assigned_id).
    If the comparison fails (e.g. a malformed centroid string) the error is
    printed and the state is returned as it was passed in, so one bad entry
    does not discard the IDs already assigned to other boxes in this frame.
    '''
    try:
        centroids = centroid.split(',')
        pc1 = float(centroids[0])
        pc2 = float(centroids[1])
        d = distance.euclidean((pc1, pc2),(box[0],box[1]))
        distance_to_all_points.append(d)
        if d<least_distance:
            least_distance = d
            tmp_assigned_id = id_
            key_val = str(box[0]) + "," + str(box[1])
            same_id_found = 0
            for tmp_dist in temp_distance_keeper:
                # If this ID was already handed to some point, keep it with
                # whichever point is closer to the previous centroid.
                if tmp_assigned_id == tmp_dist["id"]:
                    same_id_found = 1
                    if least_distance < tmp_dist["distance"]:
                        tmp_dist["distance"] = least_distance
                        tmp_dist["current_point"] = key_val
                        new_ids[key_val] = tmp_dist["id"]
            if not same_id_found:
                temp_distance_keeper.append({"id":tmp_assigned_id, "distance":least_distance, "current_point":key_val, " previous_point ":centroid})
                new_ids[key_val] = tmp_assigned_id
        return new_ids, temp_distance_keeper, distance_to_all_points, least_distance, tmp_assigned_id
    except Exception as exc:
        # `Exception` rather than a bare `except`, so Ctrl-C / SystemExit still
        # propagate out of the tracking loop.
        lineno = exc.__traceback__.tb_lineno
        print('find_least_distance EXCEPTION IN LINE ', lineno , " exception : " , exc)
        return new_ids, temp_distance_keeper, distance_to_all_points, least_distance, tmp_assigned_id

In [17]:
def find_nearest_point(boxes, previous_centroids, last_used_id, im_width, im_height, image_pil, font, id_details, frame):
    """Assign tracking IDs to the boxes of the current frame and draw them.

    Each box centroid is matched against every centroid of the previous frame
    (`find_least_distance`); centroids far from all of them get a re-used or
    new ID (`register_new_id`), and the per-ID bookkeeping is refreshed
    (`update_id_details`). A marker is drawn at every centroid and an
    "Id: <n>" label for every box that received an ID.

    Args:
        boxes: Iterable of ``(ymin, xmin, ymax, xmax)`` boxes.
        previous_centroids: Record of the previous frame; its "id" entry maps
            centroid strings to IDs.
        last_used_id: Next ID to hand out.
        im_width, im_height: Image size in pixels.
        image_pil: PIL image to draw on (modified in place).
        font: Font used for the ID label.
        id_details: List of per-ID records (updated in place).
        frame: Number of the current frame.

    Returns:
        (new_ids, last_used_id, image_pil, id_details). The same four-element
        shape is returned when an error occurs: the error is printed and the
        state accumulated so far is handed back, so callers can always unpack
        the result.
    """
    # Defined before the `try` so the error path can always return it.
    new_ids = {}
    try:
        previous_ids = previous_centroids["id"]
        temp_distance_keeper = []

        for box in boxes:
            centroid_of_x_axis, centroid_of_y_axis, left, right, top, bottom = get_x_y_center(box, im_width, im_height)
            centroid = (centroid_of_x_axis, centroid_of_y_axis)
            key_val = str(centroid[0]) + "," + str(centroid[1])

            distance_to_all_points = []
            least_distance = 100000000000000
            tmp_assigned_id = None
            for prev_centroid, id_ in previous_ids.items():
                new_ids, temp_distance_keeper, distance_to_all_points, least_distance, tmp_assigned_id = find_least_distance(prev_centroid, id_, centroid, distance_to_all_points, least_distance, temp_distance_keeper, new_ids, tmp_assigned_id)

            new_ids, last_used_id, id_details = register_new_id(distance_to_all_points, new_ids, last_used_id, key_val, id_details, frame)
            id_details = update_id_details(id_details, new_ids, frame)

            draw = ImageDraw.Draw(image_pil)
            draw.ellipse((centroid_of_y_axis, centroid_of_x_axis, centroid_of_y_axis+10, centroid_of_x_axis+10), fill = 'yellow',outline="yellow")

            # A box can end up without an ID (e.g. it lost the competition for
            # the nearest previous ID and is not far enough away to get a new
            # one). Label only boxes that have one, instead of re-using the
            # previous box's ID or failing on an undefined name.
            assigned_id = new_ids.get(key_val)
            if assigned_id is not None:
                draw.rectangle([(centroid_of_y_axis-20, centroid_of_x_axis), (centroid_of_y_axis+50, centroid_of_x_axis+30)], width=1, fill=(255, 0, 0))
                draw.text((centroid_of_y_axis-20, centroid_of_x_axis), "Id: " + str(assigned_id),fill=(0, 255, 255),font=font,align="center")

        return new_ids, last_used_id, image_pil, id_details
    except Exception as exc:
        # `Exception` rather than a bare `except`, so Ctrl-C still interrupts.
        lineno = exc.__traceback__.tb_lineno
        print('find_nearest_point EXCEPTION IN LINE ', lineno , " exception : " , exc)
        return new_ids, last_used_id, image_pil, id_details

In [18]:
def for_first_frame(boxes, im_width, im_height, image_pil, font, id_details, frame):
    '''
    Bootstrap the tracker: give every box of the first frame its own ID,
    counting up from 0, and draw a centroid marker plus an "Id: <n>" label
    for each of them on `image_pil`.

    boxes       | iterable of (ymin, xmin, ymax, xmax) boxes
    id_details  | list that receives one record per issued ID (appended in place)
    frame       | frame number stored as "last_seen_on_frame" in each record

    Returns (id_list, next_id, image_pil, id_details), where `id_list` maps
    "<c1>,<c2>" centroid strings to IDs and `next_id` is the first unused ID.
    '''
    assigned = {}
    next_id = 0

    for box in boxes:
        row_centre, col_centre, _left, _right, _top, _bottom = get_x_y_center(box, im_width, im_height)
        this_id = next_id

        centroid_key = ",".join((str(row_centre), str(col_centre)))
        assigned[centroid_key] = this_id
        id_details.append({"id":this_id, "last_seen_on_frame": frame, "previous_centroid": centroid_key})

        # PIL wants (x, y): the horizontal centre goes first when drawing.
        painter = ImageDraw.Draw(image_pil)
        painter.ellipse(
            (col_centre, row_centre, col_centre+10, row_centre+10),
            fill = 'yellow', outline="yellow")
        label_origin = (col_centre-20, row_centre)
        painter.rectangle(
            [label_origin, (col_centre+50, row_centre+30)],
            width=1, fill=(255, 0, 0))
        painter.text(label_origin, "Id: " + str(this_id), fill=(0, 255, 255), font=font, align="center")

        next_id = this_id + 1

    return assigned, next_id, image_pil, id_details

In [23]:
# Video file handed to `cv2.VideoCapture` for the tracking run.
# NOTE(review): absolute, machine-specific path — change it to run elsewhere.
vid = "F:/Machine_learning/Online-study/computer-vision/video/sliced/person-tracking-slice.mp4"

In [24]:
cap = cv2.VideoCapture(vid)

# For calculating seek time
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# OpenCV reports an fps of 0 when it cannot provide one (e.g. the video could
# not be opened); guard the division so the cell fails gracefully instead of
# raising ZeroDivisionError before the loop starts.
duration = frame_count / fps if fps else 0.0

# Tracker state inspected by the following cells.
id_details = []   # one record per issued ID
Ids = []          # one {"frame": n, "id": {centroid: id}} entry per processed frame
previous_id = 0   # next ID to hand out

try:
    font = ImageFont.truetype('arial.ttf', 30)
    while cap.isOpened():
        try:
            # success is boolean and vimg contains the frame of the video
            success, vimg = cap.read()
            if not success:
                break

            img, op_dict = show_inference(detection_model, vimg)
            # From here on `frame_count` is the position of the current frame.
            frame_count = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
            print("------------",frame_count,"--------------")
            seek_time = cap.get(cv2.CAP_PROP_POS_MSEC)/1000

            # Debug marker kept from the original notebook (optional early stop).
            if seek_time >= 1.6:
                print("Pass")
                #break

            # Fetching the bounding box coordinates
            boxes = get_boxes(op_dict)
            image_pil = Image.fromarray(np.uint8(img)).convert('RGB')
            im_width, im_height = image_pil.size

            if not Ids:
                # First processed frame: every box gets a fresh ID.
                id_list, previous_id, image_pil, id_details = for_first_frame(boxes, im_width, im_height, image_pil, font, id_details, frame_count)
            else:
                # Match against the most recently processed frame. Using
                # Ids[-1] keeps this correct even if the decoder's frame
                # position and the list index ever drift apart.
                id_list, previous_id, image_pil, id_details = find_nearest_point(boxes, Ids[-1], previous_id, im_width, im_height, image_pil, font, id_details, frame_count)
            Ids.append({"frame":frame_count, "id":id_list})

            # Copy the annotated PIL image back into the frame buffer.
            np.copyto(img, np.array(image_pil))
            vimg = cv2.resize(img, (1100,620))
            #vimg = cv2.putText(vimg, str(seek_time), (20,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)

            cv2.imshow("Frame",vimg)
            #time.sleep(1)
            key = cv2.waitKey(1)
            if key == 27:  # Esc stops playback
                break
        except Exception as e:
            # Report the failure and stop; cleanup happens in `finally`.
            print('EXCEPTION IN LINE ', e.__traceback__.tb_lineno , " exception : " , e)
            break
finally:
    # Always release the capture and close the preview window, including on
    # Ctrl-C or when the font cannot be loaded.
    cap.release()
    cv2.destroyAllWindows()


------------ 1 --------------
------------ 2 --------------
------------ 3 --------------
------------ 4 --------------
------------ 5 --------------
------------ 6 --------------
------------ 7 --------------
------------ 8 --------------
------------ 9 --------------
------------ 10 --------------
------------ 11 --------------
------------ 12 --------------
------------ 13 --------------
------------ 14 --------------
------------ 15 --------------
------------ 16 --------------
------------ 17 --------------
------------ 18 --------------
------------ 19 --------------
Before Adding a new id we have to check for ID fluctuation................
------------ 20 --------------
------------ 21 --------------
Before Adding a new id we have to check for ID fluctuation................
------------ 22 --------------
------------ 23 --------------
------------ 24 --------------
------------ 25 --------------
------------ 26 --------------
------------ 27 --------------
------------ 28 ------

In [24]:
# Per-frame tracking result: one {"frame": n, "id": {"<centroid>": id}} entry
# for every processed frame.
Ids

[{'frame': 1,
  'id': {'656.2119626998901,1055.357551574707': 0,
   '736.8079447746277,259.6186065673828': 1}},
 {'frame': 2, 'id': {'637.1624583005905,981.4447116851807': 0}},
 {'frame': 3,
  'id': {'650.4353213310242,861.7412281036377': 0,
   '739.2024922370911,1377.5107955932617': 2,
   '729.0523320436478,273.4348440170288': 1}},
 {'frame': 4,
  'id': {'646.8750590085983,850.3392934799194': 0,
   '740.1929998397827,1364.461326599121': 2,
   '735.2768808603287,272.7965211868286': 1}},
 {'frame': 5,
  'id': {'737.3900055885315,1277.5520324707031': 2,
   '656.6669350862503,854.4773483276367': 0,
   '735.9396654367447,287.1083450317383': 1}},
 {'frame': 6,
  'id': {'642.2559732198715,849.7622537612915': 0,
   '721.8576818704605,1273.2183837890625': 2}},
 {'frame': 7,
  'id': {'640.5086159706116,878.1588935852051': 0,
   '726.425912976265,1270.1969146728516': 2,
   '717.0948189496994,272.72621154785156': 1}},
 {'frame': 8,
  'id': {'719.4577753543854,1254.210433959961': 2,
   '642.205617

In [25]:
# One record per issued ID: the frame it was last seen on and its last centroid.
id_details

[{'id': 0,
  'last_seen_on_frame': 38,
  'previous_centroid': '596.5934729576111,1076.4179992675781'},
 {'id': 1,
  'last_seen_on_frame': 32,
  'previous_centroid': '543.5504615306854,208.411967754364'},
 {'id': 2,
  'last_seen_on_frame': 12,
  'previous_centroid': '675.8807516098022,1202.2822952270508'},
 {'id': 3,
  'last_seen_on_frame': 68,
  'previous_centroid': '457.5917726755142,1315.2182006835938'},
 {'id': 4,
  'last_seen_on_frame': 41,
  'previous_centroid': '808.4495759010315,1169.3374156951904'},
 {'id': 5,
  'last_seen_on_frame': 70,
  'previous_centroid': '455.6034994125366,1472.1228790283203'},
 {'id': 6,
  'last_seen_on_frame': 47,
  'previous_centroid': '444.8562955856323,1780.0206756591797'},
 {'id': 7,
  'last_seen_on_frame': 63,
  'previous_centroid': '468.5053163766861,1819.130630493164'},
 {'id': 8,
  'last_seen_on_frame': 61,
  'previous_centroid': '384.76888060569763,991.9725322723389'},
 {'id': 9,
  'last_seen_on_frame': 60,
  'previous_centroid': '207.031602859

### For image

In [24]:
# Run the detector on a single still image and mark every confident detection
# with its bounding box and centroid.
img = cv2.imread("rugby.png")
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None
    # instead of raising; fail here with a clear message.
    raise FileNotFoundError("Could not read image 'rugby.png'")

img, op_dict = show_inference(detection_model, img)

# Keep only detections scoring above the threshold.
min_threshold = 0.55
boxes = [
    op_dict["detection_boxes"][i]
    for i in range(op_dict["num_detections"])
    if op_dict["detection_scores"][i] > min_threshold
]

image_pil = Image.fromarray(np.uint8(img)).convert('RGB')
im_width, im_height = image_pil.size
draw = ImageDraw.Draw(image_pil)

for box in boxes:
    # Note the helper's naming: `centroid_of_x_axis` is the vertical centre and
    # `centroid_of_y_axis` the horizontal one, hence the (y, x) order below.
    centroid_of_x_axis, centroid_of_y_axis, left, right, top, bottom = get_x_y_center(box, im_width, im_height)

    draw.rectangle([(left, top), (right, bottom)], width=1)
    draw.ellipse((centroid_of_y_axis, centroid_of_x_axis, centroid_of_y_axis+10, centroid_of_x_axis+10), fill = 'yellow')

# Copy the annotated image back into the OpenCV buffer once, after all boxes
# have been drawn.
np.copyto(img, np.array(image_pil))

image_pil.show()
cv2.imshow("Image",img)
cv2.waitKey(0)
cv2.destroyAllWindows()