# Object Tracking

Object tracking is a computer vision problem that involves `Detection` of objects (features) and `Tracking`.

> `Detection` refers to the task of observing a scene and finding points of interest within the scene.

> `Tracking` refers to the task of predicting future positions of a `detected object`, using its previously known `states`.

In [1]:
import cv2
import numpy as np
from deep_sort.tracker import Tracker
import tensorflow as tf

In [2]:
# Set up the file along with filepaths
import os

# Name of the clip to track.
filename = "traffic_unedited.mp4"

# Resolve the clip against the current working directory.
current_dir = os.getcwd()
video_path = os.path.join(current_dir, filename)

## Deep in DeepSORT

In DeepSort, a pre-trained deep learning model is responsible for feature extraction as well as returning the bounding boxes that enclose the features. 

In our implementation, since the bounding box is labeled by the user for the first frame, we will not be using an object detection algorithm. Instead we will use `MobileNetV2` as a `Feature Extractor`.

This will help the Kalman Filter in the SORT algorithm to update its weights more effectively.

The input shape fed to the Feature Detector plays a huge role: every crop must be resized to the shape the model was built with (128 x 128 x 3 here).

In [3]:
# Keeping pretrained model ready to derive feature vector for objects within the bounding boxes
from tensorflow.keras.applications import MobileNetV2

class FeatureDetector():
    """Wrap a pretrained MobileNetV2 that is queried with image crops.

    The network is not built on construction. Call ``create_detector()``
    once, then ``predict()`` for every batch of crops.
    """

    # Class-level default kept for backward compatibility; every instance
    # gets its own attribute in ``__init__``.
    detector = None

    def __init__(self):
        self.detector = None

    def create_detector(self):
        """Build MobileNetV2 with ImageNet weights for 128x128x3 input.

        NOTE(review): ``include_top`` is left at its Keras default, so the
        output is presumably the ImageNet classification vector rather than
        a pooled embedding -- confirm this is the intended feature.
        """
        self.detector = MobileNetV2(input_shape=(128, 128, 3), weights="imagenet")
        print(self.detector)

    # Get Feature vector from model.
    def predict(self, cropped_image):
        """Run the model on ``cropped_image`` and return its output.

        Args:
            cropped_image: batch handed straight to the model's ``predict``;
                the notebook passes an array of shape (1, 128, 128, 3).

        Returns:
            Whatever the underlying model's ``predict`` returns.

        Raises:
            RuntimeError: if ``create_detector()`` has not been called yet.
        """
        if self.detector is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object has no attribute 'predict'".
            raise RuntimeError(
                "FeatureDetector has no model yet; call create_detector() first."
            )
        return self.detector.predict(cropped_image)

    def _standardize_input(self, cropped_img):
        """Return ``cropped_img`` with every value multiplied by 1/255.

        NOTE(review): ``predict()`` does not call this helper, and Keras'
        ``mobilenet_v2.preprocess_input`` presumably scales to [-1, 1] --
        confirm which scaling the model should receive.
        """
        data_augmentation = tf.keras.Sequential([
            tf.keras.layers.Rescaling(1./255)
        ])
        rescaled_img = data_augmentation(cropped_img)
        return rescaled_img

In [4]:
def crop_from_bbox(frame, bbox):
    """
    Cut the region described by a bounding box out of a frame.

    Args:
        frame: image array indexed as ``frame[row, column, ...]``.
        bbox: sequence ``(x_start, y_start, width, height)``; values are
            truncated to ``int`` so float boxes are accepted too.

    Returns:
        The cropped view of ``frame``. The box is clamped to the frame, so
        the crop can be smaller than ``height`` x ``width`` (or empty) when
        the box lies partly (or fully) outside the image.
    """
    x_start, y_start, width, height = (int(value) for value in bbox[:4])
    frame_height, frame_width = frame.shape[:2]

    # Clamp to the frame. A negative start would otherwise be read by the
    # slice as "count from the end" and return the wrong (usually empty) area.
    x_end = max(min(x_start + width, frame_width), 0)
    y_end = max(min(y_start + height, frame_height), 0)
    x_start = max(x_start, 0)
    y_start = max(y_start, 0)

    print(f"end x: {x_end}\nend y: {y_end}")
    cropped_img = frame[y_start:y_end, x_start:x_end]
    print(cropped_img.shape)
    return cropped_img

def resize_to_model_input(cropped_image, target_size=(128, 128)):
    """
    Resize a crop to the spatial size the feature model is fed with.

    Args:
        cropped_image: image array to resize.
        target_size: ``(width, height)`` passed to ``cv2.resize`` as
            ``dsize``. Defaults to ``(128, 128)``.

    Returns:
        The resized image.

    Raises:
        ValueError: if ``cropped_image`` is ``None`` or has no pixels.
    """
    if cropped_image is None or cropped_image.size == 0:
        # Report the real problem here rather than letting OpenCV fail on
        # an empty input.
        raise ValueError("Cannot resize an empty crop; check the bounding box.")
    resized_image = cv2.resize(cropped_image, dsize=tuple(target_size))
    return resized_image


In [5]:
# Open the video and report how many frames it contains
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail here with the offending path instead of later, when the first
    # frame read comes back empty.
    raise OSError(f"Could not open video file: {video_path}")
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(total_frames)

400


In [6]:
# Grab the first frame; this is the frame the user annotates.
ret, frame = cap.read()
if not ret:
    raise RuntimeError("Could not read the first frame from the video.")

# selectROI shows the frame in the named window itself, so no separate
# imshow call is needed.
# bbox is returned in the format: x_start, y_start, width, height
bbox = cv2.selectROI("Annotation", frame, fromCenter=False, showCrosshair=False)
cv2.destroyAllWindows()
if bbox[2] <= 0 or bbox[3] <= 0:
    # A box without area cannot be cropped, resized or tracked.
    raise ValueError(f"No region was selected (got bbox={bbox}).")
bbox

(389, 34, 73, 66)

In [7]:
# Cut the annotated region out of the first frame.
cropped_image = crop_from_bbox(frame=frame, bbox=bbox)
# Optional visual check of the crop:
# cv2.imshow("Cropped Image", cropped_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

end x: 462
end y: 100
(66, 73, 3)


In [8]:
# Bring the crop to the size the feature model is fed with.
resized_img = resize_to_model_input(cropped_image=cropped_image)
# Optional visual check of the resized crop:
# cv2.imshow("Resized Image", resized_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

Now that we have cropped and resized our image for the model, our `helper functions` are working as expected. 
We are ready to extract a feature vector given the bounding box.

In [9]:
from deep_sort.nn_matching import NearestNeighborDistanceMetric

In [10]:
# Initialise the tracker object
# NOTE(review): "cosine" and 0.5 are presumably the distance metric name and
# its matching threshold -- confirm against deep_sort.nn_matching.
metric = NearestNeighborDistanceMetric("cosine", 0.5)
# NOTE(review): n_init is presumably the number of consecutive hits a track
# needs before it counts as confirmed. Only one tracker.update() call is
# visible in this notebook, so verify a track can ever reach 30.
tracker = Tracker(metric=metric, n_init=30)

In [11]:
# Turn the selected box into a plain list.
bbox = [*bbox]

In [12]:
# One entry: the four box values followed by a confidence of 1.0.
raw_detection = [[[bbox[i] for i in range(4)], 1.0]]

In [13]:
# Shape of the crop once a leading batch axis is added.
resized_img[np.newaxis].shape

(1, 128, 128, 3)

In [14]:
from deep_sort.detection import Detection

In [15]:
# Build the feature model, describe the user-labelled box and seed the
# tracker with that single detection.
with tf.compat.v1.Session() as sess:
    feature_detector = FeatureDetector()
    feature_detector.create_detector()
    # The model is fed a batch, so the single crop gets a leading batch axis.
    feature_vec = feature_detector.predict(np.expand_dims(resized_img, axis=0))
    # The prediction has one row per crop. Hand the detection that single
    # row (a 1-D vector) instead of the whole (1, N) batch.
    # NOTE(review): assumes deep_sort's Detection expects a 1-D feature, as in
    # the upstream implementation -- confirm against the local deep_sort copy.
    raw_detections = [Detection(bbox, 1.0, feature_vec[0])]
    subsequent_bbox = []
    tracker.update(raw_detections)
    # TODO: annotate a few further frames with cv2.selectROI, collect the
    # boxes in subsequent_bbox and feed each one through tracker.predict() /
    # tracker.update() with a feature computed from that frame's own crop.

Instructions for updating:
Colocations handled automatically by placer.
<keras.engine.functional.Functional object at 0x000001F85B5A9840>


  updates=self.state_updates,


det [<deep_sort.detection.Detection object at 0x000001F858AF66E0>]


In [16]:
%env KMP_DUPLICATE_LIB_OK=TRUE

env: KMP_DUPLICATE_LIB_OK=TRUE


In [17]:
# Step the tracker once per frame and draw the box of every confirmed track.
# NOTE(review): the loop only calls tracker.predict() -- there is no per-frame
# tracker.update() -- and unconfirmed tracks are skipped, so verify that a
# track can actually become confirmed; otherwise nothing is ever drawn.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read failure): stop cleanly.
            break
        tracker.predict()
        for track in tracker.tracks:
            bbox_updated = track.to_tlbr().astype(int)
            print(track.state)
            print(bbox_updated)
            if not track.is_confirmed():
                continue
            # Corners are (left, top) and (right, bottom).
            top_left = (bbox_updated[0], bbox_updated[1])
            bottom_right = (bbox_updated[2], bbox_updated[3])
            cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0))
        cv2.imshow("Tracking", frame)
        key = cv2.waitKey(30)
        # 'q' or Esc (key code 27) stops playback early.
        if key in (ord('q'), 27):
            break
finally:
    # Release the capture and close the windows even when the tracker raises
    # part-way through the video.
    cap.release()
    cv2.destroyAllWindows()

1
[388  34 462 100]
det [<deep_sort.detection.Detection object at 0x000001F858AF66E0>, <deep_sort.detection.Detection object at 0x000001F85A203220>]
1
[389  33 461 104]
det [<deep_sort.detection.Detection object at 0x000001F858AF66E0>, <deep_sort.detection.Detection object at 0x000001F85A203220>, <deep_sort.detection.Detection object at 0x000001F85A203280>]
1
[388  34 462 100]
det [<deep_sort.detection.Detection object at 0x000001F858AF66E0>, <deep_sort.detection.Detection object at 0x000001F85A203220>, <deep_sort.detection.Detection object at 0x000001F85A203280>, <deep_sort.detection.Detection object at 0x000001F85A1A7730>]
1
[389  33 460 104]
det [<deep_sort.detection.Detection object at 0x000001F858AF66E0>, <deep_sort.detection.Detection object at 0x000001F85A203220>, <deep_sort.detection.Detection object at 0x000001F85A203280>, <deep_sort.detection.Detection object at 0x000001F85A1A7730>, <deep_sort.detection.Detection object at 0x000001F85A1A7C70>]
1
[392  37 460 100]
det [<deep_s

  ret[2] /= ret[3]


det [<deep_sort.detection.Detection object at 0x000001F858AF66E0>, <deep_sort.detection.Detection object at 0x000001F85A203220>, <deep_sort.detection.Detection object at 0x000001F85A203280>, <deep_sort.detection.Detection object at 0x000001F85A1A7730>, <deep_sort.detection.Detection object at 0x000001F85A1A7C70>, <deep_sort.detection.Detection object at 0x000001F858D925C0>, <deep_sort.detection.Detection object at 0x000001F85A1D61A0>, <deep_sort.detection.Detection object at 0x000001F85A1D4400>, <deep_sort.detection.Detection object at 0x000001F85A22F880>, <deep_sort.detection.Detection object at 0x000001F85A1D6440>, <deep_sort.detection.Detection object at 0x000001F85B499BD0>, <deep_sort.detection.Detection object at 0x000001F85A22DA80>, <deep_sort.detection.Detection object at 0x000001F85B49BEB0>, <deep_sort.detection.Detection object at 0x000001F85A4475E0>, <deep_sort.detection.Detection object at 0x000001F858C2DAE0>, <deep_sort.detection.Detection object at 0x000001F858C2C2E0>, <de

ValueError: matrix contains invalid numeric entries