In [1]:
import time
import cv2
import pickle as pkl
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from IPython import display as dp
from collections import deque
import matplotlib.animation as animation

In [2]:
def drawRectangleOnImage(img, bbox, track=0, line=[], colour=(0, 255, 0)):
    b, g, r = colour
    img = cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (int(b), int(g), int(r)), 3)
    img = cv2.putText(img, str(track), (bbox[0], bbox[1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (int(b), int(g), int(r)), 3)
    for i in range(1, len(line)):
        img = cv2.line(img, line[i - 1], line[i], (int(b), int(g), int(r)), 3)
    return img

In [3]:
def get_box_center(bbox):
    center = (int(bbox[0]) + ((int(bbox[2]) - int(bbox[0])) // 2)), (int(bbox[1]) + ((int(bbox[3]) - int(bbox[1])) // 2)) # X + Width / 2, Y + Height / 2
    return center

In [4]:
# Detection Pre-Processing 

# Remove overlaps in the same frame
def remove_overlaps(detections_pd, tolerance=0.9):
    row_to_remove = []
    for detection in detections_pd.get('detection'):
        length, _ =detections_pd.shape
        for i in range(length):
            IoU = detection.IoU(detections_pd.iloc[i]['detection'])
            if IoU > tolerance and IoU < 1:
                row_to_remove.append(i)
                
    row_to_remove = np.unique(np.array(row_to_remove))
    detections_pd = detections_pd.drop(index=row_to_remove)
        
    return detections_pd

In [5]:
# Detection to DataFrame
def get_detection_dataframe(detections):
    bboxes = []
    bsizes = []
    lines = deque(maxlen=32)
    bdetections = []
    tracks = list(range(0, len(detections)))
    colours = []
    for i in range(len(detections)):
        colours.append(tuple(np.random.choice(range(256), size=3).astype('int')))
        
    updated = [False]*len(detections)
    
    for detection in detections:
        bbox = np.array(detection.getBBox()).astype('int')
        bboxes.append(bbox)
        
        centers = []
        centers.append(get_box_center(bbox))
        lines.append(centers)

        bsize = int(detection.areaOfRec())
        bsizes.append(bsize)

        bdetections.append(detection)

    detec = {
        'track': tracks,
        'detection': bdetections,
        'bbox': bboxes,
        'size': bsizes,
        'line': lines,
        'colour': colours,
        'updated': updated
    }
    detections_pd = pd.DataFrame(detec)
    detections_pd = detections_pd.sort_values(by=['size'], ascending=False)
    detections_pd = detections_pd.reset_index(drop=True)
    
    return detections_pd

In [6]:
def update_track(detections_pd, next_detections_pd, tolerance=0.5):
    detections_pd['updated'] = False
    detections_pd = detections_pd.reset_index(drop=True)
    
    # Loop each new detection
    for index, next_detection in next_detections_pd.iterrows():
        length, _ = detections_pd.shape
        
        # Find overlaps with max IoU and update if found
        IoUlist = []
        for i in range(length):
            IoU = next_detection['detection'].IoU(detections_pd.iloc[i]['detection'])
            IoUlist.append(IoU)
            
        indexMax = IoUlist.index(max(IoUlist))
            
        if max(IoUlist) > 0.5 and detections_pd.at[indexMax,'updated'] != True:
            detections_pd.at[indexMax,'detection'] = next_detection['detection']
            detections_pd.at[indexMax,'bbox'] = next_detection['bbox']
            detections_pd.at[indexMax,'size'] = next_detection['size']
            detections_pd.at[indexMax,'line'].append(next_detection['line'][0])
            detections_pd.at[indexMax,'updated'] = True
            next_detections_pd.at[index, 'updated'] = True
    
    # Drop detections no longer exist
    detections_pd = detections_pd[detections_pd['updated'] == True]
                
    # Start tracking new detections
    counter = 0
    if any(next_detections_pd['updated'] == False):
        new_pd = next_detections_pd[next_detections_pd['updated']==False]
        new_pd = new_pd.reset_index(drop=True)
        
        # Generate new track number      
        for i in range(len(new_pd)):
            while counter in detections_pd['track'].tolist():
                counter = counter + 1
            new_pd.at[i, 'track'] = counter
            counter = counter + 1

        # Add new tracks
        detections_pd = pd.concat([detections_pd, new_pd])
            
    detections_pd = detections_pd.reset_index(drop=True)
    return detections_pd

In [7]:
# Load paths
# detection_path = 'detection_pkls/retinanet_101_detections.pkl'
detection_path = 'detection_pkls/maskRCNN_101_detections.pkl'
data_path = '../datasets/AICity_data/train/S03/c010/'

# Load detections
with open(detection_path , 'rb') as f:
    all_detections = pkl.load(f)

In [8]:
# Pre-process the first frame if needed
# detections_pd = remove_overlaps(detections_pd, 0.5)
# detections_pd

In [9]:
display = False
gif = False

if gif:
    fig, ax = plt.subplots()
    plt.axis('off')

ims = []
detection_history = []

# Get the First Frame
detections_pd = get_detection_dataframe(all_detections['0'])
detection_history.append(detections_pd)

# Load video
vidcap = cv2.VideoCapture(data_path + 'vdo.avi')
_, image = vidcap.read()
num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

# Set plot
if display:
    plt.ion() 
    plt.figure(figsize=(20, 12))
    plt.axis('off') 

# Iterate Frames
skip = 4
for frame in tqdm(range(1,num_frames//skip)):
    for i in range(skip):
        _, image = vidcap.read()
    frame = frame*skip

# for frame in tqdm(range(1,num_frames-1)):
#     _, image = vidcap.read()
    
    next_detections_pd = get_detection_dataframe(all_detections[str(frame)])
    detections_pd = update_track(detections_pd, next_detections_pd, tolerance=0.5)
    detection_history.append(detections_pd)
    
    for index, row in detections_pd.iterrows():
        image = drawRectangleOnImage(image, row['bbox'], track=row['track'], line=row['line'],colour=row['colour'])
        
    if display:
        plt.title('Frame:'+str(frame))
        plt.imshow(image)  
    
    if gif:
        im = ax.imshow(image, animated=True)
        ims.append([im])
        
    if display:
        dp.clear_output(wait=True)
        dp.display(plt.gcf())
        time.sleep(0.000001)
        plt.cla()

if gif:
    ani = animation.ArtistAnimation(fig, ims, interval=10, blit=True, repeat_delay=10000)
    ani.save('Tracking_with_IoU_overlap' + ".gif", writer=animation.PillowWriter(fps=24))
    
# Open a file and use dump()
with open('tracking_history/tracking_history.pkl', 'wb') as file:
    # A new file will be created
    pkl.dump(detection_history, file)

100%|██████████| 534/534 [00:14<00:00, 36.11it/s]


# TEST

In [10]:
# with open('tracking_history/tracking_history.pkl', 'rb') as file:
#     # Call load method to deserialze
#     tracking_hist = pkl.load(file)

In [21]:
# tracking_hist[300]

Unnamed: 0,track,detection,bbox,size,line,colour,updated
0,0,"Frame 1200, TL [1286.4302978515625,363.1813049...","[1286, 363, 1521, 543]",42519,"[(1403, 451), (1404, 452), (1405, 452), (1404,...","(32, 238, 237)",False
1,4,"Frame 1200, TL [1179.3360595703125,101.5149078...","[1179, 101, 1229, 162]",3077,"[(1203, 130), (1203, 129), (1203, 130), (1203,...","(89, 96, 28)",False
2,2,"Frame 1200, TL [927.9996337890625,78.085601806...","[927, 78, 1017, 146]",6147,"[(979, 111), (975, 111), (975, 112), (973, 112...","(121, 22, 38)",False
3,7,"Frame 1200, TL [585.859619140625,75.2061691284...","[585, 75, 657, 112]",2660,"[(620, 93), (621, 93), (620, 93), (621, 93), (...","(32, 229, 89)",False
4,8,"Frame 1200, TL [884.114501953125,92.7830734252...","[884, 92, 942, 142]",2903,"[(913, 117), (911, 117), (912, 117), (909, 118...","(228, 46, 137)",False
5,5,"Frame 1200, TL [563.5025634765625,96.675804138...","[563, 96, 659, 170]",7039,"[(610, 134), (611, 134), (611, 134), (610, 134...","(198, 178, 172)",False
6,1,"Frame 1200, TL [945.3429565429688,221.46107482...","[945, 221, 1135, 381]",30421,"[(1146, 992), (1146, 937), (1134, 891), (1125,...","(66, 31, 95)",False
7,3,"Frame 1200, TL [500.8360290527344,580.22784423...","[500, 580, 808, 839]",79795,"[(654, 709)]","(97, 224, 177)",False
8,6,"Frame 1200, TL [906.6154174804688,99.351150512...","[906, 99, 940, 142]",1450,"[(923, 120)]","(152, 106, 238)",False


In [12]:
# tracking_hist[100]

Unnamed: 0,track,detection,bbox,size,line,colour,updated
0,0,"Frame 400, TL [1289.586669921875,357.427551269...","[1289, 357, 1520, 544]",43148,"[(1403, 451), (1404, 452), (1405, 452), (1404,...","(32, 238, 237)",False
1,1,"Frame 400, TL [561.9972534179688,95.6774749755...","[561, 95, 661, 168]",7263,"[(611, 132), (611, 133), (610, 133), (610, 133...","(103, 26, 233)",False
2,4,"Frame 400, TL [1178.5264892578125,101.55039215...","[1178, 101, 1228, 160]",2951,"[(1203, 130), (1203, 129), (1203, 130), (1203,...","(89, 96, 28)",False
3,3,"Frame 400, TL [584.4474487304688,73.7105865478...","[584, 73, 653, 101]",1887,"[(621, 91), (622, 91), (622, 91), (622, 91), (...","(95, 16, 145)",False
4,2,"Frame 400, TL [923.1637573242188,77.7798690795...","[923, 77, 1016, 146]",6384,"[(979, 111), (975, 111), (975, 112), (973, 112...","(121, 22, 38)",False
5,5,"Frame 400, TL [882.3162841796875,93.3272247314...","[882, 93, 940, 141]",2789,"[(922, 115), (916, 116), (911, 117), (914, 116...","(198, 173, 161)",False
6,6,"Frame 400, TL [715.5228881835938,82.7331924438...","[715, 82, 790, 141]",4400,"[(675, 127), (680, 126), (684, 125), (689, 124...","(51, 64, 192)",False
7,9,"Frame 400, TL [671.905517578125,99.42324066162...","[671, 99, 728, 141]",2377,"[(673, 126), (678, 125), (687, 123), (695, 122...","(241, 88, 137)",False
8,7,"Frame 400, TL [549.6688842773438,77.2476196289...","[549, 77, 644, 153]",7202,"[(596, 115)]","(205, 238, 27)",False
9,8,"Frame 400, TL [637.3250732421875,47.4489097595...","[637, 47, 721, 81]",2825,"[(679, 64)]","(130, 124, 80)",False


In [13]:
# for index, row in detections_pd.iterrows():
#     print(row['colour'])
#     image = drawRectangleOnImage(image, row['bbox'], track=row['track'],colour=row['colour'])

In [14]:
# plt.figure(figsize=(20, 12))
# plt.imshow(image)

In [15]:
#Test......
# test_num = 396
# detections_pd = get_detection_dataframe(all_detections[str(test_num)])
# next_detections_pd = get_detection_dataframe(all_detections[str(test_num+1)])

In [16]:
# detections_pd

Unnamed: 0,track,detection,bbox,size,line,colour,updated
0,1,"Frame 396, TL [1289.6591796875,357.67657470703...","[1289, 357, 1520, 544]",43179,"[(1404, 450)]","(1, 168, 142)",False
1,0,"Frame 396, TL [562.3311767578125,96.7260894775...","[562, 96, 661, 169]",7197,"[(611, 132)]","(246, 50, 91)",False
2,7,"Frame 396, TL [923.9907836914062,78.1038970947...","[923, 78, 1016, 147]",6430,"[(969, 112)]","(65, 207, 47)",False
3,2,"Frame 396, TL [716.2720336914062,82.9245223999...","[716, 82, 786, 142]",4156,"[(751, 112)]","(126, 204, 147)",False
4,6,"Frame 396, TL [1178.8721923828125,101.01545715...","[1178, 101, 1229, 161]",3037,"[(1203, 131)]","(61, 57, 233)",False
5,5,"Frame 396, TL [884.382080078125,93.85597229003...","[884, 93, 941, 141]",2705,"[(912, 117)]","(93, 119, 127)",False
6,4,"Frame 396, TL [734.227783203125,53.06563568115...","[734, 53, 814, 85]",2604,"[(774, 69)]","(121, 140, 235)",False
7,3,"Frame 396, TL [669.8187866210938,102.656311035...","[669, 102, 721, 143]",2116,"[(695, 122)]","(14, 243, 99)",False
8,8,"Frame 396, TL [586.5023193359375,73.9231796264...","[586, 73, 653, 100]",1745,"[(619, 86)]","(74, 118, 20)",False


In [17]:
# next_detections_pd

Unnamed: 0,track,detection,bbox,size,line,colour,updated
0,1,"Frame 397, TL [1289.6507568359375,357.91903686...","[1289, 357, 1520, 544]",43041,"[(1404, 450)]","(236, 224, 65)",False
1,0,"Frame 397, TL [562.4141235351562,96.5187225341...","[562, 96, 661, 168]",7184,"[(611, 132)]","(167, 146, 131)",False
2,9,"Frame 397, TL [546.1447143554688,81.8800659179...","[546, 81, 650, 148]",6894,"[(598, 114)]","(228, 248, 22)",False
3,7,"Frame 397, TL [923.653564453125,78.12397766113...","[923, 78, 1016, 146]",6406,"[(969, 112)]","(22, 240, 186)",False
4,2,"Frame 397, TL [715.5631103515625,83.0884628295...","[715, 83, 788, 141]",4264,"[(751, 112)]","(166, 81, 225)",False
5,6,"Frame 397, TL [1178.8702392578125,101.17209625...","[1178, 101, 1229, 160]",2989,"[(1203, 130)]","(3, 193, 97)",False
6,4,"Frame 397, TL [883.7351684570312,93.6953430175...","[883, 93, 941, 141]",2751,"[(912, 117)]","(116, 242, 171)",False
7,5,"Frame 397, TL [701.275634765625,51.50396728515...","[701, 51, 777, 83]",2457,"[(739, 67)]","(146, 13, 104)",False
8,3,"Frame 397, TL [669.6466674804688,100.004211425...","[669, 100, 725, 142]",2352,"[(697, 121)]","(133, 186, 147)",False
9,8,"Frame 397, TL [586.0680541992188,73.7146682739...","[586, 73, 652, 99]",1738,"[(619, 86)]","(114, 159, 128)",False


In [18]:
# detections_pd = update_track(detections_pd, next_detections_pd, tolerance=0.5)
# detections_pd

Unnamed: 0,track,detection,bbox,size,line,colour,updated
0,1,"Frame 397, TL [1289.6507568359375,357.91903686...","[1289, 357, 1520, 544]",43041,"[(1404, 450), (1404, 450)]","(1, 168, 142)",True
1,0,"Frame 397, TL [562.4141235351562,96.5187225341...","[562, 96, 661, 168]",7184,"[(611, 132), (611, 132)]","(246, 50, 91)",True
2,7,"Frame 397, TL [923.653564453125,78.12397766113...","[923, 78, 1016, 146]",6406,"[(969, 112), (969, 112)]","(65, 207, 47)",True
3,2,"Frame 397, TL [715.5631103515625,83.0884628295...","[715, 83, 788, 141]",4264,"[(751, 112), (751, 112)]","(126, 204, 147)",True
4,6,"Frame 397, TL [1178.8702392578125,101.17209625...","[1178, 101, 1229, 160]",2989,"[(1203, 131), (1203, 130)]","(61, 57, 233)",True
5,5,"Frame 397, TL [883.7351684570312,93.6953430175...","[883, 93, 941, 141]",2751,"[(912, 117), (912, 117)]","(93, 119, 127)",True
6,3,"Frame 397, TL [669.6466674804688,100.004211425...","[669, 100, 725, 142]",2352,"[(695, 122), (697, 121)]","(14, 243, 99)",True
7,8,"Frame 397, TL [586.0680541992188,73.7146682739...","[586, 73, 652, 99]",1738,"[(619, 86), (619, 86)]","(74, 118, 20)",True
8,4,"Frame 397, TL [546.1447143554688,81.8800659179...","[546, 81, 650, 148]",6894,"[(598, 114)]","(228, 248, 22)",False
9,9,"Frame 397, TL [701.275634765625,51.50396728515...","[701, 51, 777, 83]",2457,"[(739, 67)]","(146, 13, 104)",False


In [19]:
# next_detections_pd

Unnamed: 0,track,detection,bbox,size,line,colour,updated
0,1,"Frame 397, TL [1289.6507568359375,357.91903686...","[1289, 357, 1520, 544]",43041,"[(1404, 450)]","(236, 224, 65)",True
1,0,"Frame 397, TL [562.4141235351562,96.5187225341...","[562, 96, 661, 168]",7184,"[(611, 132)]","(167, 146, 131)",True
2,9,"Frame 397, TL [546.1447143554688,81.8800659179...","[546, 81, 650, 148]",6894,"[(598, 114)]","(228, 248, 22)",False
3,7,"Frame 397, TL [923.653564453125,78.12397766113...","[923, 78, 1016, 146]",6406,"[(969, 112)]","(22, 240, 186)",True
4,2,"Frame 397, TL [715.5631103515625,83.0884628295...","[715, 83, 788, 141]",4264,"[(751, 112)]","(166, 81, 225)",True
5,6,"Frame 397, TL [1178.8702392578125,101.17209625...","[1178, 101, 1229, 160]",2989,"[(1203, 130)]","(3, 193, 97)",True
6,4,"Frame 397, TL [883.7351684570312,93.6953430175...","[883, 93, 941, 141]",2751,"[(912, 117)]","(116, 242, 171)",True
7,5,"Frame 397, TL [701.275634765625,51.50396728515...","[701, 51, 777, 83]",2457,"[(739, 67)]","(146, 13, 104)",False
8,3,"Frame 397, TL [669.6466674804688,100.004211425...","[669, 100, 725, 142]",2352,"[(697, 121)]","(133, 186, 147)",True
9,8,"Frame 397, TL [586.0680541992188,73.7146682739...","[586, 73, 652, 99]",1738,"[(619, 86)]","(114, 159, 128)",True
