In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Kalman

In [2]:
from KalmanFilter import KalmanFilter
from Detector import detect
import cv2 as cv

In [3]:
k_filter = KalmanFilter(d_t=0.1, u_x=2, u_y=2, std_acc=1,
                            x_std_meas=0.5, y_std_meas=0.5)

test_file='randomball.avi'

In [4]:
cap = cv.VideoCapture(test_file)

rect_size = np.array([25,25], dtype=int)
rect_half = np.array([12,12], dtype=int)

last_pos = None

fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('output.avi', fourcc, 20.0, (640, 360))
out2 = cv.VideoWriter('path.avi', fourcc, 20.0, (640, 360))

In [5]:
path_lines = []
while cap.isOpened():
    ret, frame = cap.read()
    #print(frame.shape)

    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break

    # apply kalman filter
    obj_centers = detect(frame)

    if len(obj_centers) > 0:
        #print('out:', obj_centers)
        detected_pos = obj_centers[0]
        #print(detected_pos)
        # track first detected object
        pred_pos, P_k = k_filter.predict()
        #print(pred_pos.shape)
        #print(P_k.shape)
        #print(detected_pos.shape)
        k_filter.update(detected_pos, pred_pos, P_k)

        # retrieve predicted position

        estimated_pos = k_filter.xk[:2]

        # draw 
        # detected
        detected = detected_pos.squeeze(1).astype(int)
        cv.circle(frame, tuple(detected), 5, (0,255,0), 3)
        # pred
        #print('pred', pred_pos.shape)
        #print('pred_val', pred_pos)
        pred = pred_pos[:2].squeeze(1).astype(int)
        #print(pred + rect_size)
        cv.rectangle(frame, tuple(pred - rect_half), tuple(pred + rect_half), (0,0,255), 1)
        # estimated
        est = estimated_pos.squeeze(1).astype(int)
        cv.rectangle(frame, tuple(est - rect_half), tuple(est + rect_half), (255,0,0), 1)

        # track path
        if last_pos is not None:
            # draw line from last_pos to current estimated pos
            path_lines.append([tuple(last_pos), tuple(est)])

        last_pos = est


    #cv.imshow('frame', gray)
    out.write(frame)
    if cv.waitKey(1) == ord('q'):
        break

blank_frame = (np.zeros((360, 640, 3), dtype=np.uint8) + 255).copy()
for lines in path_lines:
    cv.line(blank_frame, lines[0], lines[1], (20,20,20), 2)
    out2.write(blank_frame)

cap.release()
out.release()
out2.release()
cv.destroyAllWindows()

Can't receive frame (stream end?). Exiting ...


In [6]:
!dir

 Volume in drive C has no label.
 Volume Serial Number is AC25-8315

 Directory of C:\Users\theop\mlvot\mlvot

01/07/2025  01:19 AM    <DIR>          .
01/05/2025  03:16 PM    <DIR>          ..
01/05/2025  03:17 PM    <DIR>          .git
01/05/2025  09:26 PM    <DIR>          .ipynb_checkpoints
01/05/2025  03:26 PM    <DIR>          ADL-Rundle-6
01/05/2025  04:47 PM             1,304 Detector.py
01/07/2025  01:18 AM            76,657 IoUxKalman.ipynb
01/06/2025  09:52 PM             2,421 KalmanFilter.py
01/05/2025  04:44 PM             2,584 objTracking.py
01/07/2025  01:20 AM         3,572,282 output.avi
01/07/2025  01:20 AM         6,037,754 path.avi
01/05/2025  04:40 PM         3,057,678 randomball.avi
01/06/2025  08:52 PM           843,254 reid_osnet_x025_market1501.onnx
01/06/2025  09:53 PM    <DIR>          __pycache__
               8 File(s)     13,593,934 bytes
               6 Dir(s)  39,228,260,352 bytes free


In [7]:
import os

data_folder = 'ADL-Rundle-6'

In [8]:
detections = pd.read_csv(os.path.join(data_folder, 'det', 'Yolov5l', 'det.txt'), header=None, sep=' ')

In [9]:
path = os.path.join(data_folder, 'det', 'Yolov5l', 'det.txt')

def reload_data(path, confidence=None):
    detections = pd.read_csv(path, header=None, sep=' ')
    detections.drop([7, 8, 9], axis=1, inplace=True)
    detections.rename({0: 'frame', 2: 'left', 3: "top", 4: 'width', 5: 'height', 6: 'confidence', 1: 'tracked_id'}, axis=1, inplace=True)
    # create auxiliary fields to reduce computations
    detections['right'] = np.zeros(len(detections))
    detections['bottom'] = np.zeros(len(detections))
    detections['area'] = np.zeros(len(detections))
    detections['center_x'] = np.zeros(len(detections))
    detections['center_y'] = np.zeros(len(detections))
    detections['pred_left'] = np.zeros(len(detections))
    detections['pred_right'] = np.zeros(len(detections))
    detections['pred_top'] = np.zeros(len(detections))
    detections['pred_bottom'] = np.zeros(len(detections))
    detections['sim'] = np.zeros(len(detections))
    if confidence is not None:
        detections = detections[detections['confidence'].astype(float) >= confidence]
        detections.reset_index(drop=True, inplace=True)
    return detections 

In [10]:
detections = reload_data(path)

In [11]:
detections

Unnamed: 0,frame,tracked_id,left,top,width,height,confidence,right,bottom,area,center_x,center_y,pred_left,pred_right,pred_top,pred_bottom,sim
0,1,-1,1700,391,156,337,0.914550,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,-1,250,456,107,248,0.883148,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,-1,1255,539,60,118,0.826354,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,-1,1288,459,73,199,0.745969,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,-1,120,504,93,239,0.740778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4557,525,-1,1432,457,96,193,0.841939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4558,525,-1,1340,439,95,218,0.815440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4559,525,-1,119,503,87,244,0.639956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4560,525,-1,1676,440,58,201,0.568499,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
from scipy.optimize import linear_sum_assignment

def process_detection_frame(df, i, missing_frames=40, jacc_thres=0.9):
    curr = df[df['frame'] == i].copy()
    # get detections from previous frames where the obj was associated
    prev = df[(df['frame'] > i - missing_frames) & (df['frame'] <= i - 1) & (df['tracked_id'] != -1)].copy()
    prev['og_idx'] = prev.index.values
    prev = prev.groupby('tracked_id').last()
    prev.set_index('og_idx', inplace=True)
    
    corr_mat = np.ones((len(curr), len(prev)))
    
    track_idx = df['tracked_id'].astype(int).max()
    track_idx = track_idx if track_idx > 0 else 0
    #print(track_idx)
    
    for j_id, j in enumerate(curr.index.values):
        obj_1 = df.iloc[j]
        obj_1_area = obj_1['width'] * obj_1['height']
        obj_1_r = obj_1['left'] + obj_1['width']
        obj_1_b = obj_1['top'] + obj_1['height']
        df.iloc[j, df.columns.get_loc('right')] = obj_1_r
        df.iloc[j, df.columns.get_loc('bottom')] = obj_1_b
        df.iloc[j, df.columns.get_loc('area')] = obj_1_area
        
        for k_id, k in enumerate(prev.index.values):
            # compare with each object detected in previous frame
            obj_2 = df.iloc[k]
            obj_2_area = obj_2['area'] #obj_2['width'] * obj_2['height']
            obj_2_r = obj_2['right'] #obj_2['left'] + obj_2['width']
            obj_2_b = obj_2['bottom'] #obj_2['top'] + obj_2['height']

            # compute intersection
            intersec_width = max(0, min(obj_1_r, obj_2_r) - max(obj_1['left'], obj_2['left']))
            intersec_height = max(0, min(obj_1_b, obj_2_b) - max(obj_1['top'], obj_2['top']))

            intersec_area = intersec_height * intersec_width
            union_area = obj_1_area + obj_2_area - intersec_area
            assert union_area != 0, f'{obj_1_area} {obj_2_area} {intersec_area}'

            iou = intersec_area / union_area
            jacc_idx = 1 - iou
            
            corr_mat[j_id,k_id] = jacc_idx
    
    # associate tracks with detections:
    curr_id, best_assc = linear_sum_assignment(corr_mat)

    to_rm = []
    # process to remove values under treshold
    for (r,c) in zip(curr_id, best_assc):
        if corr_mat[r,c] >= jacc_thres:
            to_rm.append((r,c))
    
    # get untracked objects
    new_tracks = [m for m in range(len(curr)) if m not in curr_id] + [m for m, _ in to_rm]
    # create new tracks
    for nt in new_tracks:
        df.iloc[curr.index.values[nt], df.columns.get_loc('tracked_id')] = track_idx + 1
        track_idx += 1

    for (r,c) in zip(curr_id, best_assc):
        if (r,c) in to_rm:
            continue
        # prev
        t_id = df.iloc[prev.index.values[c], df.columns.get_loc('tracked_id')]
        # curr
        df.iloc[curr.index.values[r], df.columns.get_loc('tracked_id')] = t_id
        

def process_video(path):
    df = reload_data(path)
    nb_frames = pd.unique(df['frame']).max()

    for i in range(1, nb_frames + 1):
        process_detection_frame(df, i, jacc_thres=0.8, missing_frames=40)
    return df


In [13]:
tracked = process_video(path)

In [14]:
tracked

Unnamed: 0,frame,tracked_id,left,top,width,height,confidence,right,bottom,area,center_x,center_y,pred_left,pred_right,pred_top,pred_bottom,sim
0,1,1,1700,391,156,337,0.914550,1856.0,728.0,52572.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2,250,456,107,248,0.883148,357.0,704.0,26536.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,3,1255,539,60,118,0.826354,1315.0,657.0,7080.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,4,1288,459,73,199,0.745969,1361.0,658.0,14527.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,5,120,504,93,239,0.740778,213.0,743.0,22227.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4557,525,71,1432,457,96,193,0.841939,1528.0,650.0,18528.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4558,525,69,1340,439,95,218,0.815440,1435.0,657.0,20710.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4559,525,5,119,503,87,244,0.639956,206.0,747.0,21228.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4560,525,68,1676,440,58,201,0.568499,1734.0,641.0,11658.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
import cv2 as cv

def render_tracking(imgs_path, df, out):
    tmp_img = cv.imread(os.path.join(imgs_path, os.listdir(imgs_path)[0]))
    
    fourcc = cv.VideoWriter_fourcc(*'DIVX')
    out = cv.VideoWriter(out + '.avi', fourcc, 24.0, (tmp_img.shape[1], tmp_img.shape[0]))
    
    for i, im_path in enumerate(os.listdir(imgs_path)):
        if i == 0:
            continue
        im = cv.imread(os.path.join(imgs_path, im_path))
        data = df[df['frame'] == i].values
        for j in range(len(data)):
            obj = data[j]
            cv.rectangle(im, (int(obj[2]), int(obj[3])), (int(obj[2]) + int(obj[4]), int(obj[3]) + int(obj[5])), (255,0,0), 2)
            cv.putText(im, str(obj[1]), (int(obj[2]), int(obj[3])), cv.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv.LINE_AA)
            
        out.write(im)
    out.release()
    cv.destroyAllWindows()

In [16]:
render_tracking(os.path.join('ADL-Rundle-6', 'img1'), tracked, 'out_iou')

# Add kalman filters

In [17]:
from KalmanFilter import KalmanFilter

In [18]:
WIDTH = 1920
HEIGHT = 1080

In [19]:
filters = []

def process_detection_frame_kalman(df, i, missing_frames=50, jacc_thres=0.99, kalman_min_c=2, steps=True, dt=0.1):
    #print('frame', i)
    # filters is list of KalmanFilter classes for each tracked object

    curr = df[df['frame'] == i].copy()
    # get detections from previous frames where the obj was associated
    prev = df[(df['frame'] > i - missing_frames) & (df['frame'] <= i - 1) & (df['tracked_id'] != -1)].copy()
    prev['og_idx'] = prev.index.values
    prev = prev.groupby('tracked_id').last()
    prev.set_index('og_idx', inplace=True)
    
    corr_mat = np.ones((len(curr), len(prev)))
    
    track_idx = df['tracked_id'].astype(int).max()
    track_idx = track_idx if track_idx > 0 else 0
    #print(track_idx)

    prev_predictions = []
    prev_errors = []
    # pre compute kalman predictions and errors
    for p in prev.index.values:
        obj = df.iloc[p]
        pred, err = filters[int(obj['tracked_id'] - 1)].predict()
        prev_predictions.append(pred)
        prev_errors.append(err)
    
    for j_id, j in enumerate(curr.index.values):
        obj_1 = df.iloc[j]
        #print('CURRENT')
        #print(obj_1)
        obj_1_area = obj_1['width'] * obj_1['height']
        obj_1_r = obj_1['left'] + obj_1['width']
        obj_1_b = obj_1['top'] + obj_1['height']
        obj_1_cx = int((obj_1_r + obj_1['left']) / 2)
        obj_1_cy = int((obj_1_b + obj_1['top']) / 2)
        
        # update fields in df to reduce computations
        df.iloc[j, df.columns.get_loc('right')] = obj_1_r
        df.iloc[j, df.columns.get_loc('bottom')] = obj_1_b
        df.iloc[j, df.columns.get_loc('area')] = obj_1_area
        df.iloc[j, df.columns.get_loc('center_x')] = obj_1_cx
        df.iloc[j, df.columns.get_loc('center_y')] = obj_1_cy
        
        for k_id, k in enumerate(prev.index.values):
            # compare with each object detected in previous frame
            # apply kalman filter to predict pos
            obj_2 = df.iloc[k]
            #if j_id == 0:
                #print('PREV')
                #print(obj_2)

            # get next pos with Kalman
            if filters[int(obj_2['tracked_id'] - 1)].counter < kalman_min_c:
                pred = [obj_2['center_x'], obj_2['center_y']]
            else:
                #print(f'counter for {obj_2['tracked_id']} above thres')
                if steps and (diff := obj_2['frame'] - i) > 1:
                    pred = filters[int(obj_2['tracked_id'] - 1)].predict_steps(diff+1)
                else:
                    pred = prev_predictions[k_id]

            obj_2_l = max(0,int(pred[0].item() - (obj_2['width'] / 2)))
            obj_2_r = min(WIDTH, int(pred[0].item() + (obj_2['width'] / 2)))
            obj_2_t = max(0,int(pred[1].item() - (obj_2['height'] / 2)))
            obj_2_b = min(HEIGHT, int(pred[1].item() + (obj_2['height'] / 2)))

            # compute intersection
            intersec_width = max(0, min(obj_1_r, obj_2_r) - max(obj_1['left'], obj_2_l))
            intersec_height = max(0, min(obj_1_b, obj_2_b) - max(obj_1['top'], obj_2_t))

            intersec_area = intersec_height * intersec_width
            union_area = obj_1_area + obj_2['area'] - intersec_area

            iou = intersec_area / union_area
            jacc_idx = 1 - iou
            
            corr_mat[j_id,k_id] = jacc_idx
    
    # associate tracks with detections:
    curr_id, best_assc = linear_sum_assignment(corr_mat)
    #print(curr_id, best_assc)

    to_rm = []
    # process to remove values under treshold
    for (r,c) in zip(curr_id, best_assc):
        if corr_mat[r,c] >= jacc_thres:
            #print(f'remove {(r,c)} val {corr_mat[r,c]}')
            to_rm.append((r,c))
    
    # get untracked objects
    new_tracks = [m for m in range(len(curr)) if m not in curr_id] + [m for m, _ in to_rm]
    # create new tracks
    for nt in new_tracks:
        df.iloc[curr.index.values[nt], df.columns.get_loc('tracked_id')] = track_idx + 1
        track_idx += 1
        # create associated filter and feed first position
        filters.append(KalmanFilter(dt, 1, 1, 1, 0.1, 0.1))
        pred, P_k = filters[track_idx - 1].predict()
        obj_center = np.expand_dims(np.array([df.iloc[curr.index.values[nt], df.columns.get_loc('center_x')], 
                      df.iloc[curr.index.values[nt], df.columns.get_loc('center_y')]]), 1)
        filters[track_idx - 1].update(obj_center, pred, P_k)
        #print(f'added track {track_idx}')

    for (r,c) in zip(curr_id, best_assc):
        if (r,c) in to_rm:
            continue
        #print('here')
        
        # prev
        t_id = df.iloc[prev.index.values[c], df.columns.get_loc('tracked_id')]
        # curr
        df.iloc[curr.index.values[r], df.columns.get_loc('tracked_id')] = t_id
        # update kalman filters' state
        pred = prev_predictions[c]
        err = prev_errors[c]
        obj_center = np.expand_dims(np.array([df.iloc[curr.index.values[r], df.columns.get_loc('center_x')], 
                      df.iloc[curr.index.values[r], df.columns.get_loc('center_y')]]), 1)
        
        filters[t_id-1].update(obj_center, pred, err)
        #print(f'link {(r,c)} ({(curr.index.values[r], prev.index.values[c])}) track_id {t_id}')

        #if filters[t_id - 1].counter > kalman_min_c:
        po = df.iloc[prev.index.values[c]]
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_left')] = int(pred[0].item() - (po['width'] / 2))
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_right')] = int(pred[0].item() + (po['width'] / 2))
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_top')] = int(pred[1].item() - (po['height'] / 2))
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_bottom')] = int(pred[1].item() + (po['height'] / 2))


In [20]:
def process_video_kalman(path):
    df = reload_data(path)
    nb_frames = pd.unique(df['frame']).max()
    filters = []

    for i in range(1, nb_frames + 1):
        process_detection_frame_kalman(df, i, kalman_min_c=24, jacc_thres=0.99, missing_frames=40, steps=True, dt=1/5)
    return df

In [21]:
def render_tracking_with_preds(imgs_path, df, out):
    tmp_img = cv.imread(os.path.join(imgs_path, os.listdir(imgs_path)[0]))
    
    fourcc = cv.VideoWriter_fourcc(*'DIVX')
    out = cv.VideoWriter(out + '.avi', fourcc, 24.0, (tmp_img.shape[1], tmp_img.shape[0]))
    
    for i, im_path in enumerate(os.listdir(imgs_path)):
        if i == 0:
            continue
        im = cv.imread(os.path.join(imgs_path, im_path))
        data = df[df['frame'] == i].values
        for j in range(len(data)):
            obj = data[j]
            cv.rectangle(im, (int(obj[2]), int(obj[3])), (int(obj[2]) + int(obj[4]), int(obj[3]) + int(obj[5])), (255,0,0), 2)
            cv.putText(im, str(obj[1]), (int(obj[2]), int(obj[3])), cv.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv.LINE_AA)
            if int(obj[13]) > 1 or int(obj[15]) > 1:
                cv.rectangle(im, (int(obj[12]), int(obj[14])), (int(obj[13]), int(obj[15])), (0,255,0), 3)
                cv.putText(im, str(obj[1]), (int(obj[12]), int(obj[14])), cv.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2, cv.LINE_AA)
            
        out.write(im)
    out.release()
    cv.destroyAllWindows()

In [22]:
filters = []
tracked_kalman = process_video_kalman(path)

In [23]:
tracked_kalman

Unnamed: 0,frame,tracked_id,left,top,width,height,confidence,right,bottom,area,center_x,center_y,pred_left,pred_right,pred_top,pred_bottom,sim
0,1,1,1700,391,156,337,0.914550,1856.0,728.0,52572.0,1778.0,559.0,0.0,0.0,0.0,0.0,0.0
1,1,2,250,456,107,248,0.883148,357.0,704.0,26536.0,303.0,580.0,0.0,0.0,0.0,0.0,0.0
2,1,3,1255,539,60,118,0.826354,1315.0,657.0,7080.0,1285.0,598.0,0.0,0.0,0.0,0.0,0.0
3,1,4,1288,459,73,199,0.745969,1361.0,658.0,14527.0,1324.0,558.0,0.0,0.0,0.0,0.0,0.0
4,1,5,120,504,93,239,0.740778,213.0,743.0,22227.0,166.0,623.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4557,525,55,1432,457,96,193,0.841939,1528.0,650.0,18528.0,1480.0,553.0,849.0,926.0,237.0,432.0,0.0
4558,525,42,1340,439,95,218,0.815440,1435.0,657.0,20710.0,1387.0,548.0,1340.0,1437.0,443.0,657.0,0.0
4559,525,7,119,503,87,244,0.639956,206.0,747.0,21228.0,162.0,625.0,117.0,207.0,504.0,748.0,0.0
4560,525,37,1676,440,58,201,0.568499,1734.0,641.0,11658.0,1705.0,540.0,1674.0,1735.0,442.0,638.0,0.0


In [24]:
render_tracking_with_preds(os.path.join('ADL-Rundle-6', 'img1'), tracked_kalman, 'out_kalman')

# Re Identification

In [25]:
import onnxruntime as ort
from sklearn.metrics.pairwise import cosine_similarity

def load_model(path):
    return ort.InferenceSession(path)

def inference(model, input):
    return model.run(None, {'modelInput': input})[0]

def cosine_sim(a, b):
    return cosine_similarity(a, b)

In [26]:
model = load_model('reid_osnet_x025_market1501.onnx')
input_shape = model.get_inputs()[0].shape
print(input_shape)

['batch_size', 3, 128, 64]


In [27]:
model_path = 'reid_osnet_x025_market1501.onnx'

In [28]:
import torch

def crop_image(base_im, top, left, width, height):
    cropped = base_im[int(top):int(top+height), int(left):int(left+width), :]
    resized = cv.resize(cropped, (128, 64), interpolation=cv.INTER_LINEAR).astype(np.float32)
    mean = np.mean(resized)
    std = np.std(resized)
    resized = (resized - mean) / std
    return np.expand_dims(np.moveaxis(np.moveaxis(resized, -1, 0), 1, -1), 0)


In [29]:
imgs_path = os.path.join('ADL-Rundle-6', 'img1')

In [30]:
filters = []

def process_detection_frame_kalman_re_id(df, i, imgs_path, model, alpha=0.5, beta=0.5, missing_frames=50, sim_thres=0.3,
                                         jacc_thres=0.99, iou_thres=0.1, kalman_min_c=2, steps=True, dt=0.1):
    assert np.round(alpha + beta, decimals=4) == 1.0
    curr = df[df['frame'] == i].copy()
    curr_frame_path = os.path.join(imgs_path, str(i).zfill(6) + '.jpg')
    curr_frame = cv.imread(curr_frame_path)
    # get detections from previous frames where the obj was associated
    prev = df[(df['frame'] > i - missing_frames) & (df['frame'] <= i - 1) & (df['tracked_id'] != -1)].copy()
    prev['og_idx'] = prev.index.values
    prev = prev.groupby('tracked_id').last()
    prev.set_index('og_idx', inplace=True)
    
    corr_mat = np.ones((len(curr), len(prev)))
    sim_mat = np.zeros((len(curr), len(prev)))
    
    track_idx = df['tracked_id'].astype(int).max()
    track_idx = track_idx if track_idx > 0 else 0

    prev_predictions = []
    prev_errors = []
    prev_latents = []
    # pre compute kalman predictions & errors and latents
    for p in prev.index.values:
        obj = df.iloc[p]
        pred, err = filters[int(obj['tracked_id'] - 1)].predict()
        prev_predictions.append(pred)
        prev_errors.append(err)

        frame_path = os.path.join(imgs_path, str(int(obj['frame'])).zfill(6) + '.jpg')
        frame = cv.imread(frame_path)
        im = crop_image(frame, obj['top'], obj['left'], obj['width'], obj['height'])
        latent = inference(model, im)
        prev_latents.append(latent)
    
    for j_id, j in enumerate(curr.index.values):
        obj_1 = df.iloc[j]
        obj_1_area = obj_1['width'] * obj_1['height']
        obj_1_r = obj_1['left'] + obj_1['width']
        obj_1_b = obj_1['top'] + obj_1['height']
        obj_1_cx = int((obj_1_r + obj_1['left']) / 2)
        obj_1_cy = int((obj_1_b + obj_1['top']) / 2)

        # compute latent for curr object
        obj_1_im = crop_image(curr_frame, obj_1['top'], obj_1['left'], obj_1['width'], obj_1['height'])
        curr_latent = inference(model, obj_1_im)
        
        # update fields in df to reduce computations
        df.iloc[j, df.columns.get_loc('right')] = obj_1_r
        df.iloc[j, df.columns.get_loc('bottom')] = obj_1_b
        df.iloc[j, df.columns.get_loc('area')] = obj_1_area
        df.iloc[j, df.columns.get_loc('center_x')] = obj_1_cx
        df.iloc[j, df.columns.get_loc('center_y')] = obj_1_cy
        
        for k_id, k in enumerate(prev.index.values):
            # compare with each object detected in previous frame
            # apply kalman filter to predict pos
            obj_2 = df.iloc[k]

            predicted = False
            # get next pos with Kalman
            if filters[int(obj_2['tracked_id'] - 1)].counter < kalman_min_c:
                pred = [obj_2['center_x'], obj_2['center_y']]
            else:
                predicted = True
                if steps and (diff := int(obj_2['frame']) - i) > 1:
                    pred = filters[int(obj_2['tracked_id'] - 1)].predict_steps(diff+1)
                else:
                    pred = prev_predictions[k_id]

            obj_2_l = max(0,int(pred[0].item() - (obj_2['width'] / 2)))
            obj_2_r = min(WIDTH, int(pred[0].item() + (obj_2['width'] / 2)))
            obj_2_t = max(0,int(pred[1].item() - (obj_2['height'] / 2)))
            obj_2_b = min(HEIGHT, int(pred[1].item() + (obj_2['height'] / 2)))

            # get pre computed latent
            obj_2_latent = prev_latents[k_id]

            # compute similarity score
            cos_sim = cosine_sim(curr_latent, obj_2_latent).item()
            if cos_sim < sim_thres:
                cos_sim = 0

            sim_mat[j_id, k_id] = cos_sim

            # compute intersection
            intersec_width = max(0, min(obj_1_r, obj_2_r) - max(obj_1['left'], obj_2_l))
            intersec_height = max(0, min(obj_1_b, obj_2_b) - max(obj_1['top'], obj_2_t))

            intersec_area = intersec_height * intersec_width
            union_area = obj_1_area + obj_2['area'] - intersec_area

            iou = intersec_area / union_area
            if iou > iou_thres and (cos_sim > 0 or predicted):
                S = alpha * iou + beta * ((cos_sim + 1) / 2) # bc cos_sim is between [-1,1]
            else:
                S = 0
            jacc_idx = 1 - S
            
            corr_mat[j_id,k_id] = jacc_idx
    
    # associate tracks with detections:
    curr_id, best_assc = linear_sum_assignment(corr_mat)

    to_rm = []
    # process to remove values under treshold
    for (r,c) in zip(curr_id, best_assc):
        if corr_mat[r,c] >= jacc_thres:
            to_rm.append((r,c))
    
    # get untracked objects
    new_tracks = [m for m in range(len(curr)) if m not in curr_id] + [m for m, _ in to_rm]
    # create new tracks
    for nt in new_tracks:
        df.iloc[curr.index.values[nt], df.columns.get_loc('tracked_id')] = track_idx + 1
        track_idx += 1
        # create associated filter and feed first position
        filters.append(KalmanFilter(dt, 1, 1, 1, 0.1, 0.1))
        pred, P_k = filters[track_idx - 1].predict()
        obj_center = np.expand_dims(np.array([df.iloc[curr.index.values[nt], df.columns.get_loc('center_x')], 
                      df.iloc[curr.index.values[nt], df.columns.get_loc('center_y')]]), 1)
        filters[track_idx - 1].update(obj_center, pred, P_k)

    for (r,c) in zip(curr_id, best_assc):
        if (r,c) in to_rm:
            continue
        
        # prev
        t_id = df.iloc[prev.index.values[c], df.columns.get_loc('tracked_id')]
        # curr
        df.iloc[curr.index.values[r], df.columns.get_loc('tracked_id')] = t_id
        # update kalman filters' state
        pred = prev_predictions[c]
        err = prev_errors[c]
        obj_center = np.expand_dims(np.array([df.iloc[curr.index.values[r], df.columns.get_loc('center_x')], 
                      df.iloc[curr.index.values[r], df.columns.get_loc('center_y')]]), 1)
        
        filters[t_id-1].update(obj_center, pred, err)

        po = df.iloc[prev.index.values[c]]
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_left')] = int(pred[0].item() - (po['width'] / 2))
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_right')] = int(pred[0].item() + (po['width'] / 2))
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_top')] = int(pred[1].item() - (po['height'] / 2))
        df.iloc[curr.index.values[r], df.columns.get_loc('pred_bottom')] = int(pred[1].item() + (po['height'] / 2))
        df.iloc[curr.index.values[r], df.columns.get_loc('sim')] = sim_mat[r,c]


In [31]:
import time

def process_video_re_id(path, imgs_path, model_path, alpha=0.6, beta=0.4):
    start = time.time()
    df = reload_data(path, confidence=0.8)
    nb_frames = pd.unique(df['frame']).max()

    model = load_model(model_path)

    for i in range(1, nb_frames + 1):
        process_detection_frame_kalman_re_id(df, i, imgs_path, model, alpha, beta, kalman_min_c=24, jacc_thres=0.99, 
                                             missing_frames=60, sim_thres=0.85, steps=True, iou_thres=0.1, dt=1/5)
    print('Compute time:', time.time() - start)
    return df

In [32]:
def render_tracking_with_preds_and_sim(imgs_path, df, out):
    tmp_img = cv.imread(os.path.join(imgs_path, os.listdir(imgs_path)[0]))
    
    fourcc = cv.VideoWriter_fourcc(*'DIVX')
    out = cv.VideoWriter(out + '.avi', fourcc, 24.0, (tmp_img.shape[1], tmp_img.shape[0]))
    
    for i, im_path in enumerate(os.listdir(imgs_path)):
        if i == 0:
            continue
        im = cv.imread(os.path.join(imgs_path, im_path))
        data = df[df['frame'] == i].values
        for j in range(len(data)):
            obj = data[j]
            cv.rectangle(im, (int(obj[2]), int(obj[3])), (int(obj[2]) + int(obj[4]), int(obj[3]) + int(obj[5])), (255,0,0), 2)
            cv.putText(im, str(obj[1]), (int(obj[2]), int(obj[3])), cv.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv.LINE_AA)
            cv.putText(im, str(obj[-1]), (int(obj[7]), int(obj[3])), cv.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2, cv.LINE_AA)
            if int(obj[13]) > 1 or int(obj[15]) > 1:
                cv.rectangle(im, (int(obj[12]), int(obj[14])), (int(obj[13]), int(obj[15])), (0,255,0), 3)
                cv.putText(im, str(obj[1]), (int(obj[12]), int(obj[14])), cv.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2, cv.LINE_AA)
            
        out.write(im)
    out.release()
    cv.destroyAllWindows()

In [33]:
filters = []

tracked_kalman_reid = process_video_re_id(path, imgs_path, model_path)

Compute time: 116.78529834747314


In [34]:
tracked_kalman_reid

Unnamed: 0,frame,tracked_id,left,top,width,height,confidence,right,bottom,area,center_x,center_y,pred_left,pred_right,pred_top,pred_bottom,sim
0,1,1,1700,391,156,337,0.914550,1856.0,728.0,52572.0,1778.0,559.0,0.0,0.0,0.0,0.0,0.000000
1,1,2,250,456,107,248,0.883148,357.0,704.0,26536.0,303.0,580.0,0.0,0.0,0.0,0.0,0.000000
2,1,3,1255,539,60,118,0.826354,1315.0,657.0,7080.0,1285.0,598.0,0.0,0.0,0.0,0.0,0.000000
3,2,1,1689,390,163,338,0.900352,1852.0,728.0,55094.0,1770.0,559.0,-56.0,99.0,-161.0,175.0,0.939002
4,2,2,262,456,96,250,0.875928,358.0,706.0,24000.0,310.0,581.0,-49.0,57.0,-116.0,131.0,0.955808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2705,525,30,692,302,282,569,0.928935,974.0,871.0,160458.0,833.0,586.0,724.0,1001.0,295.0,869.0,0.957883
2706,525,51,1837,380,83,306,0.876149,1920.0,686.0,25398.0,1878.0,533.0,1836.0,1917.0,381.0,685.0,0.966141
2707,525,39,1610,403,82,265,0.845143,1692.0,668.0,21730.0,1651.0,535.0,1609.0,1694.0,405.0,675.0,0.953247
2708,525,55,1432,457,96,193,0.841939,1528.0,650.0,18528.0,1480.0,553.0,91.0,168.0,-48.0,146.0,0.932009


In [35]:
render_tracking_with_preds_and_sim(os.path.join('ADL-Rundle-6', 'img1'), tracked_kalman_reid, 'out_reid')