In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import cv2
import numpy as np
import tensorflow as tf
from tracker import Object_Tracker
from ultralytics import YOLO
import random
from deep_sort import generate_detections as gdet
import time

In [2]:
# Tracker instance from the project-local `tracker` module; the tracking loop
# calls `tracker.update(frame, detections)` once per frame and reads `tracker.tracks`.
tracker = Object_Tracker()

In [3]:
# Load the YOLO detector from the 'yolov8n.pt' weights file; `model(image)` is
# called later to obtain detection results.
model = YOLO('yolov8n.pt')

In [4]:
# Root folder of the dataset; its sub-directories are listed as categories below.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
data_dir = 'C:\\dataset2014\\results'

In [5]:
# Every entry directly under the dataset root (files and folders alike).
all_items = os.listdir(data_dir)

# Only the sub-directories count as categories; plain files are skipped.
categories = []
for entry in all_items:
    entry_path = os.path.join(data_dir, entry)
    if os.path.isdir(entry_path):
        categories.append(entry)
print(categories)

['badWeather', 'baseline', 'cameraJitter', 'dynamicBackground', 'intermittentObjectMotion', 'lowFramerate', 'nightVideos', 'PTZ', 'shadow', 'thermal', 'turbulence']


In [6]:
# Run the detector once on a single dataset frame, mainly to obtain the
# class-id -> label mapping (`names`) used by the tracking loop.
# The path gets its own name so that `img` always refers to the decoded image.
img_path = 'C:\\dataset2014\\results\\badWeather\\skating\\in000001.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None instead
    # of raising, so fail here with a clear message rather than inside the model.
    raise FileNotFoundError(f"Could not read image: {img_path}")
results = model(img)

# Mapping from integer class id to human-readable label (e.g. 2 -> 'car').
names = results[0].names


0: 640x640 1 car, 125.9ms
Speed: 5.0ms preprocess, 125.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


In [16]:
# Display the class-id -> label mapping taken from the sample detection results.
names

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [7]:
# Display the dimensions of the sample image that was loaded with cv2.imread.
img.shape

(320, 320, 3)

In [8]:
# Palette of 10 random colour triples; each track later picks its colour with
# `colors[track_id % len(colors)]`, so a track keeps the same colour across frames.
colors = [
    tuple(random.randint(0, 255) for _channel in range(3))
    for _slot in range(10)
]

# Confidence cut-off constant.
# NOTE(review): the tracking loop visible in this notebook uses literal
# thresholds instead of this value - confirm whether it should be wired in.
detection_threshold = 0.5

In [9]:
# Show the notebook's working directory. Plain Python is used instead of the
# bare `pwd` automagic, which only resolves when automagic is enabled and the
# cell consists of that single line.
os.getcwd()

'C:\\Users\\Toqa Alaa\\deep_learning\\Real-time-object-tracking'

In [10]:
def non_max_suppression(detections, threshold):
    """Greedy, class-agnostic non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining detection and discards every
    other remaining detection whose IoU with it (as computed by
    ``calculate_iou``) is greater than or equal to ``threshold``.

    Parameters
    ----------
    detections : sequence
        Items indexable as ``[x1, y1, x2, y2, score, class]``. ``score`` may be
        a plain number or any scalar-like object accepted by ``float()``
        (e.g. a 0-d tensor).
    threshold : float
        IoU value at or above which an overlapping box is suppressed.

    Returns
    -------
    tuple of (list, list, list)
        ``(boxes, scores, classes)`` of the kept detections, ordered by
        descending score. All three lists are index-aligned. Three empty lists
        are returned when ``detections`` is empty.
    """
    if len(detections) == 0:
        return [], [], []

    boxes = np.array([det[0:4] for det in detections])
    # float() accepts both plain numbers and scalar tensors/arrays.
    scores = np.array([float(det[4]) for det in detections])
    classes = np.array([det[5] for det in detections])

    # Process candidates from most to least confident.
    order = np.argsort(scores)[::-1]
    boxes = boxes[order]
    scores = scores[order]
    classes = classes[order]

    selected_boxes = []
    selected_scores = []
    selected_classes = []

    while len(boxes) > 0:
        # The head of each array is the best remaining candidate.
        selected_boxes.append(boxes[0])
        selected_scores.append(scores[0])
        selected_classes.append(classes[0])

        # Overlap of the chosen box with every other remaining candidate.
        iou = calculate_iou(selected_boxes[-1], boxes[1:])

        # Keep only candidates that do not overlap too much. The same mask
        # must be applied to all three arrays so they stay index-aligned;
        # otherwise the class labels drift away from their boxes.
        keep = iou < threshold
        boxes = boxes[1:][keep]
        scores = scores[1:][keep]
        classes = classes[1:][keep]

    return selected_boxes, selected_scores, selected_classes

In [11]:
def calculate_iou(box1, boxes2):
    """Intersection-over-Union of one box against a set of boxes.

    Boxes are ``(x1, y1, x2, y2)`` corner coordinates. Widths and heights are
    computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1`` (inclusive pixel convention).

    Parameters
    ----------
    box1 : sequence of 4 numbers
        The reference box.
    boxes2 : array-like of shape (N, 4)
        Boxes to compare against; may be empty. A flat sequence is interpreted
        as consecutive groups of four coordinates.

    Returns
    -------
    numpy.ndarray of shape (N,)
        IoU between ``box1`` and each row of ``boxes2``.
    """
    boxes2 = np.asarray(boxes2)
    if boxes2.ndim == 1:
        boxes2 = boxes2.reshape(-1, 4)

    x1, y1, x2, y2 = box1

    # Area of the reference box must be taken from its own corners, i.e.
    # before any clipping against boxes2 is done.
    box1_area = (x2 - x1 + 1) * (y2 - y1 + 1)
    boxes2_area = (boxes2[:, 2] - boxes2[:, 0] + 1) * (boxes2[:, 3] - boxes2[:, 1] + 1)

    # Corners of the overlap rectangle between box1 and every box in boxes2.
    inter_x1 = np.maximum(x1, boxes2[:, 0])
    inter_y1 = np.maximum(y1, boxes2[:, 1])
    inter_x2 = np.minimum(x2, boxes2[:, 2])
    inter_y2 = np.minimum(y2, boxes2[:, 3])

    # Clamp at zero so disjoint boxes contribute no intersection.
    intersection_area = (
        np.maximum(0, inter_x2 - inter_x1 + 1) * np.maximum(0, inter_y2 - inter_y1 + 1)
    )

    iou = intersection_area / (box1_area + boxes2_area - intersection_area)
    return iou

In [12]:
# Video file that the capture object is opened on.
video_path = 'traffic_video.mp4'

# Folder of still frames; only referenced by the commented-out image-sequence
# reader in the tracking loop.
data_path = 'C:\\dataset2014\\results\\baseline\\highway'

In [13]:
# Open the input video; frames are pulled from it with `vid.read()` in the tracking loop.
vid = cv2.VideoCapture(video_path)

In [14]:
# Accumulates every annotated frame so the export cell can write them to a video.
frames = []

In [15]:
# Detection + tracking loop.
#
# For every frame of `vid`: run the YOLO model, keep detections with score
# >= 0.3, apply non-maximum suppression (IoU threshold 0.5), feed the result to
# the tracker, draw each track's box and label, show the frame and store it in
# `frames`. The loop stops when the video has no more frames or when 'q' is
# pressed in the preview window.
#
# To process an image sequence instead of a video, read frames with
# cv2.imread(os.path.join(data_path, name)) for each name in os.listdir(data_path).

# Label rendering settings are loop-invariant, so they are defined once.
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.5
text_color = (255, 255, 255)  # White color in BGR

quit_requested = False
try:
    while not quit_requested:
        ok, frame = vid.read()
        if not ok:
            # End of stream (or a read error): `frame` is not a valid image,
            # so stop instead of passing it to the model.
            break

        # Perform object detection using YOLO on the current frame
        results = model(frame)

        for result in results:
            # Build [x1, y1, x2, y2, score, class_name] rows from the raw boxes.
            detections = []
            for r in result.boxes.data:
                x1, y1, x2, y2, score, class_id = r
                x1 = int(x1)
                x2 = int(x2)
                y1 = int(y1)
                y2 = int(y2)

                class_id = int(class_id)
                class_name = names[class_id]
                if score >= 0.3:
                    detections.append([x1, y1, x2, y2, score, class_name])

            # Drop overlapping duplicates, then rebuild the rows from the survivors.
            boxes, scores, classes = non_max_suppression(detections, 0.5)
            detections = [
                [boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3], scores[i], classes[i]]
                for i in range(len(boxes))
            ]

            print(detections)

            # Update the tracks with the new frame and its detections.
            tracker.update(frame, detections)

            for track in tracker.tracks:
                x1, y1, x2, y2 = track.bbox
                track_id = track.track_id

                # One stable colour per track id.
                rectangle_color = colors[track_id % len(colors)]
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), rectangle_color, 3)

                # Label text and its anchor, slightly above the box's top-left corner.
                text = track.class_name
                text_x = int(x1)
                text_y = int(y1) - 10

                # Size the filled background rectangle from the rendered text size.
                (text_width, text_height), _baseline = cv2.getTextSize(text, font, font_scale, 1)
                background_width = text_width + 10
                background_height = text_height + 10
                background_x = text_x
                background_y = text_y - text_height

                cv2.rectangle(
                    frame,
                    (background_x, background_y),
                    (background_x + background_width, background_y + background_height),
                    rectangle_color,
                    -1,
                )
                cv2.putText(frame, text, (text_x, text_y), font, font_scale, text_color, 1, cv2.LINE_AA)

            # Show the annotated frame and keep it for the export cell.
            cv2.imshow('img', frame)
            frames.append(frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                # A bare `break` here would only leave the inner `for` loop,
                # so a flag is used to stop the outer `while` as well.
                quit_requested = True
                break
finally:
    # Always release the window and the capture, even on KeyboardInterrupt.
    cv2.destroyAllWindows()
    vid.release()


0: 384x640 1 car, 77.8ms
Speed: 11.0ms preprocess, 77.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


[[607, 307, 633, 336, 0.6374568939208984, 'car']]



0: 384x640 1 car, 82.8ms
Speed: 3.0ms preprocess, 82.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 76.8ms
Speed: 2.0ms preprocess, 76.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 70.8ms


[[607, 307, 633, 336, 0.6399165391921997, 'car']]
[[606, 307, 633, 336, 0.6196421980857849, 'car']]


Speed: 4.0ms preprocess, 70.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 56.8ms
Speed: 3.0ms preprocess, 56.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 60.8ms
Speed: 2.0ms preprocess, 60.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)



[[605, 308, 633, 337, 0.6649579405784607, 'car']]
[[604, 308, 632, 338, 0.5958523154258728, 'car']]
[[1102, 698, 1300, 764, 0.7092212438583374, 'car'], [601, 309, 631, 339, 0.6431823372840881, 'car']]


0: 384x640 2 cars, 63.8ms
Speed: 2.0ms preprocess, 63.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 76.8ms
Speed: 3.0ms preprocess, 76.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)



[[1081, 678, 1281, 763, 0.7404282093048096, 'car'], [600, 309, 631, 339, 0.6189277768135071, 'car']]
[[1050, 637, 1229, 765, 0.7544621229171753, 'car'], [599, 309, 628, 341, 0.5587347745895386, 'car']]


0: 384x640 2 cars, 86.8ms
Speed: 3.0ms preprocess, 86.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 75.8ms
Speed: 3.0ms preprocess, 75.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[1034, 621, 1199, 738, 0.5695949196815491, 'car'], [598, 310, 628, 342, 0.49722540378570557, 'car']]
[[1019, 604, 1178, 714, 0.6427969336509705, 'car'], [596, 309, 627, 344, 0.5056697130203247, 'car']]


0: 384x640 2 cars, 72.8ms
Speed: 3.0ms preprocess, 72.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 65.8ms
Speed: 2.0ms preprocess, 65.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[1007, 589, 1156, 698, 0.7221307754516602, 'car'], [595, 309, 627, 345, 0.4473961591720581, 'car']]
[[993, 574, 1135, 677, 0.6083101630210876, 'car'], [594, 309, 628, 346, 0.4808262288570404, 'car']]


0: 384x640 2 cars, 72.1ms
Speed: 2.0ms preprocess, 72.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 1 truck, 64.8ms
Speed: 3.0ms preprocess, 64.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 64.8ms


[[973, 553, 1101, 636, 0.6526740193367004, 'car'], [590, 317, 622, 349, 0.6280165314674377, 'car']]
[[963, 543, 1084, 624, 0.6488127708435059, 'car'], [589, 323, 620, 350, 0.6060664653778076, 'car']]


Speed: 5.0ms preprocess, 64.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 86.8ms
Speed: 2.0ms preprocess, 86.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[588, 317, 619, 350, 0.6298187971115112, 'car'], [955, 531, 1066, 616, 0.5315497517585754, 'car']]
[[945, 523, 1055, 597, 0.7335258722305298, 'car'], [585, 317, 619, 352, 0.5994157195091248, 'car']]


0: 384x640 3 cars, 71.8ms
Speed: 3.0ms preprocess, 71.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 69.8ms
Speed: 2.0ms preprocess, 69.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[584, 319, 619, 353, 0.6253370046615601, 'car'], [938, 512, 1044, 591, 0.5906488299369812, 'car']]
[[582, 320, 620, 354, 0.634755551815033, 'car'], [923, 500, 1019, 566, 0.6043171286582947, 'car'], [625, 296, 647, 312, 0.3027831017971039, 'car']]


0: 384x640 3 cars, 71.8ms
Speed: 3.0ms preprocess, 71.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 62.8ms
Speed: 2.0ms preprocess, 62.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[916, 493, 1008, 557, 0.7044394016265869, 'car'], [581, 320, 619, 355, 0.6185693144798279, 'car']]
[[911, 485, 999, 550, 0.6121188998222351, 'car'], [580, 320, 617, 357, 0.5277774930000305, 'car'], [625, 295, 648, 314, 0.30623337626457214, 'car']]


0: 384x640 6 cars, 74.8ms
Speed: 3.0ms preprocess, 74.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 75.8ms
Speed: 3.0ms preprocess, 75.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[904, 478, 990, 538, 0.6549689173698425, 'car'], [578, 321, 615, 359, 0.4976878762245178, 'car'], [625, 295, 648, 314, 0.3494219481945038, 'car']]
[[574, 321, 618, 360, 0.49687233567237854, 'car'], [898, 475, 985, 531, 0.4597446322441101, 'car'], [624, 296, 647, 314, 0.3942779004573822, 'car']]


0: 384x640 4 cars, 74.8ms
Speed: 2.0ms preprocess, 74.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 71.8ms
Speed: 2.0ms preprocess, 71.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[887, 463, 967, 521, 0.7183414697647095, 'car'], [570, 331, 605, 363, 0.5239184498786926, 'car'], [623, 294, 647, 315, 0.39717555046081543, 'car']]
[[883, 458, 960, 508, 0.6242813467979431, 'car'], [569, 332, 604, 365, 0.5623509883880615, 'car'], [623, 292, 647, 315, 0.4009203016757965, 'car']]


0: 384x640 3 cars, 65.8ms
Speed: 3.0ms preprocess, 65.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 86.8ms
Speed: 3.0ms preprocess, 86.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[567, 335, 604, 366, 0.623421847820282, 'car'], [622, 292, 648, 315, 0.3829375207424164, 'car'], [878, 452, 951, 510, 0.3571438193321228, 'car']]
[[875, 449, 948, 504, 0.6983095407485962, 'car'], [565, 339, 602, 367, 0.5995883345603943, 'car']]


0: 384x640 2 cars, 68.8ms
Speed: 3.0ms preprocess, 68.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 65.8ms
Speed: 2.0ms preprocess, 65.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 63.8ms


[[565, 337, 601, 369, 0.6620962023735046, 'car'], [869, 441, 941, 494, 0.62066650390625, 'car']]
[[864, 434, 928, 475, 0.47896793484687805, 'car'], [564, 336, 604, 373, 0.47777917981147766, 'car']]


Speed: 2.0ms preprocess, 63.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 76.8ms
Speed: 2.0ms preprocess, 76.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 60.8ms


[[865, 431, 922, 461, 0.6436747312545776, 'car'], [562, 338, 598, 372, 0.4876951277256012, 'car']]
[[857, 428, 915, 459, 0.5555991530418396, 'car'], [558, 338, 597, 373, 0.5491067171096802, 'car']]


Speed: 3.0ms preprocess, 60.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 62.8ms
Speed: 2.0ms preprocess, 62.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 58.8ms
Speed: 2.0ms preprocess, 58.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[853, 423, 914, 463, 0.6168088912963867, 'car'], [557, 337, 596, 374, 0.4938786029815674, 'car']]
[[555, 340, 591, 376, 0.5008469223976135, 'car'], [851, 420, 912, 461, 0.46970903873443604, 'car']]
[[550, 343, 589, 379, 0.4778004586696625, 'car'], [846, 415, 899, 445, 0.42138296365737915, 'car']]


0: 384x640 4 cars, 71.8ms
Speed: 2.0ms preprocess, 71.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 64.8ms
Speed: 3.0ms preprocess, 64.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[547, 346, 588, 382, 0.6935564875602722, 'car'], [570, 326, 606, 347, 0.4034211039543152, 'car'], [841, 409, 898, 445, 0.3866082429885864, 'car']]
[[545, 348, 584, 383, 0.6013485789299011, 'car'], [570, 326, 604, 347, 0.385246217250824, 'car'], [838, 407, 896, 441, 0.33653923869132996, 'car']]


0: 384x640 5 cars, 77.8ms
Speed: 2.0ms preprocess, 77.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 cars, 63.8ms
Speed: 2.0ms preprocess, 63.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[543, 348, 584, 385, 0.6444501280784607, 'car'], [569, 326, 604, 349, 0.46315085887908936, 'car'], [615, 300, 641, 321, 0.3334594666957855, 'car'], [836, 404, 904, 443, 0.31241124868392944, 'car']]
[[539, 349, 584, 388, 0.6411684155464172, 'car'], [834, 400, 889, 433, 0.5957823991775513, 'car'], [568, 326, 603, 353, 0.5336233377456665, 'car'], [613, 302, 641, 322, 0.31750786304473877, 'car']]


0: 384x640 5 cars, 70.8ms
Speed: 2.0ms preprocess, 70.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 81.8ms
Speed: 2.0ms preprocess, 81.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)



[[536, 352, 576, 393, 0.603405237197876, 'car'], [565, 327, 604, 355, 0.5601300001144409, 'car'], [829, 398, 890, 436, 0.5005990862846375, 'car'], [611, 303, 639, 322, 0.33033818006515503, 'car']]
[[535, 354, 573, 393, 0.6858079433441162, 'car'], [564, 329, 603, 356, 0.5305817723274231, 'car'], [828, 395, 884, 430, 0.4854579269886017, 'car'], [608, 305, 639, 325, 0.3217278718948364, 'car']]


0: 384x640 5 cars, 65.8ms
Speed: 2.0ms preprocess, 65.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 68.8ms
Speed: 2.0ms preprocess, 68.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[532, 354, 572, 394, 0.6979907155036926, 'car'], [826, 391, 883, 428, 0.5527169704437256, 'car'], [562, 330, 605, 359, 0.5520931482315063, 'car'], [608, 306, 639, 325, 0.38329607248306274, 'car']]
[[530, 355, 571, 397, 0.7502177953720093, 'car'], [561, 330, 604, 358, 0.6323993802070618, 'car'], [823, 391, 885, 427, 0.41101792454719543, 'car'], [608, 306, 635, 326, 0.4075894355773926, 'car']]


0: 384x640 5 cars, 66.8ms
Speed: 3.0ms preprocess, 66.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 61.8ms
Speed: 3.0ms preprocess, 61.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[527, 355, 570, 400, 0.6638322472572327, 'car'], [822, 388, 881, 422, 0.5744150280952454, 'car'], [561, 328, 606, 358, 0.49475204944610596, 'car'], [607, 306, 634, 326, 0.4870505928993225, 'car']]
[[520, 358, 578, 407, 0.6349712014198303, 'car'], [818, 385, 876, 419, 0.5620149970054626, 'car'], [559, 332, 592, 358, 0.49176356196403503, 'car'], [606, 306, 634, 326, 0.471804141998291, 'car']]


0: 384x640 5 cars, 68.8ms
Speed: 4.0ms preprocess, 68.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 65.8ms
Speed: 2.0ms preprocess, 65.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)



[[517, 361, 590, 409, 0.8075671792030334, 'car'], [557, 333, 608, 361, 0.40263187885284424, 'car'], [815, 382, 868, 416, 0.3960164487361908, 'car'], [606, 307, 633, 326, 0.37846502661705017, 'car']]
[[513, 363, 591, 411, 0.6226015090942383, 'car'], [814, 379, 869, 414, 0.41244497895240784, 'car'], [554, 338, 590, 360, 0.32361090183258057, 'car'], [605, 306, 631, 327, 0.3160513937473297, 'car']]



KeyboardInterrupt



In [None]:
# Write the annotated frames collected in `frames` to an MP4 file.
output_file = 'output_video.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
frame_rate = 30  # Adjust this to your desired frame rate

if not frames:
    # Without this guard the cell fails with an opaque IndexError on frames[0].
    raise ValueError("No frames captured - run the tracking loop before exporting the video.")

# Use dimensions from the first frame (shape is rows, cols[, channels]).
height, width = frames[0].shape[:2]

# Create VideoWriter object
out = cv2.VideoWriter(output_file, fourcc, frame_rate, (width, height))
if not out.isOpened():
    # The writer does not raise when the codec/path is unusable; it just
    # stays closed and silently drops every frame.
    out.release()
    raise IOError(f"Could not open video writer for {output_file}")

try:
    # Loop through the array of frames and write them to the video
    for frame in frames:
        out.write(frame)
finally:
    # Release the VideoWriter even if a write fails, so the file is finalized.
    out.release()
    # Also close any OpenCV windows that might still be open.
    cv2.destroyAllWindows()