In [None]:
# Print the version of the `python` executable found by the shell.
# NOTE(review): this is resolved via the shell PATH and may not be the
# interpreter the kernel itself runs on — confirm if the two can differ here.
!python --version

In [None]:
# Display the installed Ultralytics version (the last expression is the cell output).
import ultralytics
ultralytics.__version__

In [None]:
# Display the installed PyTorch version (the last expression is the cell output).
import torch
torch.__version__

In [None]:
# Show the name of CUDA device 0. On a machine without CUDA the original call
# raises and stops "Restart & Run All", so fall back to a plain notice instead.
gpu_name = (
    torch.cuda.get_device_name(0)
    if torch.cuda.is_available()
    else "CPU only (no CUDA device available)"
)
gpu_name

# Detect, track and count Persons

In [None]:
import colorsys
import os
import time

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
from ultralytics import YOLO

In [None]:
# Load the pretrained "yolov8n.pt" checkpoint. This `model` object is the
# detector handed to the `process_frames` thread further down, i.e. it is
# used for inference in this notebook.
model = YOLO("yolov8n.pt")

In [None]:
# Lookup table from integer class index to a human-readable label (80 entries).
# NOTE(review): presumably the COCO label order used by the pretrained YOLOv8
# weights — confirm against `model.names` before relying on the indices.
class_names = ['person', 'bicycle', 'car', 'motorcycle',
               'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign',
               'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard',
               'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle',
               'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli',
               'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
               'couch', 'potted plant', 'bed', 'dining table', 'toilet',
               'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
               'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
               'toothbrush']


In [None]:
# IMAGE_PATH = "data/images/image.png"
# results = model(IMAGE_PATH, save=True)

In [None]:
# for result in results:
#     boxes = result.boxes  # Boxes object for bbox outputs
#     probs = result.probs  # Class probabilities for classification outputs
#     cls = boxes.cls.tolist()  # Convert tensor to list
#     xyxy = boxes.xyxy
#     xywh = boxes.xywh  # box with xywh format, (N, 4)
#     conf = boxes.conf
#     print(cls)
#     for class_index in cls:
#         class_name = class_names[int(class_index)]
#         print("Class:", class_name)

# DeepSORT

In [None]:
from deep_sort.utils.parser import get_config
from deep_sort.deep_sort import DeepSort
from deep_sort.sort.tracker import Tracker

# Checkpoint file passed to DeepSort as `model_path` (path is relative to the
# notebook's working directory).
deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'
# NOTE(review): `max_age` is presumably the number of frames a track may stay
# unmatched before it is dropped — confirm against the deep_sort package.
tracker = DeepSort(model_path=deep_sort_weights, max_age=70)

In [None]:
# Input video for the DeepSORT tracking pipeline (relative to the notebook's
# working directory).
VIDEO_PATH = "data/videos/Rec16-1.mp4"

In [None]:
# Open the input video and fail fast when OpenCV cannot open it. Without this
# check a wrong path goes unnoticed: the later cells read zero-sized properties
# and the pipeline finishes without processing a single frame.
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    raise IOError(f"Could not open video: {VIDEO_PATH}")

cap

In [None]:
# Read the input video's frame size and frame rate; the output writer below
# is configured with the same values. Width/height are cast to int, fps is
# kept as returned by OpenCV.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

In [None]:
# Define the codec ('mp4v') and create the VideoWriter for the annotated
# output, using the frame size and fps read from the input video.
# NOTE(review): cv2.VideoWriter presumably does not raise when the target
# cannot be opened — consider checking `out.isOpened()` here.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_path = 'output.mp4'
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

In [None]:
# Select the first CUDA GPU when one is available, otherwise the CPU, and
# display the choice as the cell output.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

device

In [None]:
from queue import Queue
from threading import Thread

In [None]:
def read_frames(cap, frame_queue, max_queue_size):
    """Read frames from ``cap`` into ``frame_queue`` until the stream ends.

    Args:
        cap: Object whose ``read()`` returns ``(ok, frame)``, such as a
            ``cv2.VideoCapture``.
        frame_queue: Queue that receives every frame, followed by one ``None``
            that marks the end of the video.
        max_queue_size: Backlog limit; no new frame is read while the queue
            already holds this many items.

    The ``None`` marker is enqueued from a ``finally`` clause, so it is sent
    even when ``cap.read()`` raises. Otherwise the consumer would block
    forever on ``frame_queue.get()``. The exception itself still propagates.
    """
    try:
        while True:
            if frame_queue.qsize() >= max_queue_size:
                time.sleep(0.1)  # Sleep briefly to prevent busy-waiting
                continue
            ret, frame = cap.read()
            if not ret:
                break
            frame_queue.put(frame)
    finally:
        frame_queue.put(None)  # Signal end of video

In [None]:
CONFIDENCE_THRESHOLD = 0.5

def process_frames(frame_queue, result_queue, model):
    """Run the detector on queued frames and forward ``(rgb_frame, results)``.

    Args:
        frame_queue: Queue of frames as read by OpenCV (BGR); a ``None`` item
            ends the loop.
        result_queue: Queue that receives one ``(rgb_frame, results)`` tuple
            per input frame, followed by one ``None``.
        model: Callable detector, invoked as
            ``model(frame, device=..., conf=CONFIDENCE_THRESHOLD)``.

    Notes:
        * Inference runs on GPU 0 only when CUDA is available and otherwise on
          the CPU, instead of unconditionally requesting device 0.
        * The model is given the frame exactly as read. Ultralytics treats
          numpy input as OpenCV-style BGR, so converting to RGB beforehand
          would feed it swapped channels. The RGB copy is made only for the
          downstream drawing/writing stages, which expect RGB.
        * The ``None`` marker is sent from ``finally`` so the consumer is
          released even if inference raises.
    """
    inference_device = 0 if torch.cuda.is_available() else 'cpu'
    try:
        while True:
            frame = frame_queue.get()
            if frame is None:
                break

            results = model(frame, device=inference_device, conf=CONFIDENCE_THRESHOLD)
            og_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            result_queue.put((og_frame, results))
    finally:
        result_queue.put(None)  # Signal end of processing

In [None]:
def _person_detections(results, person_class):
    """Return ``(xywh, conf)`` numpy arrays for the person detections only."""
    if len(results) == 0:
        return np.empty((0, 4)), np.empty((0,))
    boxes = results[0].boxes  # Assuming single image input
    detected_cls = boxes.cls.detach().cpu().numpy().astype(int)
    keep = detected_cls == person_class
    bboxes_xywh = boxes.xywh.detach().cpu().numpy()[keep]
    conf = boxes.conf.detach().cpu().numpy()[keep]
    return bboxes_xywh, conf


def _draw_person_count(frame, count):
    """Overlay the running unique-person count in the frame's top-left corner."""
    cv2.putText(frame, f"Person Count: {count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)


def track_and_visualize(result_queue, output_queue, tracker, class_names):
    """Track persons across frames, draw their boxes and a running count.

    Args:
        result_queue: Queue of ``(frame, results)`` tuples; a ``None`` item
            ends the loop.
        output_queue: Queue that receives every annotated frame, followed by
            one ``None``.
        tracker: DeepSort-style tracker exposing ``update(xywh, conf, frame)``
            and ``tracker.tracks``.
        class_names: Index-to-label list; the index of ``'person'`` selects
            which detections are tracked (index 0 when the label is absent).

    Behaviour:
        * Only person detections are passed to the tracker, so the overlay
          labelled "Person Count" really counts persons. Labels are no longer
          derived from ``track_id % len(cls)``, which has no relation to the
          detection behind a track.
        * Only confirmed tracks that were matched recently are drawn and
          counted; tentative or stale tracks would otherwise leave ghost
          boxes and inflate the count.
          NOTE(review): relies on ``Track.is_confirmed()`` and
          ``Track.time_since_update`` from the deep_sort package — confirm
          they exist in the vendored version.
        * The tracker is not updated on frames without person detections,
          but the count overlay is still drawn so it does not flicker.
        * The ``None`` marker is sent from ``finally`` so the writer stage is
          released even if tracking raises.
    """
    person_class = class_names.index('person') if 'person' in class_names else 0
    person_label = class_names[person_class] if person_class < len(class_names) else "Unknown"
    palette = [(0, 0, 255), (255, 0, 0), (0, 255, 0)]
    unique_track_ids = set()
    try:
        while True:
            item = result_queue.get()
            if item is None:
                break

            og_frame, results = item
            bboxes_xywh, conf = _person_detections(results, person_class)

            if len(bboxes_xywh) > 0:
                tracker.update(bboxes_xywh, conf, og_frame)
                for track in tracker.tracker.tracks:
                    # Skip tracks that are not yet confirmed or were not
                    # matched to a detection in the last frame.
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue

                    track_id = track.track_id
                    x1, y1, x2, y2 = track.to_tlbr()
                    color = palette[track_id % 3]

                    cv2.rectangle(og_frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
                    cv2.putText(og_frame, f"{person_label}-{track_id}", (int(x1) + 10, int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

                    unique_track_ids.add(track_id)

            _draw_person_count(og_frame, len(unique_track_ids))
            output_queue.put(og_frame)
    finally:
        output_queue.put(None)  # Signal end of tracking

In [None]:
def write_video(output_queue, out):
    """Drain ``output_queue`` into the video writer ``out``.

    Each frame taken from the queue is converted from RGB to BGR and passed
    to ``out.write``. A ``None`` item ends the loop.
    """
    frame = output_queue.get()
    while frame is not None:
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        out.write(bgr_frame)
        frame = output_queue.get()

In [None]:
# Bounded hand-off queues between the pipeline stages; each holds at most
# 30 items, so a fast producer blocks instead of buffering without limit.
frame_queue = Queue(maxsize=30)
result_queue = Queue(maxsize=30)
output_queue = Queue(maxsize=30)

In [None]:
# Create one thread per pipeline stage: read -> detect -> track/draw -> write.
# The threads are only constructed in this cell; nothing is started here.
read_thread = Thread(target=read_frames, args=(cap, frame_queue, 30))
process_thread = Thread(target=process_frames, args=(frame_queue, result_queue, model))
track_thread = Thread(target=track_and_visualize, args=(result_queue, output_queue, tracker, class_names))
write_thread = Thread(target=write_video, args=(output_queue, out))

In [None]:
# read_thread.start()
# process_thread.start()
# track_thread.start()
# write_thread.start()

# read_thread.join()
# process_thread.join()
# track_thread.join()
# write_thread.join()

# cap.release()
# out.release()
# cv2.destroyAllWindows()

In [None]:
# frames = []

# unique_track_ids = set()

# i = 0
# counter, fps, elapsed = 0, 0, 0
# start_time = time.perf_counter()

# while cap.isOpened() and i < 300:
#     ret, frame = cap.read()

#     if ret:
        
#         og_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#         frame = og_frame.copy()

#         model = YOLO("yolov8n.pt")  # load a pretrained model (recommended for training)

#         results = model(frame, device=0, conf=0.5)

#         class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

#         for result in results:
#             boxes = result.boxes  # Boxes object for bbox outputs
#             probs = result.probs  # Class probabilities for classification outputs
#             cls = boxes.cls.tolist()  # Convert tensor to list
#             xyxy = boxes.xyxy
#             conf = boxes.conf
#             xywh = boxes.xywh  # box with xywh format, (N, 4)
#             for class_index in cls:
#                 class_name = class_names[int(class_index)]
#                 #print("Class:", class_name)

#         pred_cls = np.array(cls)
#         conf = conf.detach().cpu().numpy()
#         xyxy = xyxy.detach().cpu().numpy()
#         bboxes_xywh = xywh
#         bboxes_xywh = xywh.cpu().numpy()
#         bboxes_xywh = np.array(bboxes_xywh, dtype=float)
        
#         tracks = tracker.update(bboxes_xywh, conf, og_frame)
        
#         for track in tracker.tracker.tracks:
#             track_id = track.track_id
#             hits = track.hits
#             x1, y1, x2, y2 = track.to_tlbr()  # Get bounding box coordinates in (x1, y1, x2, y2) format
#             w = x2 - x1  # Calculate width
#             h = y2 - y1  # Calculate height

#             # Set color values for red, blue, and green
#             red_color = (0, 0, 255)  # (B, G, R)
#             blue_color = (255, 0, 0)  # (B, G, R)
#             green_color = (0, 255, 0)  # (B, G, R)

#             # Determine color based on track_id
#             color_id = track_id % 3
#             if color_id == 0:
#                 color = red_color
#             elif color_id == 1:
#                 color = blue_color
#             else:
#                 color = green_color

#             cv2.rectangle(og_frame, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

#             text_color = (0, 0, 0)  # Black color for text
#             cv2.putText(og_frame, f"{class_name}-{track_id}", (int(x1) + 10, int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1, cv2.LINE_AA)

#             # Add the track_id to the set of unique track IDs
#             unique_track_ids.add(track_id)

#         # Update the person count based on the number of unique track IDs
#         person_count = len(unique_track_ids)

#         # Update FPS and place on frame
#         current_time = time.perf_counter()
#         elapsed = (current_time - start_time)
#         counter += 1
#         if elapsed > 1:
#             fps = counter / elapsed
#             counter = 0
#             start_time = current_time
        
#         i += 1

#         # Draw person count on frame
#         cv2.putText(og_frame, f"Person Count: {person_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

#         # Append the frame to the list
#         frames.append(og_frame)

#         # Write the frame to the output video file
#         out.write(cv2.cvtColor(og_frame, cv2.COLOR_RGB2BGR))

#         # Show the frame
#         #cv2.imshow("Video", og_frame)
# #         if cv2.waitKey(1) & 0xFF == ord('q'):
# #             break

# cap.release()
# out.release()
# cv2.destroyAllWindows()


# YOLO

In [None]:
def yolo_read_frames(yolo_cap, yolo_frame_queue, yolo_max_queue_size):
    """Feed frames from ``yolo_cap`` into ``yolo_frame_queue``.

    Args:
        yolo_cap: Object whose ``read()`` returns ``(ok, frame)``, such as a
            ``cv2.VideoCapture``.
        yolo_frame_queue: Queue that receives every frame, followed by one
            ``None`` end-of-video marker.
        yolo_max_queue_size: Backlog limit; reading pauses while the queue
            already holds this many items.

    The ``None`` marker is enqueued from ``finally`` so the consumer is
    released even when ``yolo_cap.read()`` raises. The exception itself still
    propagates to the caller.
    """
    try:
        while True:
            if yolo_frame_queue.qsize() >= yolo_max_queue_size:
                time.sleep(0.1)  # back off instead of spinning on a full queue
                continue
            ret, frame = yolo_cap.read()
            if not ret:
                break
            yolo_frame_queue.put(frame)
    finally:
        yolo_frame_queue.put(None)

In [None]:
YOLO_CONFIDENCE_THRESHOLD = 0.5

def yolo_process_frames(yolo_frame_queue, yolo_result_queue, yolo_model):
    """Run detection on queued frames and forward ``(frame, results)`` pairs.

    Args:
        yolo_frame_queue: Queue of input frames; a ``None`` item ends the loop.
        yolo_result_queue: Queue that receives one ``(frame, results)`` tuple
            per input frame, followed by one ``None``.
        yolo_model: Callable detector, invoked as
            ``yolo_model(frame, classes=[...], conf=YOLO_CONFIDENCE_THRESHOLD)``.

    The ``None`` marker is sent from ``finally`` so the writer stage is
    released even when inference raises. The exception still propagates.
    """
    # Class indices for person, car, truck, bus, and motorcycle in COCO dataset
    yolo_target_classes = [0, 2, 7, 5, 3]

    try:
        while True:
            yolo_frame = yolo_frame_queue.get()
            if yolo_frame is None:
                break

            # Run inference restricted to the target classes and confidence threshold
            yolo_results = yolo_model(yolo_frame, classes=yolo_target_classes, conf=YOLO_CONFIDENCE_THRESHOLD)

            yolo_result_queue.put((yolo_frame, yolo_results))
    finally:
        yolo_result_queue.put(None)

In [None]:
def yolo_write_video(yolo_result_queue, yolo_out):
    """Write the annotated version of every queued result to ``yolo_out``.

    Each queue item is a ``(frame, results)`` tuple. The first result's
    ``plot()`` output is what gets written. A ``None`` item ends the loop.
    """
    yolo_item = yolo_result_queue.get()
    while yolo_item is not None:
        _, yolo_results = yolo_item
        yolo_out.write(yolo_results[0].plot())
        yolo_item = yolo_result_queue.get()

In [None]:
def yolo_process_video(yolo_input_path, yolo_output_path, yolo_model_path="yolov8n.pt"):
    """Annotate a video with YOLO detections using a three-stage thread pipeline.

    Args:
        yolo_input_path: Path of the video to read.
        yolo_output_path: Path of the annotated video to write ('mp4v' codec).
            Missing parent directories are created.
        yolo_model_path: Checkpoint passed to ``YOLO``; defaults to
            ``"yolov8n.pt"``.

    Raises:
        IOError: If the input video cannot be opened or the output writer
            cannot be created. Previously both failures were silent and
            produced no usable output file.

    The capture and the writer are released in ``finally`` so they are closed
    even when a stage fails. ``cv2.destroyAllWindows()`` is not called because
    this function never opens a window.
    """
    yolo_cap = cv2.VideoCapture(yolo_input_path)
    if not yolo_cap.isOpened():
        yolo_cap.release()
        raise IOError(f"Could not open input video: {yolo_input_path}")

    yolo_out = None
    try:
        yolo_model = YOLO(yolo_model_path)

        yolo_width = int(yolo_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        yolo_height = int(yolo_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional frame rate (e.g. 29.97); truncating it to an
        # int changes the playback speed/duration of the output.
        yolo_fps = yolo_cap.get(cv2.CAP_PROP_FPS)

        # The writer does not create directories, so make sure the target
        # folder exists before opening it.
        yolo_output_dir = os.path.dirname(yolo_output_path)
        if yolo_output_dir:
            os.makedirs(yolo_output_dir, exist_ok=True)

        yolo_fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        yolo_out = cv2.VideoWriter(yolo_output_path, yolo_fourcc, yolo_fps, (yolo_width, yolo_height))
        if not yolo_out.isOpened():
            raise IOError(f"Could not open output video for writing: {yolo_output_path}")

        yolo_frame_queue = Queue(maxsize=30)
        yolo_result_queue = Queue(maxsize=30)

        # Stages: read -> detect -> annotate/write, connected by the two queues.
        yolo_threads = [
            Thread(target=yolo_read_frames, args=(yolo_cap, yolo_frame_queue, 30)),
            Thread(target=yolo_process_frames, args=(yolo_frame_queue, yolo_result_queue, yolo_model)),
            Thread(target=yolo_write_video, args=(yolo_result_queue, yolo_out)),
        ]

        for yolo_thread in yolo_threads:
            yolo_thread.start()
        for yolo_thread in yolo_threads:
            yolo_thread.join()
    finally:
        yolo_cap.release()
        if yolo_out is not None:
            yolo_out.release()

In [None]:
# Run the detection-only pipeline on the sample recording and write the
# annotated video to `yolo_output_video` (default "yolov8n.pt" checkpoint).
yolo_input_video = "data/videos/Rec16-1.mp4"
yolo_output_video = "output/yolo_output.mp4"
yolo_process_video(yolo_input_video, yolo_output_video)