In [1]:
from ultralytics import YOLO
import cv2
import os
import time

def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Both boxes are indexed as ``(x1, y1, x2, y2)``. The result is ``0.0``
    when either box has zero area; otherwise it is the overlap area divided
    by the area of the union of the two boxes.
    """
    # Individual box areas
    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # Overlap extents; a negative extent means the boxes do not overlap on
    # that axis, so it is clamped to zero.
    overlap_w = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0])
    overlap_h = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])
    overlap = max(0, overlap_w) * max(0, overlap_h)

    if area_a == 0 or area_b == 0:
        return 0.0

    union = float(area_a + area_b - overlap)
    return overlap / union

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
model_path = r"C:\Users\joshu\Documents\EEE\AMNIS\Models\training\weights\best.pt"
video_path = r"C:\Users\joshu\Downloads\before and after vid.mp4"
save_dir = r"C:\Users\joshu\Documents\EEE\AMNIS\output_videos"
os.makedirs(save_dir, exist_ok=True)

# Detection thresholds (passed to model.track below)
detection_conf_thresh = 0.4   # Confidence threshold for YOLO detections
detection_nms_thresh = 0.05   # NMS IOU threshold for YOLO detections

# Tracking thresholds (used by the per-frame matching loop)
tracking_iou_thresh = 0.5     # IOU threshold for object tracking/matching
shelf_thresh = 100            # Vertical distance threshold for shelf grouping
lost_time_limit = 500         # Frames before declaring object as permanently lost

tracker_config = 'bytetrack.yaml'

# Read the first frame (for ROI selection) and the video properties. The
# capture is released even when the first read fails.
cap = cv2.VideoCapture(video_path)
try:
    ret, first_frame = cap.read()
    if not ret:
        raise RuntimeError("Could not read first frame")
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make
    # the written video play at a different speed than the source.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    cap.release()

if not fps > 0:
    # The backend could not report a frame rate; use a default so the writer
    # is not created with 0 fps.
    fps = 30.0

# Get ROI from user
print("Select ROI and press ENTER. Press C to cancel.")
roi = cv2.selectROI("Select ROI", first_frame, fromCenter=False, showCrosshair=True)
cv2.destroyWindow("Select ROI")
x_min, y_min, w, h = (int(v) for v in roi)
if w <= 0 or h <= 0:
    # A cancelled selection comes back as an empty rectangle; continuing would
    # silently filter out every detection.
    raise RuntimeError("ROI selection was cancelled or empty")
x_max, y_max = x_min + w, y_min + h

# Load YOLO model
model = YOLO(model_path)

# Initialize the tracking generator (stream=True yields one result per frame)
generator = model.track(
    source=video_path,
    imgsz=640,
    conf=detection_conf_thresh,
    iou=detection_nms_thresh,
    tracker=tracker_config,
    stream=True
)

# Prepare video writer
output_video_path = os.path.join(save_dir, "before_after_640.mp4")
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out_writer.isOpened():
    # Fail early instead of running the whole video and saving nothing.
    raise RuntimeError(f"Could not open video writer for {output_video_path}")

# State management
active_objects = []        # List of (label, box) tuples for current objects
lost_objects = {}          # Dict of label: (box, last_seen_frame) for lost objects
next_label_num = 1         # Counter for generating unique object labels
objects_that_left = set()  # Set of labels for objects that have left the scene
frame_count = 0
box_color = (0, 255, 0)

# Main processing loop
# For every frame: keep the detections inside the ROI, group them into
# shelves, assign persistent labels by IoU against the previous frame (and
# against recently lost objects), draw the overlay and write the frame.
try:
    # Frame numbers start at 1; the generator yields one result per frame.
    for frame_count, results in enumerate(generator, start=1):
        frame = results.orig_img.copy()

        # Keep only detections whose corners all lie inside the ROI
        boxes = []
        for det in results.boxes:
            x1, y1, x2, y2 = map(int, det.xyxy[0])
            if (x_min <= x1 <= x_max and x_min <= x2 <= x_max and
                    y_min <= y1 <= y_max and y_min <= y2 <= y_max):
                boxes.append((x1, y1, x2, y2))

        # Group boxes into shelves by bottom edge: a box joins the first shelf
        # whose mean bottom is within shelf_thresh, otherwise it starts a new
        # shelf. With no boxes this leaves shelves empty.
        boxes.sort(key=lambda b: b[3])
        shelves = []
        for bx in boxes:
            for shelf in shelves:
                shelf_mean_bottom = sum(b[3] for b in shelf) / len(shelf)
                if abs(bx[3] - shelf_mean_bottom) <= shelf_thresh:
                    shelf.append(bx)
                    break
            else:
                shelves.append([bx])

        # Order boxes left-to-right within each shelf
        for shelf in shelves:
            shelf.sort(key=lambda b: b[0])
        sorted_boxes = [b for shelf in shelves for b in shelf]

        # Track objects
        new_objects = []
        used_active_labels = set()

        for new_box in sorted_boxes:
            # Best active object by IoU. Labels already claimed in this frame
            # are skipped so two detections can never share one label.
            best_iou = 0
            best_label = None
            for lbl, a_box in active_objects:
                if lbl in used_active_labels:
                    continue
                iou_val = iou(a_box, new_box)
                if iou_val > best_iou:
                    best_iou, best_label = iou_val, lbl

            if best_label is None or best_iou <= tracking_iou_thresh:
                # No active match: try recently lost objects
                best_iou = 0
                best_label = None
                for lbl, (l_box, l_frame) in lost_objects.items():
                    if frame_count - l_frame > lost_time_limit:
                        continue
                    iou_val = iou(l_box, new_box)
                    if iou_val > best_iou:
                        best_iou, best_label = iou_val, lbl

                if best_label is not None and best_iou > tracking_iou_thresh:
                    # Object came back: it is no longer lost
                    del lost_objects[best_label]
                    objects_that_left.discard(best_label)
                else:
                    # Brand-new object
                    best_label = next_label_num
                    next_label_num += 1

            new_objects.append((best_label, new_box))
            used_active_labels.add(best_label)

        # Active objects that were not matched this frame become lost. With no
        # detections this marks every active object as lost.
        for lbl, box in active_objects:
            if lbl not in used_active_labels:
                lost_objects[lbl] = (box, frame_count)
                objects_that_left.add(lbl)

        # Drop lost objects that are older than the time limit
        lost_objects = {
            lbl: (box, seen)
            for lbl, (box, seen) in lost_objects.items()
            if frame_count - seen <= lost_time_limit
        }

        active_objects = new_objects

        # Draw visualizations on every frame, including frames without
        # detections, so the overlay does not flicker.
        # Draw ROI
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        # Draw bounding boxes and labels
        for (lbl, (x1, y1, x2, y2)) in active_objects:
            cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
            cv2.putText(frame, f"{lbl}", (x1 + 5, y1 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)

        # Draw shelf boundaries at the smallest bottom edge of each shelf
        for idx, shelf in enumerate(shelves, start=1):
            y = min(b[3] for b in shelf)
            cv2.line(frame, (0, y), (width, y), (0, 0, 255), 2)
            cv2.putText(frame, f"Shelf {idx}", (10, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # Draw status information (labels sorted for a stable display order)
        if objects_that_left:
            text = f"Objects that have left: {', '.join(map(str, sorted(objects_that_left)))}"
            cv2.putText(frame, text, (100, 90),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        cv2.putText(frame, f"Total objects in scene: {len(active_objects)}",
                    (100, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        out_writer.write(frame)
finally:
    # Always finalise the output file, even if processing fails part-way
    out_writer.release()

print(f"Processed video saved to {output_video_path}")

# Automatically play the processed video
def play_video(video_path):
    """Play a video file in an OpenCV window until it ends or the user quits.

    Playback stops when the file runs out of frames, when 'q' is pressed, or
    when the window is closed.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.

    Raises:
        RuntimeError: If the video file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open video: {video_path}")

    try:
        # Convert fps to a millisecond delay between frames. A reported fps of
        # 0 would divide by zero, and a delay of 0 would make waitKey block
        # until a key press, so fall back to ~30 fps and clamp to >= 1 ms.
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_delay = max(1, int(1000 / fps)) if fps > 0 else 33

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('Processed Video', frame)

            # Break loop if 'q' is pressed or window is closed
            key = cv2.waitKey(frame_delay) & 0xFF
            if key == ord('q') or cv2.getWindowProperty('Processed Video', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Release the capture and close the window even if playback fails
        cap.release()
        cv2.destroyAllWindows()

# Play the processed video
print("Playing processed video (press 'q' to quit)...")
play_video(output_video_path)

Select ROI and press ENTER. Press C to cancel.

video 1/1 (frame 1/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 36.6ms
video 1/1 (frame 2/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 11.5ms
video 1/1 (frame 3/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 8.5ms
video 1/1 (frame 4/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 10.5ms
video 1/1 (frame 5/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 11.1ms
video 1/1 (frame 6/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 8.0ms
video 1/1 (frame 7/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 7.9ms
video 1/1 (frame 8/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 9.5ms
video 1/1 (frame 9/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 29 obs, 9.7ms
video 1/1 (frame 10/68) C:\Users\joshu\Downloads\before and after vid.mp4: 384x640 2

In [1]:
# Trying Canny edge detection in the preprocessing

from ultralytics import YOLO
import cv2
import os
import time
import numpy as np

def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Both boxes are indexed as ``(x1, y1, x2, y2)``. The result is ``0.0``
    when either box has zero area; otherwise it is the overlap area divided
    by the area of the union of the two boxes.
    """
    # Individual box areas
    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # Overlap extents; a negative extent means the boxes do not overlap on
    # that axis, so it is clamped to zero.
    overlap_w = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0])
    overlap_h = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])
    overlap = max(0, overlap_w) * max(0, overlap_h)

    if area_a == 0 or area_b == 0:
        return 0.0

    union = float(area_a + area_b - overlap)
    return overlap / union

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
model_path = r"C:\Users\joshu\Documents\EEE\AMNIS\Models\training\weights\best.pt"
video_path = r"C:\Users\joshu\Downloads\before and after vid.mp4"
save_dir = r"C:\Users\joshu\Documents\EEE\AMNIS\output_videos"
os.makedirs(save_dir, exist_ok=True)

# Detection thresholds (passed to model.track below)
detection_conf_thresh = 0.02  # Confidence threshold for YOLO detections
detection_nms_thresh = 0.05   # NMS IOU threshold for YOLO detections

# Tracking thresholds (used by the per-frame matching loop)
tracking_iou_thresh = 0.5     # IOU threshold for object tracking/matching
shelf_thresh = 100            # Vertical distance threshold for shelf grouping
lost_time_limit = 500         # Frames before declaring object as permanently lost

tracker_config = 'bytetrack.yaml'

# Read the first frame (for ROI selection) and the video properties. The
# capture is released even when the first read fails.
cap = cv2.VideoCapture(video_path)
try:
    ret, first_frame = cap.read()
    if not ret:
        raise RuntimeError("Could not read first frame")
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make
    # the written video play at a different speed than the source.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    cap.release()

if not fps > 0:
    # The backend could not report a frame rate; use a default so the writer
    # is not created with 0 fps.
    fps = 30.0

# Edge overlay for the ROI-selection image: horizontal and vertical Sobel
# gradients are blended, then Canny is run on the blended gradient image.
first_frame_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
grad_x = cv2.Sobel(first_frame_gray, cv2.CV_16S, 1, 0, ksize=3)
grad_y = cv2.Sobel(first_frame_gray, cv2.CV_16S, 0, 1, ksize=3)
grad_x = cv2.convertScaleAbs(grad_x)
grad_y = cv2.convertScaleAbs(grad_y)
combined_gradient = cv2.addWeighted(grad_x, 0.5, grad_y, 0.5, 0)
edges = cv2.Canny(combined_gradient, 100, 200)
first_frame_edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

# Overlay the edges onto the original frame for visualization
first_frame_overlay = cv2.addWeighted(first_frame, 0.8, first_frame_edges, 0.2, 0)

# Get ROI from user
print("Select ROI and press ENTER. Press C to cancel.")
roi = cv2.selectROI("Select ROI", first_frame_overlay, fromCenter=False, showCrosshair=True)
cv2.destroyWindow("Select ROI")
x_min, y_min, w, h = (int(v) for v in roi)
if w <= 0 or h <= 0:
    # A cancelled selection comes back as an empty rectangle; continuing would
    # silently filter out every detection.
    raise RuntimeError("ROI selection was cancelled or empty")
x_max, y_max = x_min + w, y_min + h

# Load YOLO model
model = YOLO(model_path)

# Initialize the tracking generator (stream=True yields one result per frame)
generator = model.track(
    source=video_path,
    imgsz=960,
    conf=detection_conf_thresh,
    iou=detection_nms_thresh,
    tracker=tracker_config,
    stream=True
)

# Prepare video writer
output_video_path = os.path.join(save_dir, "sobel_filter.mp4")
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out_writer.isOpened():
    # Fail early instead of running the whole video and saving nothing.
    raise RuntimeError(f"Could not open video writer for {output_video_path}")

# State management
active_objects = []        # List of (label, box) tuples for current objects
lost_objects = {}          # Dict of label: (box, last_seen_frame) for lost objects
next_label_num = 1         # Counter for generating unique object labels
objects_that_left = set()  # Set of labels for objects that have left the scene
frame_count = 0
box_color = (0, 255, 0)

# Main processing loop
# For every frame: blend an edge overlay into the output image, keep the
# detections inside the ROI, group them into shelves, assign persistent labels
# by IoU against the previous frame (and against recently lost objects), draw
# the overlay and write the frame.
try:
    # Frame numbers start at 1; the generator yields one result per frame.
    for frame_count, results in enumerate(generator, start=1):
        frame = results.orig_img.copy()

        # Edge overlay: blended Sobel gradients fed into Canny.
        # NOTE: this only changes the saved visualisation; results.boxes was
        # already computed before the frame is modified here.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        grad_x = cv2.convertScaleAbs(cv2.Sobel(gray, cv2.CV_16S, 1, 0, ksize=3))
        grad_y = cv2.convertScaleAbs(cv2.Sobel(gray, cv2.CV_16S, 0, 1, ksize=3))
        combined_gradient = cv2.addWeighted(grad_x, 0.5, grad_y, 0.5, 0)
        edges = cv2.Canny(combined_gradient, 100, 200)
        edges_colored = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

        # Overlay the edges onto the original frame
        frame = cv2.addWeighted(frame, 0.8, edges_colored, 0.2, 0)

        # Keep only detections whose corners all lie inside the ROI
        boxes = []
        for det in results.boxes:
            x1, y1, x2, y2 = map(int, det.xyxy[0])
            if (x_min <= x1 <= x_max and x_min <= x2 <= x_max and
                    y_min <= y1 <= y_max and y_min <= y2 <= y_max):
                boxes.append((x1, y1, x2, y2))

        # Group boxes into shelves by bottom edge: a box joins the first shelf
        # whose mean bottom is within shelf_thresh, otherwise it starts a new
        # shelf. With no boxes this leaves shelves empty.
        boxes.sort(key=lambda b: b[3])
        shelves = []
        for bx in boxes:
            for shelf in shelves:
                shelf_mean_bottom = sum(b[3] for b in shelf) / len(shelf)
                if abs(bx[3] - shelf_mean_bottom) <= shelf_thresh:
                    shelf.append(bx)
                    break
            else:
                shelves.append([bx])

        # Order boxes left-to-right within each shelf
        for shelf in shelves:
            shelf.sort(key=lambda b: b[0])
        sorted_boxes = [b for shelf in shelves for b in shelf]

        # Track objects
        new_objects = []
        used_active_labels = set()

        for new_box in sorted_boxes:
            # Best active object by IoU. Labels already claimed in this frame
            # are skipped so two detections can never share one label.
            best_iou = 0
            best_label = None
            for lbl, a_box in active_objects:
                if lbl in used_active_labels:
                    continue
                iou_val = iou(a_box, new_box)
                if iou_val > best_iou:
                    best_iou, best_label = iou_val, lbl

            if best_label is None or best_iou <= tracking_iou_thresh:
                # No active match: try recently lost objects
                best_iou = 0
                best_label = None
                for lbl, (l_box, l_frame) in lost_objects.items():
                    if frame_count - l_frame > lost_time_limit:
                        continue
                    iou_val = iou(l_box, new_box)
                    if iou_val > best_iou:
                        best_iou, best_label = iou_val, lbl

                if best_label is not None and best_iou > tracking_iou_thresh:
                    # Object came back: it is no longer lost
                    del lost_objects[best_label]
                    objects_that_left.discard(best_label)
                else:
                    # Brand-new object
                    best_label = next_label_num
                    next_label_num += 1

            new_objects.append((best_label, new_box))
            used_active_labels.add(best_label)

        # Active objects that were not matched this frame become lost. With no
        # detections this marks every active object as lost.
        for lbl, box in active_objects:
            if lbl not in used_active_labels:
                lost_objects[lbl] = (box, frame_count)
                objects_that_left.add(lbl)

        # Drop lost objects that are older than the time limit
        lost_objects = {
            lbl: (box, seen)
            for lbl, (box, seen) in lost_objects.items()
            if frame_count - seen <= lost_time_limit
        }

        active_objects = new_objects

        # Draw visualizations on every frame, including frames without
        # detections, so the overlay does not flicker.
        # Draw ROI
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        # Draw bounding boxes and labels
        for (lbl, (x1, y1, x2, y2)) in active_objects:
            cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
            cv2.putText(frame, f"{lbl}", (x1 + 5, y1 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)

        # Draw shelf boundaries at the smallest bottom edge of each shelf
        for idx, shelf in enumerate(shelves, start=1):
            y = min(b[3] for b in shelf)
            cv2.line(frame, (0, y), (width, y), (0, 0, 255), 2)
            cv2.putText(frame, f"Shelf {idx}", (10, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # Draw status information (labels sorted for a stable display order)
        if objects_that_left:
            text = f"Objects that have left: {', '.join(map(str, sorted(objects_that_left)))}"
            cv2.putText(frame, text, (100, 90),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        cv2.putText(frame, f"Total objects in scene: {len(active_objects)}",
                    (100, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        out_writer.write(frame)
finally:
    # Always finalise the output file, even if processing fails part-way
    out_writer.release()

print(f"Processed video saved to {output_video_path}")

# Automatically play the processed video
def play_video(video_path):
    """Play a video file in an OpenCV window until it ends or the user quits.

    Playback stops when the file runs out of frames, when 'q' is pressed, or
    when the window is closed.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.

    Raises:
        RuntimeError: If the video file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open video: {video_path}")

    try:
        # Convert fps to a millisecond delay between frames. A reported fps of
        # 0 would divide by zero, and a delay of 0 would make waitKey block
        # until a key press, so fall back to ~30 fps and clamp to >= 1 ms.
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_delay = max(1, int(1000 / fps)) if fps > 0 else 33

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('Processed Video', frame)

            # Break loop if 'q' is pressed or window is closed
            key = cv2.waitKey(frame_delay) & 0xFF
            if key == ord('q') or cv2.getWindowProperty('Processed Video', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Release the capture and close the window even if playback fails
        cap.release()
        cv2.destroyAllWindows()

# Play the processed video
print("Playing processed video (press 'q' to quit)...")
play_video(output_video_path)


Select ROI and press ENTER. Press C to cancel.

video 1/1 (frame 1/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 43.9ms
video 1/1 (frame 2/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 9.1ms
video 1/1 (frame 3/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 9.1ms
video 1/1 (frame 4/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 9.5ms
video 1/1 (frame 5/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 9.0ms
video 1/1 (frame 6/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 8.5ms
video 1/1 (frame 7/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 7.4ms
video 1/1 (frame 8/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 8.7ms
video 1/1 (frame 9/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 obs, 8.2ms
video 1/1 (frame 10/68) C:\Users\joshu\Downloads\before and after vid.mp4: 544x960 30 o

In [None]:
# Trying some image preprocessing


from ultralytics import YOLO
import cv2
import os
import time
import numpy as np

def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Both boxes are indexed as ``(x1, y1, x2, y2)``. The result is ``0.0``
    when either box has zero area; otherwise it is the overlap area divided
    by the area of the union of the two boxes.
    """
    # Individual box areas
    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # Overlap extents; a negative extent means the boxes do not overlap on
    # that axis, so it is clamped to zero.
    overlap_w = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0])
    overlap_h = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])
    overlap = max(0, overlap_w) * max(0, overlap_h)

    if area_a == 0 or area_b == 0:
        return 0.0

    union = float(area_a + area_b - overlap)
    return overlap / union

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
model_path = r"C:\Users\joshu\Documents\EEE\AMNIS\Models\training\weights\best.pt"
video_path = r"C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20241204_102758_0.mp4"
save_dir = r"C:\Users\joshu\Documents\EEE\AMNIS\output_videos"
os.makedirs(save_dir, exist_ok=True)

# Detection thresholds (passed to model.track below)
detection_conf_thresh = 0.02  # Confidence threshold for YOLO detections
detection_nms_thresh = 0.5    # NMS IOU threshold for YOLO detections

# Tracking thresholds (used by the per-frame matching loop)
tracking_iou_thresh = 0.5     # IOU threshold for object tracking/matching
shelf_thresh = 100            # Vertical distance threshold for shelf grouping
lost_time_limit = 300         # Frames before declaring object as permanently lost

tracker_config = 'bytetrack.yaml'

# Read the first frame (for ROI selection) and the video properties. The
# capture is released even when the first read fails.
cap = cv2.VideoCapture(video_path)
try:
    ret, first_frame = cap.read()
    if not ret:
        raise RuntimeError("Could not read first frame")
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make
    # the written video play at a different speed than the source.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    cap.release()

if not fps > 0:
    # The backend could not report a frame rate; use a default so the writer
    # is not created with 0 fps.
    fps = 30.0

# Get ROI from user
print("Select ROI and press ENTER. Press C to cancel.")
roi = cv2.selectROI("Select ROI", first_frame, fromCenter=False, showCrosshair=True)
cv2.destroyWindow("Select ROI")
x_min, y_min, w, h = (int(v) for v in roi)
if w <= 0 or h <= 0:
    # A cancelled selection comes back as an empty rectangle; continuing would
    # leave an empty ROI slice for the per-frame enhancement and would filter
    # out every detection.
    raise RuntimeError("ROI selection was cancelled or empty")
x_max, y_max = x_min + w, y_min + h

# Load YOLO model
model = YOLO(model_path)

# Initialize the tracking generator (stream=True yields one result per frame)
generator = model.track(
    source=video_path,
    imgsz=640,
    conf=detection_conf_thresh,
    iou=detection_nms_thresh,
    tracker=tracker_config,
    stream=True
)

# Prepare video writer
output_video_path = os.path.join(save_dir, "tracked_with_roi.mp4")
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out_writer.isOpened():
    # Fail early instead of running the whole video and saving nothing.
    raise RuntimeError(f"Could not open video writer for {output_video_path}")

# State management
active_objects = []        # List of (label, box) tuples for current objects
lost_objects = {}          # Dict of label: (box, last_seen_frame) for lost objects
next_label_num = 1         # Counter for generating unique object labels
objects_that_left = set()  # Set of labels for objects that have left the scene
frame_count = 0
box_color = (0, 255, 0)

# Main processing loop
# For every frame: enhance contrast and edges inside the ROI of the output
# image, keep the detections inside the ROI, group them into shelves, assign
# persistent labels by IoU against the previous frame (and against recently
# lost objects), draw the overlay and write the frame.

# The CLAHE operator does not depend on the frame, so build it once.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

try:
    # Frame numbers start at 1; the generator yields one result per frame.
    for frame_count, results in enumerate(generator, start=1):
        frame = results.orig_img.copy()

        # Enhance contrast and edges within the ROI.
        # NOTE: this only changes the saved visualisation; results.boxes was
        # already computed before the frame is modified here.
        roi_frame = frame[y_min:y_max, x_min:x_max]
        if roi_frame.size:  # an empty slice would make cvtColor raise
            # Contrast enhancement (CLAHE) on the lightness channel only
            lab = cv2.cvtColor(roi_frame, cv2.COLOR_BGR2LAB)
            l_chan, a_chan, b_chan = cv2.split(lab)
            l_chan = clahe.apply(l_chan)
            enhanced_lab = cv2.merge((l_chan, a_chan, b_chan))
            roi_frame = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

            # Edge detection, blended into the enhanced ROI
            edges = cv2.Canny(roi_frame, 100, 200)
            edges_colored = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
            roi_frame = cv2.addWeighted(roi_frame, 0.8, edges_colored, 0.2, 0)

            # Replace the ROI in the original frame
            frame[y_min:y_max, x_min:x_max] = roi_frame

        # Keep only detections whose corners all lie inside the ROI
        boxes = []
        for det in results.boxes:
            x1, y1, x2, y2 = map(int, det.xyxy[0])
            if (x_min <= x1 <= x_max and x_min <= x2 <= x_max and
                    y_min <= y1 <= y_max and y_min <= y2 <= y_max):
                boxes.append((x1, y1, x2, y2))

        # Group boxes into shelves by bottom edge: a box joins the first shelf
        # whose mean bottom is within shelf_thresh, otherwise it starts a new
        # shelf. With no boxes this leaves shelves empty.
        boxes.sort(key=lambda b: b[3])
        shelves = []
        for bx in boxes:
            for shelf in shelves:
                shelf_mean_bottom = sum(b[3] for b in shelf) / len(shelf)
                if abs(bx[3] - shelf_mean_bottom) <= shelf_thresh:
                    shelf.append(bx)
                    break
            else:
                shelves.append([bx])

        # Order boxes left-to-right within each shelf
        for shelf in shelves:
            shelf.sort(key=lambda b: b[0])
        sorted_boxes = [b for shelf in shelves for b in shelf]

        # Track objects
        new_objects = []
        used_active_labels = set()

        for new_box in sorted_boxes:
            # Best active object by IoU. Labels already claimed in this frame
            # are skipped so two detections can never share one label.
            best_iou = 0
            best_label = None
            for lbl, a_box in active_objects:
                if lbl in used_active_labels:
                    continue
                iou_val = iou(a_box, new_box)
                if iou_val > best_iou:
                    best_iou, best_label = iou_val, lbl

            if best_label is None or best_iou <= tracking_iou_thresh:
                # No active match: try recently lost objects
                best_iou = 0
                best_label = None
                for lbl, (l_box, l_frame) in lost_objects.items():
                    if frame_count - l_frame > lost_time_limit:
                        continue
                    iou_val = iou(l_box, new_box)
                    if iou_val > best_iou:
                        best_iou, best_label = iou_val, lbl

                if best_label is not None and best_iou > tracking_iou_thresh:
                    # Object came back: it is no longer lost
                    del lost_objects[best_label]
                    objects_that_left.discard(best_label)
                else:
                    # Brand-new object
                    best_label = next_label_num
                    next_label_num += 1

            new_objects.append((best_label, new_box))
            used_active_labels.add(best_label)

        # Active objects that were not matched this frame become lost. With no
        # detections this marks every active object as lost.
        for lbl, box in active_objects:
            if lbl not in used_active_labels:
                lost_objects[lbl] = (box, frame_count)
                objects_that_left.add(lbl)

        # Drop lost objects that are older than the time limit
        lost_objects = {
            lbl: (box, seen)
            for lbl, (box, seen) in lost_objects.items()
            if frame_count - seen <= lost_time_limit
        }

        active_objects = new_objects

        # Draw visualizations on every frame, including frames without
        # detections, so the overlay does not flicker.
        # Draw ROI
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        # Draw bounding boxes and labels
        for (lbl, (x1, y1, x2, y2)) in active_objects:
            cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
            cv2.putText(frame, f"{lbl}", (x1 + 5, y1 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)

        # Draw shelf boundaries at the smallest bottom edge of each shelf
        for idx, shelf in enumerate(shelves, start=1):
            y = min(b[3] for b in shelf)
            cv2.line(frame, (0, y), (width, y), (0, 0, 255), 2)
            cv2.putText(frame, f"Shelf {idx}", (10, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # Draw status information (labels sorted for a stable display order)
        if objects_that_left:
            text = f"Objects that have left: {', '.join(map(str, sorted(objects_that_left)))}"
            cv2.putText(frame, text, (100, 90),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        cv2.putText(frame, f"Total objects in scene: {len(active_objects)}",
                    (100, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        out_writer.write(frame)
finally:
    # Always finalise the output file, even if processing fails part-way
    out_writer.release()

print(f"Processed video saved to {output_video_path}")

# Automatically play the processed video
def play_video(video_path):
    """Play a video file in an OpenCV window until it ends or the user quits.

    Playback stops when the file runs out of frames, when 'q' is pressed, or
    when the window is closed.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.

    Raises:
        RuntimeError: If the video file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open video: {video_path}")

    try:
        # Convert fps to a millisecond delay between frames. A reported fps of
        # 0 would divide by zero, and a delay of 0 would make waitKey block
        # until a key press, so fall back to ~30 fps and clamp to >= 1 ms.
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_delay = max(1, int(1000 / fps)) if fps > 0 else 33

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('Processed Video', frame)

            # Break loop if 'q' is pressed or window is closed
            key = cv2.waitKey(frame_delay) & 0xFF
            if key == ord('q') or cv2.getWindowProperty('Processed Video', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Release the capture and close the window even if playback fails
        cap.release()
        cv2.destroyAllWindows()

# Play the processed video
print("Playing processed video (press 'q' to quit)...")
play_video(output_video_path)


Select ROI and press ENTER. Press C to cancel.

video 1/1 (frame 1/289) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20241204_102758_0.mp4: 384x640 20 obs, 51.9ms
video 1/1 (frame 2/289) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20241204_102758_0.mp4: 384x640 20 obs, 8.2ms
video 1/1 (frame 3/289) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20241204_102758_0.mp4: 384x640 20 obs, 8.5ms
video 1/1 (frame 4/289) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20241204_102758_0.mp4: 384x640 20 obs, 7.7ms
video 1/1 (frame 5/289) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20241204_102758_0.mp4: 384x640 20 obs, 8.5ms
video 1/1 (frame 6/289) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20241204_102758_0.mp4: 384x640 20 obs, 8.3ms
video 1/1 (frame 7/289) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241204_jp\20

In [6]:
# Heatmap testing

from ultralytics import YOLO
import cv2
import os
import time
import numpy as np

def iou(boxA, boxB):
    """Return the intersection-over-union of two (x1, y1, x2, y2) boxes.

    The result is 0.0 when either box has zero area.
    """
    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    if area_a == 0 or area_b == 0:
        return 0.0

    # Width/height of the overlap rectangle; negative means no overlap.
    overlap_w = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0])
    overlap_h = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1])
    overlap = max(0, overlap_w) * max(0, overlap_h)

    union = area_a + area_b - overlap
    return overlap / float(union)

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
model_path = r"C:\Users\joshu\Documents\EEE\AMNIS\Models\training\weights\best.pt"
video_path = r"C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241213_ml_bad\20241213_120604_BL7.mp4"
save_dir = r"C:\Users\joshu\Documents\EEE\AMNIS\output_videos"
os.makedirs(save_dir, exist_ok=True)

# Detection thresholds
detection_conf_thresh = 0.1   # Confidence threshold for YOLO detections
detection_nms_thresh = 0.01   # NMS IOU threshold for YOLO detections

# Tracking thresholds
tracking_iou_thresh = 0.9  # IOU threshold for object tracking/matching
shelf_thresh = 100            # Vertical distance threshold for shelf grouping
lost_time_limit = 300         # Frames before declaring object as permanently lost

tracker_config = 'bytetrack.yaml'

# Open the video once to grab a frame for ROI selection and to read its
# properties. The try/finally releases the capture even when the first frame
# cannot be read.
cap = cv2.VideoCapture(video_path)
try:
    ret, first_frame = cap.read()
    if not ret:
        raise RuntimeError("Could not read first frame")

    # Get video properties. FPS is kept as a float: truncating e.g. 29.97 to
    # 29 makes the written video play slower than the source.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    cap.release()

if not fps > 0:
    fps = 30.0  # assumed fallback when the container reports no frame rate

# Get ROI from user
print("Select ROI and press ENTER. Press C to cancel.")
roi = cv2.selectROI("Select ROI", first_frame, fromCenter=False, showCrosshair=True)
cv2.destroyWindow("Select ROI")
x_min, y_min, w, h = roi
if w <= 0 or h <= 0:
    # A cancelled selection yields an empty rectangle; every detection would
    # then be filtered out and the run would silently produce nothing useful.
    raise RuntimeError("No ROI selected")
x_max, y_max = x_min + w, y_min + h

# Load YOLO model
model = YOLO(model_path)

# Initialize the tracking generator
generator = model.track(
    source=video_path,
    imgsz=640,
    conf=detection_conf_thresh,
    iou=detection_nms_thresh,
    tracker=tracker_config,
    stream=True
)

# Prepare video writer
output_video_path = os.path.join(save_dir, "heatmap.mp4")
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out_writer.isOpened():
    # Writes to an unopened writer are silently dropped, so fail loudly here.
    raise RuntimeError(f"Could not open video writer for {output_video_path}")

# State management
# NOTE(review): apart from frame_count, none of these are read by the heatmap
# loop in this cell; they are kept in case other cells rely on them.
active_objects = []      # List of (label, box) tuples for current objects
lost_objects = {}        # Dict of label: (box, last_seen_frame) for lost objects
next_label_num = 1      # Counter for generating unique object labels
objects_that_left = set()  # Set of labels for objects that have left the scene
frame_count = 0
box_color = (0, 255, 0)

# Function to create a Gaussian heatmap
def create_gaussian_heatmap(roi_shape, boxes):
    """Build an 8-bit heatmap with one axis-aligned Gaussian blob per box.

    Each box contributes exp(-(dx^2 / (2*sx^2) + dy^2 / (2*sy^2))) to the
    pixels it covers, where (dx, dy) is the offset from the box centre and
    sx / sy are half the box width / height (at least 1).

    Args:
        roi_shape: Shape of the ROI image; only its first two entries
            (height, width) are used.
        boxes: Iterable of (x1, y1, x2, y2) integer boxes in ROI coordinates.

    Returns:
        A uint8 array of shape roi_shape[:2], min-max normalised to 0..255.
    """
    heatmap = np.zeros(roi_shape[:2], dtype=np.float32)
    rows, cols = heatmap.shape
    for x1, y1, x2, y2 in boxes:
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        sigma_x = max(1, (x2 - x1) // 2)
        sigma_y = max(1, (y2 - y1) // 2)

        # Clip to the heatmap so a box that pokes outside the ROI can neither
        # raise IndexError nor wrap around through negative indices.
        xa, xb = max(0, x1), min(cols, x2)
        ya, yb = max(0, y1), min(rows, y2)
        if xa >= xb or ya >= yb:
            continue

        # The Gaussian is separable, so evaluate the x and y exponents once
        # per column / row and combine them by broadcasting instead of
        # looping over every pixel in Python.
        x_term = ((np.arange(xa, xb) - cx) ** 2) / (2 * sigma_x ** 2)
        y_term = ((np.arange(ya, yb) - cy) ** 2) / (2 * sigma_y ** 2)
        heatmap[ya:yb, xa:xb] += np.exp(-(x_term[np.newaxis, :] + y_term[:, np.newaxis]))
    heatmap = cv2.normalize(heatmap, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    return heatmap

# Main processing loop: blend a detection heatmap into the ROI of every frame
for results in generator:
    frame_count += 1
    frame = results.orig_img.copy()

    # Crop of the frame covered by the user-selected ROI
    roi_frame = frame[y_min:y_max, x_min:x_max]

    # Keep only detections lying fully inside the ROI, shifted so that the
    # ROI's top-left corner becomes the origin
    boxes = []
    for det in results.boxes:
        x1, y1, x2, y2 = map(int, det.xyxy[0])
        inside_x = x_min <= min(x1, x2) and max(x1, x2) <= x_max
        inside_y = y_min <= min(y1, y2) and max(y1, y2) <= y_max
        if inside_x and inside_y:
            boxes.append((x1 - x_min, y1 - y_min, x2 - x_min, y2 - y_min))

    if boxes:
        # Colour-map the Gaussian heatmap and alpha-blend it over the ROI
        heatmap_colored = cv2.applyColorMap(
            create_gaussian_heatmap(roi_frame.shape, boxes),
            cv2.COLORMAP_JET,
        )
        roi_frame = cv2.addWeighted(roi_frame, 0.7, heatmap_colored, 0.3, 0)

    # Paste the (possibly blended) ROI back and emit the frame
    frame[y_min:y_max, x_min:x_max] = roi_frame
    out_writer.write(frame)

out_writer.release()
print(f"Processed video saved to {output_video_path}")

# Automatically play the processed video
def play_video(video_path):
    """Play a video file in an OpenCV window.

    Playback runs until the stream ends, 'q' is pressed, or the
    'Processed Video' window is closed.

    Args:
        video_path: Path handed to cv2.VideoCapture.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Could not open video for playback: {video_path}")
            return

        # CAP_PROP_FPS can come back as 0 when the container carries no
        # frame-rate metadata; dividing by it would raise ZeroDivisionError.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0  # assumed fallback playback rate

        # Keep the delay at >= 1 ms: waitKey(0) would wait for a key press
        # instead of advancing to the next frame.
        frame_delay = max(1, int(1000 / fps))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('Processed Video', frame)

            # Break loop if 'q' is pressed or window is closed
            key = cv2.waitKey(frame_delay) & 0xFF
            if key == ord('q') or cv2.getWindowProperty('Processed Video', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Always give back the capture handle and close the window, even if
        # decoding or display raised part-way through.
        cap.release()
        cv2.destroyAllWindows()

# Replay the file that was just written so the result can be inspected;
# play_video() returns once the video ends or the user quits.
print("Playing processed video (press 'q' to quit)...")
play_video(output_video_path)


Select ROI and press ENTER. Press C to cancel.

video 1/1 (frame 1/141) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241213_ml_bad\20241213_120604_BL7.mp4: 384x640 29 obs, 24.6ms
video 1/1 (frame 2/141) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241213_ml_bad\20241213_120604_BL7.mp4: 384x640 29 obs, 15.6ms
video 1/1 (frame 3/141) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241213_ml_bad\20241213_120604_BL7.mp4: 384x640 29 obs, 15.2ms
video 1/1 (frame 4/141) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241213_ml_bad\20241213_120604_BL7.mp4: 384x640 29 obs, 20.8ms
video 1/1 (frame 5/141) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241213_ml_bad\20241213_120604_BL7.mp4: 384x640 29 obs, 16.0ms
video 1/1 (frame 6/141) C:\Users\joshu\Queen's University Belfast\Michael Loughran - 20241213_ml_bad\20241213_120604_BL7.mp4: 384x640 29 obs, 22.1ms
video 1/1 (frame 7/141) C:\Users\joshu\Queen's University 

In [1]:
from ultralytics import YOLO
import cv2
import os
import time

def iou(boxA, boxB):
    """Compute intersection-over-union for two boxes given as (x1, y1, x2, y2).

    Returns 0.0 if either box is degenerate (zero area).
    """
    # Corners of the overlap rectangle
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    shared = max(0, right - left) * max(0, bottom - top)

    sizes = [(bx[2] - bx[0]) * (bx[3] - bx[1]) for bx in (boxA, boxB)]
    if 0 in sizes:
        return 0.0
    return shared / float(sizes[0] + sizes[1] - shared)

MAX_THUMBNAIL_WIDTH = 100  # Width, in pixels, every thumbnail is scaled to


def overlay_object_images(frame, objects_that_left, object_images):
    """Draw thumbnails of departed objects in a column on the right of the frame.

    Thumbnails are stacked from the bottom of the frame upwards. Each one is
    scaled to MAX_THUMBNAIL_WIDTH pixels wide with its aspect ratio kept and
    is captioned "Object <label>".

    Args:
        frame: Image array to draw on; it is modified in place.
        objects_that_left: Iterable of labels whose thumbnails should be shown.
        object_images: Mapping of label -> image crop. Labels without an
            entry, or whose image is None or empty, are skipped.

    Returns:
        The same frame object that was passed in.
    """
    height, width = frame.shape[:2]
    img_offset = 10

    # Every thumbnail has the same width, so the column's x position is fixed.
    x_start = width - (MAX_THUMBNAIL_WIDTH + img_offset)
    if x_start < 0:
        return frame  # Frame is too narrow to hold any thumbnail

    y_start = height  # Start from bottom and go upwards

    for lbl in objects_that_left:
        if lbl not in object_images:
            continue
        try:
            obj_img = object_images[lbl]
            if obj_img is None or obj_img.size == 0:
                continue

            orig_h, orig_w = obj_img.shape[:2]
            # Pin the width exactly (float rounding of orig_w * scale could
            # give 99) and never let a very wide crop collapse to a height
            # of 0, which cv2.resize rejects.
            new_w = MAX_THUMBNAIL_WIDTH
            new_h = max(1, int(orig_h * MAX_THUMBNAIL_WIDTH / float(orig_w)))

            # Place this thumbnail above the previous one.
            y_start -= (new_h + img_offset)
            if y_start < 0:
                # The column is full: y_start only ever decreases, so nothing
                # after this one can fit either. Checked before resizing to
                # avoid wasted work.
                break

            # Resize with same aspect ratio and overlay the object image
            obj_img_resized = cv2.resize(obj_img, (new_w, new_h))
            frame[y_start:y_start+new_h, x_start:x_start+new_w] = obj_img_resized

            # Add label text above the thumbnail
            cv2.putText(frame, f"Object {lbl}",
                        (x_start, y_start - 5),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (255, 255, 255),
                        1)

        except Exception as e:
            # Best effort: a bad thumbnail must not abort video processing.
            print(f"Error processing object {lbl}: {str(e)}")
            continue

    return frame

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
model_path = r"C:\Users\joshu\Documents\EEE\AMNIS\Models\training\weights\best.pt"
video_path = r"C:\Users\joshu\Downloads\start_end_cornflakes.mp4"
save_dir = r"C:\Users\joshu\Documents\EEE\AMNIS\output_videos"
os.makedirs(save_dir, exist_ok=True)

# Detection thresholds
detection_conf_thresh = 0.4   # Confidence threshold for YOLO detections
detection_nms_thresh = 0.05   # NMS IOU threshold for YOLO detections

# Tracking thresholds
tracking_iou_thresh = 0.5     # IOU threshold for object tracking/matching
shelf_thresh = 100           # Vertical distance threshold for shelf grouping
lost_time_limit = 500        # Frames before declaring object as permanently lost

tracker_config = 'bytetrack.yaml'

# Open the video once to grab a frame for ROI selection and to read its
# properties. The try/finally releases the capture even when the first frame
# cannot be read.
cap = cv2.VideoCapture(video_path)
try:
    ret, first_frame = cap.read()
    if not ret:
        raise RuntimeError("Could not read first frame")

    # Get video properties. FPS is kept as a float: truncating e.g. 29.97 to
    # 29 makes the written video play slower than the source.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    cap.release()

if not fps > 0:
    fps = 30.0  # assumed fallback when the container reports no frame rate

# Get ROI from user
print("Select ROI and press ENTER. Press C to cancel.")
roi = cv2.selectROI("Select ROI", first_frame, fromCenter=False, showCrosshair=True)
cv2.destroyWindow("Select ROI")
x_min, y_min, w, h = roi
if w <= 0 or h <= 0:
    # A cancelled selection yields an empty rectangle; every detection would
    # then be filtered out and nothing would ever be tracked.
    raise RuntimeError("No ROI selected")
x_max, y_max = x_min + w, y_min + h

# Load YOLO model
model = YOLO(model_path)

# Initialize the tracking generator
generator = model.track(
    source=video_path,
    imgsz=640,
    conf=detection_conf_thresh,
    iou=detection_nms_thresh,
    tracker=tracker_config,
    stream=True
)

# Prepare video writer
output_video_path = os.path.join(save_dir, "cornflakes.mp4")
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
if not out_writer.isOpened():
    # Writes to an unopened writer are silently dropped, so fail loudly here.
    raise RuntimeError(f"Could not open video writer for {output_video_path}")

# State management
active_objects = []      # List of (label, box) tuples for current objects
lost_objects = {}        # Dict of label: (box, last_seen_frame) for lost objects
next_label_num = 1      # Counter for generating unique object labels
objects_that_left = set()  # Set of labels for objects that have left the scene
frame_count = 0
box_color = (0, 255, 0)
object_first_seen = {}  # Dictionary to store the first frame and bounding box of each object
object_images = {}      # Dictionary to store object images for display
last_valid_images = {}  # Dictionary to store the last valid image of each object

# Main processing loop
#
# For every frame: keep the detections inside the ROI, group them into
# shelves, match them to the objects tracked in the previous frame by IoU,
# record objects that vanished, draw the annotations and write the frame.


def _record_departure(lbl, box):
    """Mark lbl as having left the scene and publish its last stored crop."""
    lost_objects[lbl] = (box, frame_count)
    objects_that_left.add(lbl)

    # Use the last valid image we stored
    if lbl in last_valid_images:
        object_images[lbl] = last_valid_images[lbl]
        # Optionally save to disk
        image_path = os.path.join(save_dir, f"object_{lbl}.png")
        cv2.imwrite(image_path, last_valid_images[lbl])
        print(f"Saved image for object {lbl} at {image_path}")


try:
    for results in generator:
        frame_count += 1
        frame = results.orig_img.copy()

        # Filter boxes to only include those within ROI
        boxes = []
        for b in results.boxes:
            x1, y1, x2, y2 = map(int, b.xyxy[0])
            if (x_min <= x1 <= x_max and x_min <= x2 <= x_max and
                    y_min <= y1 <= y_max and y_min <= y2 <= y_max):
                boxes.append((x1, y1, x2, y2))

        if boxes:
            # Group boxes into shelves: a box joins the first shelf whose mean
            # bottom edge is within shelf_thresh of its own bottom edge.
            boxes.sort(key=lambda b: b[3])
            shelves = []
            for bx in boxes:
                x1, y1, x2, y2 = bx
                placed = False
                for shelf in shelves:
                    shelf_bottoms = [b[3] for b in shelf]
                    shelf_mean_bottom = sum(shelf_bottoms) / len(shelf_bottoms)
                    if abs(y2 - shelf_mean_bottom) <= shelf_thresh:
                        shelf.append(bx)
                        placed = True
                        break
                if not placed:
                    shelves.append([bx])

            # Sort shelves left-to-right and create final box list
            for shelf in shelves:
                shelf.sort(key=lambda b: b[0])
            sorted_boxes = [b for shelf in shelves for b in shelf]

            # Track objects
            new_objects = []
            used_active_labels = set()

            for new_box in sorted_boxes:
                # Try to match with active objects
                best_iou = 0
                best_label = None
                for (lbl, a_box) in active_objects:
                    if lbl in used_active_labels:
                        # Each existing object may be claimed by at most one
                        # detection; otherwise two boxes end up sharing a label.
                        continue
                    iou_val = iou(a_box, new_box)
                    if iou_val > best_iou:
                        best_iou = iou_val
                        best_label = lbl

                if best_iou > tracking_iou_thresh:
                    label = best_label
                else:
                    # Assign a new label
                    label = next_label_num
                    object_first_seen[label] = (frame_count, new_box)
                    next_label_num += 1

                new_objects.append((label, new_box))
                used_active_labels.add(label)

                # Store the current image of the object while it's still
                # visible. Slicing clamps at the frame edge, so a box touching
                # the border is still captured; only empty crops are skipped.
                x1, y1, x2, y2 = new_box
                crop = frame[max(0, y1):y2, max(0, x1):x2]
                if crop.size:
                    last_valid_images[label] = crop.copy()

            # Handle disappeared objects
            for (lbl, box) in active_objects:
                if lbl not in used_active_labels:
                    _record_departure(lbl, box)

            active_objects = new_objects

            # Draw visualizations
            # Draw ROI
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

            # Draw bounding boxes and labels
            for (lbl, (x1, y1, x2, y2)) in active_objects:
                cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
                cv2.putText(frame, f"{lbl}", (x1 + 5, y1 + 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)

            # Draw shelf boundaries
            for idx, shelf in enumerate(shelves, start=1):
                y = min(b[3] for b in shelf)
                cv2.line(frame, (0, y), (width, y), (0, 0, 255), 2)
                cv2.putText(frame, f"Shelf {idx}", (10, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            # Use the overlay_object_images function
            frame = overlay_object_images(frame, objects_that_left, object_images)

            # Draw status information
            if objects_that_left:
                text = f"Objects that have left: {', '.join(map(str, objects_that_left))}"
                cv2.putText(frame, text, (100, 90),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

            cv2.putText(frame, f"Total objects in scene: {len(active_objects)}",
                        (100, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        else:
            # No detections in the ROI: every tracked object has disappeared.
            # Record them the same way as above so they also get a thumbnail.
            for (lbl, box) in active_objects:
                _record_departure(lbl, box)
            active_objects = []

        # Clean up lost objects that have been gone longer than lost_time_limit
        lost_objects = {
            lbl: (box, last_seen)
            for lbl, (box, last_seen) in lost_objects.items()
            if frame_count - last_seen <= lost_time_limit
        }

        out_writer.write(frame)
finally:
    # Finalise the output file even if processing fails part-way through.
    # (The capture opened for ROI selection was already released right after
    # the video properties were read, so there is nothing else to release.)
    out_writer.release()

print(f"Processed video saved to {output_video_path}")

# Play the processed video
def play_video(video_path):
    """Play a video file in an OpenCV window.

    Playback runs until the stream ends, 'q' is pressed, or the
    'Processed Video' window is closed.

    Args:
        video_path: Path handed to cv2.VideoCapture.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Could not open video for playback: {video_path}")
            return

        # CAP_PROP_FPS can come back as 0 when the container carries no
        # frame-rate metadata; dividing by it would raise ZeroDivisionError.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0  # assumed fallback playback rate

        # Keep the delay at >= 1 ms: waitKey(0) would wait for a key press
        # instead of advancing to the next frame.
        frame_delay = max(1, int(1000 / fps))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            cv2.imshow('Processed Video', frame)

            # Break loop if 'q' is pressed or window is closed
            key = cv2.waitKey(frame_delay) & 0xFF
            if key == ord('q') or cv2.getWindowProperty('Processed Video', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Always give back the capture handle and close the window, even if
        # decoding or display raised part-way through.
        cap.release()
        cv2.destroyAllWindows()

# Replay the file that was just written so the result can be inspected;
# play_video() returns once the video ends or the user quits.
print("Playing processed video (press 'q' to quit)...")
play_video(output_video_path)

Select ROI and press ENTER. Press C to cancel.

video 1/1 (frame 1/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 33.5ms
video 1/1 (frame 2/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 9.5ms
video 1/1 (frame 3/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 8.5ms
video 1/1 (frame 4/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 9.0ms
video 1/1 (frame 5/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 8.0ms
video 1/1 (frame 6/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 8.5ms
video 1/1 (frame 7/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 7.5ms
video 1/1 (frame 8/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 17.5ms
video 1/1 (frame 9/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 obs, 8.5ms
video 1/1 (frame 10/87) C:\Users\joshu\Downloads\start_end_cornflakes.mp4: 384x640 30 