# In [None]:
import cv2
import numpy as np
from sort import Sort

# --- Configuration ---
# Source video to analyse and destination file for the annotated result.
VIDEO_PATH = "input_video.mp4"
OUTPUT_PATH = "output_video.mp4"

# Example region of interest: polygon corners given as (x, y) fractions of
# the frame width and height.
ROI_COORDINATES = [
    (0.2, 0.2),
    (0.8, 0.2),
    (0.8, 0.8),
    (0.2, 0.8),
]

# Frame rate of the output video; the source is sampled down to this rate.
FPS = 1

# Typeface used for text drawn onto frames.
FONT = cv2.FONT_HERSHEY_SIMPLEX

# Load the YOLO network from its Darknet weights and config files
# (paths are relative to the current working directory).
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Older OpenCV
# releases wrap each index in a one-element array (shape (N, 1)) while newer
# ones return a flat array (shape (N,)); flattening first supports both,
# whereas indexing each entry with [0] fails on the flat form.
output_layers = [
    layer_names[index - 1]
    for index in np.array(net.getUnconnectedOutLayers()).flatten()
]

# Initialize object tracker
tracker = Sort()

def relative_to_pixel_coordinates(relative_coordinates, frame_shape):
    """Scale fractional (x, y) coordinates to integer pixel positions.

    Args:
        relative_coordinates: Iterable of points whose first two items are
            the x and y positions as fractions of the frame width and height.
        frame_shape: Sequence whose first two items are ``(height, width)``,
            such as the ``shape`` of an image array.

    Returns:
        A list of ``(x, y)`` tuples of ints, each truncated toward zero.
    """
    rows, cols = frame_shape[:2]
    pixel_points = []
    for point in relative_coordinates:
        x_px = int(point[0] * cols)
        y_px = int(point[1] * rows)
        pixel_points.append((x_px, y_px))
    return pixel_points

def is_inside_roi(point, roi_coordinates):
    """Report whether ``point`` lies inside, or on the edge of, the ROI polygon.

    ``roi_coordinates`` is converted to a NumPy array and passed to
    ``cv2.pointPolygonTest`` with distance measurement disabled; any
    non-negative result counts as inside.
    """
    polygon = np.array(roi_coordinates)
    position = cv2.pointPolygonTest(polygon, point, False)
    return position >= 0

def calculate_stay_duration(entered_time, current_time):
    """Return the time elapsed between ``entered_time`` and ``current_time``."""
    elapsed = current_time - entered_time
    return elapsed

# Class index treated as "person".
# NOTE(review): assumes the loaded weights use COCO class ordering, where
# index 0 is "person" — confirm against the model's class list.
_PERSON_CLASS_ID = 0


def _detect_persons(frame, conf_threshold=0.5, nms_threshold=0.4):
    """Run the YOLO network on ``frame`` and return person boxes for SORT.

    Args:
        frame: BGR image array as returned by ``cv2.VideoCapture.read``.
        conf_threshold: Minimum class score for a detection to be kept.
        nms_threshold: Overlap threshold used by non-maximum suppression.

    Returns:
        Float array of shape (N, 5) with rows ``[x1, y1, x2, y2, score]`` in
        pixel coordinates; shape (0, 5) when nothing is detected.
    """
    frame_height, frame_width = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(output_layers)

    boxes = []  # [x, y, w, h] in pixels, the layout NMSBoxes expects
    confidences = []
    scale = np.array([frame_width, frame_height, frame_width, frame_height])
    for output in layer_outputs:
        # Each row is [cx, cy, w, h, objectness, class scores...], with the
        # box expressed as fractions of the frame size.
        class_scores = output[:, 5:]
        best_class = np.argmax(class_scores, axis=1)
        best_score = class_scores[np.arange(len(class_scores)), best_class]
        keep = (best_class == _PERSON_CLASS_ID) & (best_score > conf_threshold)
        for (cx, cy, w, h), score in zip(output[keep, :4] * scale, best_score[keep]):
            boxes.append([int(cx - w / 2), int(cy - h / 2), int(w), int(h)])
            confidences.append(float(score))

    if not boxes:
        return np.empty((0, 5))

    # NMSBoxes returns indices as (N, 1), (N,) or an empty tuple depending on
    # the OpenCV version; normalise to a flat integer array.
    kept = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    kept = np.array(kept, dtype=int).reshape(-1)

    detections = [
        [boxes[i][0], boxes[i][1], boxes[i][0] + boxes[i][2], boxes[i][1] + boxes[i][3], confidences[i]]
        for i in kept
    ]
    return np.array(detections, dtype=float).reshape(-1, 5)


def main():
    """Annotate the input video with how long each tracked person stays in the ROI.

    The source video is sampled down to ``FPS`` frames per second. On every
    sampled frame, persons are detected and tracked, the ROI is drawn, and
    each person whose box centre is inside the ROI is labelled with the time
    (in seconds of source video) since they were first seen there. Sampled
    frames are written to ``OUTPUT_PATH``.

    Raises:
        IOError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(VIDEO_PATH)
    if not cap.isOpened():
        raise IOError(f"Could not open input video: {VIDEO_PATH}")

    out = None
    try:
        # Some containers report 0 (or NaN) for the frame rate; fall back to
        # the output rate so that every frame is processed.
        source_fps = cap.get(cv2.CAP_PROP_FPS)
        if not source_fps > 0:
            source_fps = float(FPS)

        # Process one frame out of every `frame_interval`; never 0, which
        # would make the modulo below divide by zero.
        frame_interval = max(1, int(source_fps / FPS))

        # Prepare output video writer
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(OUTPUT_PATH, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (frame_width, frame_height))

        entered_persons = {}  # track id -> {"enter_time", "stay_duration"}

        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                # Position of this frame in the source video, in seconds.
                current_time = frame_count / source_fps

                # SORT takes an (N, 5) detection array and returns rows of
                # [x1, y1, x2, y2, track_id].
                tracked_objects = tracker.update(_detect_persons(frame))

                # Draw ROI
                pixel_roi = relative_to_pixel_coordinates(ROI_COORDINATES, frame.shape)
                cv2.polylines(frame, [np.array(pixel_roi, dtype=np.int32)], isClosed=True, color=(0, 255, 0), thickness=2)

                # Process tracked objects
                for x1, y1, x2, y2, track_id in tracked_objects:
                    person_id = int(track_id)
                    pixel_center = (int((x1 + x2) / 2), int((y1 + y2) / 2))
                    if not is_inside_roi(pixel_center, pixel_roi):
                        continue

                    # First sighting inside the ROI starts the clock at zero.
                    record = entered_persons.setdefault(
                        person_id, {"enter_time": current_time, "stay_duration": 0.0}
                    )
                    record["stay_duration"] = calculate_stay_duration(record["enter_time"], current_time)

                    # Draw stay duration (putText needs integer pixel coordinates)
                    text = f"Person {person_id}: {record['stay_duration']:.1f}s"
                    cv2.putText(frame, text, (int(x1), int(y1) - 10), FONT, 0.5, (0, 255, 0), 2)

                # Write frame to output video
                out.write(frame)

            frame_count += 1
    finally:
        # Release the capture and writer even if processing fails part-way.
        cap.release()
        if out is not None:
            out.release()

# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
