In [1]:
# Install YOLOv8 (Ultralytics)
!pip install ultralytics

# Install DeepSORT
!pip install deep_sort_realtime


Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [29]:
import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import os

In [30]:
# Shared detector and tracker instances used by the cells below.

# Detector: YOLOv8 loaded from the "yolov8m.pt" weights file.
yolo_model = YOLO("yolov8m.pt")

# Tracker: DeepSORT; see the deep_sort_realtime documentation for the exact
# meaning of max_age / n_init / nn_budget.
tracker = DeepSort(
    max_age=30,
    n_init=3,
    nn_budget=70,
)

# Function to generate colors based on track ID
def generate_color(track_id):
    """Return a repeatable 3-tuple color derived from a track ID.

    The ID is coerced with ``int()`` (so numeric strings are accepted) and each
    channel is the ID times a fixed multiplier, reduced modulo 255. The same ID
    therefore always yields the same color.
    """
    numeric_id = int(track_id)
    channel_multipliers = (53, 101, 197)
    return tuple(numeric_id * factor % 255 for factor in channel_multipliers)

In [32]:
# Function to process video
def process_video(input_path, output_path, model, tracker, conf_threshold=0.5):
    """Detect and track objects in a video and write an annotated copy.

    Every frame is passed to ``model``; detections at or above
    ``conf_threshold`` are handed to ``tracker.update_tracks``. Each confirmed
    track is drawn on the frame (box, centroid, ``ID-label-seconds`` caption)
    and the number of frames it appeared in is accumulated.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated video to write.
        model: Callable detector. ``model(frame, conf=...)[0].boxes.data`` must
            yield rows of ``(x1, y1, x2, y2, confidence, class_id)`` and
            ``model.names`` must map a class id to its label.
        tracker: Object exposing ``update_tracks(detections, frame=...)`` whose
            tracks provide ``is_confirmed()``, ``track_id``, ``to_tlbr()`` and
            ``get_det_class()``.
        conf_threshold: Minimum detection confidence to keep.

    Returns:
        ``(object_times, unique_objects)``: a dict mapping track id to the
        seconds it was tracked, and the set of confirmed track ids.

    Raises:
        IOError: If the input video cannot be opened, or if no output writer
            could be created for ``output_path``.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise IOError(f"Unable to open video file: {input_path}")

    out = None
    frames_seen = {}  # track id -> number of frames the confirmed track appeared in
    unique_objects = set()  # Set to track unique object IDs

    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 skews every
        # reported duration. Some containers report 0/NaN, which would otherwise
        # cause a division by zero below, so fall back to a nominal 30 fps.
        fps = float(cap.get(cv2.CAP_PROP_FPS) or 0.0)
        if not fps > 0:
            fps = 30.0

        # Prefer H264; fall back to mp4v when that encoder is not available
        # instead of silently producing no output file.
        for codec in ("H264", "mp4v"):
            out = cv2.VideoWriter(
                output_path,
                cv2.VideoWriter_fourcc(*codec),
                fps,
                (frame_width, frame_height),
            )
            if out.isOpened():
                break
            out.release()
        else:
            out = None
            raise IOError(f"Unable to open video writer for: {output_path}")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # YOLO inference
            results = model(frame, conf=conf_threshold)
            detections = results[0].boxes.data.cpu().numpy()  # Extract detection results

            # Convert (x1, y1, x2, y2) corners to the (left, top, width, height)
            # layout passed to the tracker, carrying the class id along.
            dets = []
            for x1, y1, x2, y2, confidence, class_id in detections:
                if confidence >= conf_threshold:
                    dets.append([[x1, y1, x2 - x1, y2 - y1], confidence, int(class_id)])

            # Update tracker
            tracks = tracker.update_tracks(dets, frame=frame)

            # Annotate frame and update metrics
            for track in tracks:
                if not track.is_confirmed():
                    continue

                track_id = track.track_id
                x1, y1, x2, y2 = map(int, track.to_tlbr())
                centroid = (int((x1 + x2) / 2), int((y1 + y2) / 2))

                # Use the class stored on the track itself. Reusing the loop
                # variable from the detection loop would label every track with
                # the class of the frame's last detection.
                det_class = track.get_det_class()
                label = model.names[int(det_class)] if det_class is not None else "unknown"

                # One more frame in which this object was present
                frames_seen[track_id] = frames_seen.get(track_id, 0) + 1
                unique_objects.add(track_id)

                # Convert frames to seconds for the on-screen caption
                time_in_seconds = frames_seen[track_id] / fps

                # Assign a dynamic color for the track
                track_color = generate_color(track_id)

                # Draw bounding box and centroid
                cv2.rectangle(frame, (x1, y1), (x2, y2), track_color, 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 5, track_color, -1)

                # Display the track ID, label, and time in seconds
                label_text = f"ID:{track_id}-{label}-{time_in_seconds:.2f}s"
                cv2.putText(frame, label_text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

            # Write frame to output video
            out.write(frame)
    finally:
        # Always release the capture/writer handles, even if a frame fails.
        cap.release()
        if out is not None:
            out.release()

    # Convert per-object frame counts to seconds
    object_times = {obj_id: count / fps for obj_id, count in frames_seen.items()}

    print(f"Unique Objects Detected: {len(unique_objects)}")
    for obj_id, seconds in object_times.items():
        print(f"Object {obj_id} spent {seconds:.2f} seconds in the video.")

    print(f"Processing complete! Output saved to: {output_path}")
    return object_times, unique_objects


In [28]:


# Function to generate HTML report
def generate_html_report(html_path, video_path, object_times, unique_objects):
    """Write a standalone HTML report for a tracking run.

    The page embeds the tracked video (referenced by file name only, so the
    report is expected to sit next to the video), a table with one row per
    object id and its total time, and the number of unique objects.

    Args:
        html_path: Destination path of the HTML file.
        video_path: Path of the annotated video; only its base name is used as
            the ``<source src>``.
        object_times: Mapping of object id to total time in seconds.
        unique_objects: Collection of unique object ids; only its length is used.
    """
    # Local import keeps this cell self-contained without touching the import cell.
    from html import escape

    # Escape every interpolated value so unusual file names or ids cannot break
    # the markup.
    video_src = escape(os.path.basename(video_path), quote=True)

    html_head = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Object Tracking Report</title>
        <style>
            body {{
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 0;
                background-color: #f4f4f4;
                color: #333;
            }}
            header {{
                background-color: #0073e6;
                color: white;
                text-align: center;
                padding: 20px;
                font-size: 28px;
            }}
            main {{
                max-width: 800px;
                margin: 40px auto;
                background: white;
                padding: 20px;
                border-radius: 10px;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            }}
            video {{
                width: 100%;
                height: auto;
                border: 2px solid #0073e6;
                border-radius: 10px;
                margin-bottom: 20px;
            }}
            table {{
                width: 100%;
                border-collapse: collapse;
                margin: 20px 0;
            }}
            th, td {{
                padding: 12px;
                text-align: center;
                border: 1px solid #ddd;
            }}
            th {{
                background-color: #0073e6;
                color: white;
            }}
            tr:nth-child(even) {{
                background-color: #f9f9f9;
            }}
            tr:hover {{
                background-color: #f1f1f1;
            }}
            footer {{
                text-align: center;
                padding: 10px;
                background-color: #0073e6;
                color: white;
                margin-top: 20px;
            }}
        </style>
    </head>
    <body>
        <header>Object Tracking Report</header>
        <main>
            <h2>Tracked Video</h2>
            <video controls>
                <source src="{video_src}" type="video/mp4">
                Your browser does not support the video tag.
            </video>
            <h2>Summary</h2>
            <table>
                <thead>
                    <tr>
                        <th>Object ID</th>
                        <th>Total Time (seconds)</th>
                    </tr>
                </thead>
                <tbody>
    """

    # One table row per tracked object; joined once instead of repeated +=.
    html_rows = "".join(
        f"""
                    <tr>
                        <td>{escape(str(obj_id))}</td>
                        <td>{seconds:.2f}</td>
                    </tr>
        """
        for obj_id, seconds in object_times.items()
    )

    html_tail = f"""
                </tbody>
            </table>
            <h3>Total Unique Objects Detected: {len(unique_objects)}</h3>
        </main>
        <footer>
            &copy; 2024 Object Tracking System
        </footer>
    </body>
    </html>
    """

    html_content = html_head + html_rows + html_tail

    # The page declares charset UTF-8, so write it as UTF-8 regardless of the
    # platform's default encoding.
    with open(html_path, "w", encoding="utf-8") as html_file:
        html_file.write(html_content)
    print(f"HTML report saved to: {html_path}")


if __name__ == "__main__":
    # Folder that holds the input clip and receives the outputs. Set the
    # TRACKING_PROJECT_DIR environment variable to run on another machine;
    # when it is unset or empty the original location is used.
    project_dir = os.environ.get("TRACKING_PROJECT_DIR") or "C:/Users/santh/Downloads/project"

    input_video = f"{project_dir}/macv-obj-tracking-video.mp4"
    output_video = f"{project_dir}/output-tracking-video_3.mp4"
    html_report = f"{project_dir}/object_tracking_report.html"

    # Process the video and generate object tracking data
    object_times, unique_objects = process_video(input_video, output_video, yolo_model, tracker)

    # Generate the HTML report (written next to the output video so the
    # report's relative <source> link resolves)
    generate_html_report(html_report, output_video, object_times, unique_objects)



0: 384x640 7 persons, 1 handbag, 1 tv, 442.3ms
Speed: 6.0ms preprocess, 442.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 handbag, 1 tv, 392.3ms
Speed: 6.5ms preprocess, 392.3ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 handbag, 1 tv, 327.8ms
Speed: 5.0ms preprocess, 327.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 handbag, 1 tv, 463.5ms
Speed: 6.0ms preprocess, 463.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 handbag, 1 tv, 511.2ms
Speed: 5.0ms preprocess, 511.2ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 tv, 367.9ms
Speed: 10.0ms preprocess, 367.9ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 persons, 1 tv, 418.9ms
Speed: 4.0ms preprocess, 418.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)