In [1]:
import torch

print("CUDA available:", torch.cuda.is_available())
print("GPU name:", torch.cuda.get_device_name(0))

CUDA available: True
GPU name: Tesla T4


In [2]:
!pip install ultralytics supervision opencv-python

Collecting ultralytics
  Downloading ultralytics-8.3.246-py3-none-any.whl.metadata (37 kB)
Collecting supervision
  Downloading supervision-0.27.0-py3-none-any.whl.metadata (13 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.246-py3-none-any.whl (1.2 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.2/1.2 MB[0m [31m73.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading supervision-0.27.0-py3-none-any.whl (212 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m212.4/212.4 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: supervision, ultralytics-thop, ultralytics
Successfully installed supervisi

In [3]:
!pip show ultralytics

Name: ultralytics
Version: 8.3.246
Summary: Ultralytics YOLO üöÄ for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
Home-page: https://ultralytics.com
Author: 
Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
License: AGPL-3.0
Location: /usr/local/lib/python3.12/dist-packages
Requires: matplotlib, numpy, opencv-python, pillow, polars, psutil, pyyaml, requests, scipy, torch, torchvision, ultralytics-thop
Required-by: 


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [29]:

import cv2
import argparse
import time
from ultralytics import YOLO
import supervision as sv
#import numpy as np
#import torch

def main():


    frame_width, frame_height = 640, 360

   # cap = cv2.VideoCapture(0)
    video_path = "/content/drive/MyDrive/Colab Notebooks/video/video1.mp4"
    OUTPUT_PATH = "/content/drive/MyDrive/Colab Notebooks/video/output.mp4"  # Output video path
    cap=cv2.VideoCapture(video_path)
    if not cap.isOpened():
      raise Exception(f"Cannot open video: {video_path}")
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

    # Video writer to save output
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(OUTPUT_PATH, fourcc, 30, (frame_width, frame_height))
    PERSON_CLASS_ID = 0
    prev_time = 0
    model = YOLO("yolov8m.pt")

    box_annotator = sv.BoxAnnotator(
         thickness=2,
         color=sv.Color(0,255,0)
    )

    label_annotator = sv.LabelAnnotator(
    text_scale=0.3,
    text_thickness=1,
    text_color=sv.Color.RED,
    text_padding=1
)

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print("End of video or cannot read frame.")
            break
        # Resize for faster processing
        frame = cv2.resize(frame, (frame_width, frame_height))
        result = model(frame, agnostic_nms=True)[0]

        detections = sv.Detections.from_ultralytics(result)


        person_mask = detections.class_id == PERSON_CLASS_ID
        detections = detections[person_mask]

        person_count = len(detections)

        labels = [f"Person {detections.confidence[i]:.2f}"
                  for i in range(len(detections))]

        frame = box_annotator.annotate(
            scene=frame,
            detections=detections,

        )
        frame = label_annotator.annotate(scene=frame, detections=detections,labels=labels)
         # FPS calculation
        curr_time = time.time()
        fps = 1 / (curr_time - prev_time)
        prev_time = curr_time

        cv2.putText(
                frame,
                f"Total Persons: {person_count}",
                (20, 40),
                cv2.FONT_HERSHEY_COMPLEX,
                0.5,
                (255, 0, 0),
                2
            )
        cv2.putText(
        frame,
        f"FPS: {int(fps)}",
        (20, 80),
        cv2.FONT_HERSHEY_COMPLEX,
        0.5,
        (0, 0,255),
        2)
        out.write(frame)


    cap.release()
    out.release()
    print(f"Processing complete! Saved output to {OUTPUT_PATH}")

    from google.colab import files
    files.download("/content/drive/MyDrive/Colab Notebooks/video/output.mp4")
    print("Output File Downloaded")

if __name__ == "__main__":
    main()


0: 384x640 35 persons, 1 handbag, 25.6ms
Speed: 1.4ms preprocess, 25.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 1 handbag, 25.6ms
Speed: 1.5ms preprocess, 25.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 1 handbag, 24.4ms
Speed: 1.5ms preprocess, 24.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 23.7ms
Speed: 1.4ms preprocess, 23.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 1 backpack, 23.2ms
Speed: 1.5ms preprocess, 23.2ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 23.2ms
Speed: 1.4ms preprocess, 23.2ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 34 persons, 19.0ms
Speed: 1.4ms preprocess, 19.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 33 persons, 2 handbags, 17.9ms
Speed: 1.4ms pre

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Output File Downloaded
