In [2]:
!pip install ultralytics



In [3]:
import cv2
from ultralytics import YOLO
import os
import numpy as np

In [5]:
#YOLOv8 model(on COCO dataset)
model = YOLO('yolov8s.pt')

In [6]:
#Reading input
video_path = "/Users/manindragurung/Desktop/video/2.mp4"

if not os.path.isfile(video_path):
    raise FileNotFoundError(f"Video file not found at {video_path}")

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Failed to open video file at {video_path}")

In [7]:
#Saving output
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  
output_path = "/Users/manindragurung/Desktop/video/op101.mp4"
fps = int(cap.get(cv2.CAP_PROP_FPS))  # Frames per second of the input video
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))


In [11]:
while True:
    # Capturing frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break

    #Running YOLOv8 on the frame
    results = model(frame)  

    people_boxes = [] 

    # Loop over the detected objects
    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0])  
            #Process 'person' class
            if model.names[cls] == 'person': 
                #Confidence score
                conf = box.conf[0]  
                if conf > 0.5:  
                    
                    #Bounding box coordinates
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    
                    #Calculating the center of the bounding box
                    center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
                    people_boxes.append((center_x, center_y))
                    
                    #Drawing bounding box and label on the frame
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  
                    label = f'Person {conf:.2f}'
                    cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    #Counting number of people
    num_people = len(people_boxes)
    groups = []  

    #Checking for groups 
    if num_people > 1:  
        
        #Tracking which people have been grouped
        visited = [False] * num_people  
        for i in range(num_people):
            if visited[i]:
                continue
            group = [i]
            for j in range(i + 1, num_people):
                
                #Calculating the Euclidean distance 
                distance = np.sqrt((people_boxes[i][0] - people_boxes[j][0]) ** 2 + (people_boxes[i][1] - people_boxes[j][1]) ** 2)

                #Checking the distance 
                if distance < proximity_threshold:
                    group.append(j)
                    visited[j] = True
            if len(group) > 1:  
                groups.append(group)

    #Displaying
    cv2.putText(frame, f'People: {num_people}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    #Displaying group 
    for idx, group in enumerate(groups):
        group_color = group_colors[idx % len(group_colors)]  
        group_size = len(group)
        group_label = f'Group {idx + 1}: {group_size} people'
        color = (0, 255, 255) if (idx + 1) == highlight_group else group_color

        #Drawing bounding boxes around the group members 
        for member_index in group:
            center_x, center_y = people_boxes[member_index]
            cv2.circle(frame, (center_x, center_y), 10, color, -1)  

        #Displaying the group label and count 
        cv2.putText(frame, group_label, (10, 60 + idx * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

    #Highlighting the specified group 
    if highlight_group > len(groups):
        cv2.putText(frame, 'Specified group not found', (10, 90 + len(groups) * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        
    out.write(frame)

    #Displaying the frame with detections
    cv2.imshow('Output', frame)

    #Breaking the loop 
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break



0: 384x640 22 persons, 3 handbags, 2 skateboards, 103.8ms
Speed: 4.2ms preprocess, 103.8ms inference, 7.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 1 handbag, 2 skateboards, 89.6ms
Speed: 2.1ms preprocess, 89.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 1 skateboard, 92.0ms
Speed: 1.4ms preprocess, 92.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 1 skateboard, 85.7ms
Speed: 1.3ms preprocess, 85.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 1 handbag, 3 skateboards, 89.7ms
Speed: 1.8ms preprocess, 89.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 3 skateboards, 107.4ms
Speed: 1.6ms preprocess, 107.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 3 skateboards, 97.6ms
Speed: 1.3ms preprocess, 97.6ms inference, 2.1ms postprocess

In [12]:
# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()