### Problem Statement

### Solution

In [5]:
#Required Libraries:
import cv2
import torch
import numpy as np
import imageio
import warnings
warnings.filterwarnings("ignore")

# Loading the YOLOv5 model (the 'yolov5s' variant with pretrained weights, fetched via torch.hub)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Loading the input video
cap = cv2.VideoCapture('video_people.mp4')
if not cap.isOpened():
    # Fail fast with a clear message instead of opening a writer with fps == 0
    # and producing an empty/corrupt output file later on.
    raise IOError("Could not open input video 'video_people.mp4'")

# Defining the detection areas
# Polygon vertices are in pixel coordinates of the 720x400 frame that the
# processing loop resizes every input frame to.
area_1 = [(1, 1), (720, 1), (720, 400), (1, 400)]

# Initializing required variables
person_dict = {}  # To store unique person IDs and their counts
person_count = 0  # Incremental count for persons
proximity_threshold = 45  # Set the proximity threshold (in pixels) for person detection
frame_counter = 0  # Counter to skip frames
frame_skip = 2  # Process every 2nd frame

# Getting input video details
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Creating video writer object
# Only one out of every `frame_skip` frames is written, so the writer must run
# at a proportionally lower frame rate; otherwise the output plays too fast.
output_file = 'output_video.mp4'
output_fps = fps / frame_skip
out = imageio.get_writer(output_file, format='FFMPEG', mode='I', fps=output_fps)

# Creating Main loop for processing video frames
def _find_nearest_track(centroid, tracks, max_distance):
    """Return the tracked centroid closest to `centroid`, or None.

    Args:
        centroid: (x, y) centre of the current detection, in pixels.
        tracks: mapping whose keys are (x, y) centroids of known persons.
        max_distance: a track only qualifies if it is strictly closer than this.

    Returns:
        The key of the nearest qualifying track, or None when no track is
        within `max_distance`.
    """
    best_key, best_distance = None, max_distance
    for key in tracks:
        distance = np.hypot(key[0] - centroid[0], key[1] - centroid[1])
        if distance < best_distance:
            best_key, best_distance = key, distance
    return best_key


# The detection area never changes, so build the polygon array once.
area_polygon = np.array(area_1, np.int32)

try:
    while True:
        ret, frame = cap.read()

        # Stop as soon as the stream ends, before touching the skip counter.
        if not ret:
            break

        frame_counter += 1
        if frame_counter % frame_skip != 0:
            continue

        # Resizing frame
        frame = cv2.resize(frame, (720, 400))

        # Applying Model
        # OpenCV delivers BGR frames, while the YOLOv5 hub model expects RGB for
        # numpy input. Inference also runs before any overlay is drawn so the
        # model sees the unannotated image.
        results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        cv2.polylines(frame, [area_polygon], True, (0, 255, 0), 2)

        for _, row in results.pandas().xyxy[0].iterrows():
            x1 = int(row["xmin"])
            y1 = int(row["ymin"])
            x2 = int(row["xmax"])
            y2 = int(row["ymax"])
            label = row["name"]

            if label == "person":
                # A person is considered only if the box's top-left corner is strictly inside the area
                result = cv2.pointPolygonTest(area_polygon, (x1, y1), False)
                if result > 0:
                    person_id = ((x1 + x2) // 2, (y1 + y2) // 2)

                    matched_id = _find_nearest_track(person_id, person_dict, proximity_threshold)
                    if matched_id is None:
                        # No known person nearby: register a new one
                        person_dict[person_id] = person_count  # Adding person ID to the dictionary with count
                        person_count += 1  # Increment the person count
                    else:
                        # Re-key the track to the latest centroid so it follows the
                        # person; otherwise anyone who moves further than the
                        # threshold from where they were first seen is counted again.
                        person_dict[person_id] = person_dict.pop(matched_id)

                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Drawing rectangle around detected person
                    cv2.putText(frame, f"Person {person_dict[person_id]}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                                (0, 255, 0), 1)  # Display label and ID

            elif label in ["handbag", "backpack"]:
                # Bags are tested on the bottom-left corner of their box
                result = cv2.pointPolygonTest(area_polygon, (x1, y2), False)
                if result > 0:
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Draw rectangle around detected bag
                    cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255),
                                2)  # Display label for bag

        cv2.putText(frame, "Persons: {}".format(person_count), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow('FRAME', frame)

        # Write the processed frame to the output video
        # imageio interprets arrays as RGB, so convert from OpenCV's BGR to keep colours correct.
        out.append_data(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Pressing Esc (key code 27) stops processing early
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Always release the capture, finalize the output file and close the
    # preview window, even if processing raised part-way through.
    cap.release()
    out.close()
    cv2.destroyAllWindows()

Using cache found in C:\Users\suyog/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-6-13 Python-3.9.13 torch-2.0.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 
