<a href="https://colab.research.google.com/github/kirti-kshirsagar/Visual-Occupancy-Counter/blob/main/Occupancy_Counter_Script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.25-py3-none-any.whl (778 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/778.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m778.2/778.8 kB[0m [31m23.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.8/778.8 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_cupt

In [2]:
# Importing all the necessary libraries
import cv2
import json
from ultralytics import YOLO

# Main function used to track and count the people
def process_video(input_file, output_file):
    # Loading the YOLOv8 model
    model = YOLO("yolov8n.pt")

    # Opening the video file
    cap = cv2.VideoCapture(input_file)
    # Checking if the video file was opened successfully
    if not cap.isOpened():
        print(f"Error: Unable to open video file {input_file}")
        return
    # Calculating the frames per second (fps) and total frames of the video
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # Initialize lists to store count data and change events
    total_count_data = []
    count_change_events = []
    # Initialize variable to keep track of the count in the previous frame
    previous_count = 0
    # Loops over each frame in the video
    for frame_idx in range(total_frames):
        # Read the next frame from the video
        ret, frame = cap.read()
        # If the frame could not be read, break the loop
        if not ret:
            break

        # Convert frame to RGB as YOLOv8 expects images in RGB format
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Detect people in the frame
        results = model.predict(source=rgb_frame, save=False, save_txt=False)

        # Check if results is a list
        if isinstance(results, list):
            # If results is a list, then each element of the list is a Detections object for a single image.
            # Since we're only processing one image at a time, we just need to take
            # the first element of the list.
            results = results[0]

        # Now results should be a Detections object, and we can access its boxes attribute
        boxes = results.boxes

        # Count people (class 0 is person in COCO dataset)
        current_people_count = sum(1 for box in boxes if int(box.cls) == 0)
        # Calculate the timestamp of the current frame
        timestamp = frame_idx / fps

        # Recording a count change event when people enter or exit
        if current_people_count != previous_count:
            # Record count change event
            event_type = "ENTER" if current_people_count > previous_count else "EXIT"
            count_change_events.append({
                "event_type": event_type,
                "timestamp": str(timestamp)
            })
            previous_count = current_people_count

        # Log total people count at this second
        total_count_data.append({
            "people_count": str(current_people_count),
            "timestamp": str(int(timestamp))
        })

    cap.release()

    # Prepare the output dictionary
    output_data = {
        "count_change_events": count_change_events,
        "total_count": total_count_data
    }

    # Write to the JSON file
    with open(output_file, 'w') as f:
        json.dump(output_data, f, indent=4)

    print(f"Output written to {output_file}")

if __name__ == "__main__":
    input_file = "user_activities.mp4"
    output_file = "activities.json"
    process_video(input_file, output_file)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 3.1ms preprocess, 12.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 cars, 2 traffic lights, 10.9ms
Speed: 3.0ms preprocess, 10.9ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 cars, 2 traffic lights, 13.6ms
Speed: 4.7ms preprocess, 13.6ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 cars, 2 traffic lights, 9.1ms
Speed: 4.9ms preprocess, 9.1ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 cars, 2 traffic lights, 8.9ms
Speed: 3.2ms preprocess, 8.9ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 3 cars, 2 traffic lights, 9.1ms
Speed: 2.9ms preprocess, 9.1ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 cars, 2 traffic lights, 15.5ms
Speed: 7.6ms preprocess