## Imports

In [1]:
from ultralytics import YOLO
import cv2 
from matplotlib import pyplot as plt
import time
import psutil
import sys
import threading
from queue import Queue

## Load model 

In [2]:
# Load models
v8n_person = YOLO("train_runs/yolov8n_person_100epochs/detect/train/weights/best.pt")
v8n_mask = YOLO("train_runs/yolov8n_mask_100epochs/train/weights/best.pt")
v11n_person = YOLO("train_runs/yolo11n_person_100epochs/train/weights/best.pt")
v11n_mask = YOLO("train_runs/yolo11n_mask_100epochs/train/weights/best.pt")

# Create model list
model_dict = {"v8n_person":v8n_person, "v8n_mask":v8n_mask, "v11n_person":v11n_person, "v11n_mask":v11n_mask}

## Test if model was loaded correctly

In [3]:
# Function to plot result images
def matplot_image(results: list, model_name: str):
    """
        Display the image using matplotlib
        :param results: list of results from the model
        :param model_name: name of the model
    """

    im  = results[0].plot()
    # Convert BGR (OpenCV format) to RGB (matplotlib format)
    im_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    # Add model name to the image
    plt.title(model_name)

    # Display the image using matplotlib
    plt.imshow(im_rgb)
    plt.axis("off")  # Turn off axes for a cleaner display
    plt.show()


In [4]:
# Set confidence and IOU threshold
conf = 0.3
iou = 0.3

# Test models
for model_name, model in model_dict.items():
    for i in range(3):
        results = model.predict(f"data_sets/image_data/mask_person_test/{i}.jpg", conf=conf, iou=iou)

        # Show results
        #matplot_image(results, model_name)


image 1/1 /Users/lau/Documents/UNI/3. Semester/Project Computer Vision/My Project/data_sets/image_data/mask_person_test/0.jpg: 448x640 6 Personas, 35.3ms
Speed: 1.5ms preprocess, 35.3ms inference, 0.4ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 /Users/lau/Documents/UNI/3. Semester/Project Computer Vision/My Project/data_sets/image_data/mask_person_test/1.jpg: 384x640 10 Personas, 26.6ms
Speed: 1.0ms preprocess, 26.6ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /Users/lau/Documents/UNI/3. Semester/Project Computer Vision/My Project/data_sets/image_data/mask_person_test/2.jpg: 384x640 8 Personas, 29.0ms
Speed: 0.9ms preprocess, 29.0ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /Users/lau/Documents/UNI/3. Semester/Project Computer Vision/My Project/data_sets/image_data/mask_person_test/0.jpg: 448x640 1 improper_mask, 6 proper_masks, 31.2ms
Speed: 0.8ms preprocess, 31.2ms inference, 0.2ms postprocess per im

## Get RAM usage

In [5]:
# Get RAM usage
def get_ram_usage():
    """
        Get the RAM usage of the system
        :return: RAM usage in MB
    """


    # Get current process
    process = psutil.Process()

    # Get memory usage in bytes and convert to MB
    return process.memory_info().rss / (1024 ** 2)  # Resident Set Size (RSS)
    

## Put stats bar on frame

In [6]:
# Put stats bar on frame
def put_stats_bar(frame, t0_inf, t1_inf, t0_ann, t1_ann, loop_time, bar_height,font_scale, font_thickness):
    """
        Add a stats bar at the bottom of the frame
        :param frame: frame to add the stats bar to
        :param t0_inf: start time of inference
        :param t1_inf: end time of inference
        :param t0_ann: start time of annotation
        :param t1_ann: end time of annotation
        :param loop_time: time taken to process the frame
        :param bar_height: height of the stats bar
        :param font_scale: font scale of the text
        :param font_thickness: font thickness of the text
    """

    # Calculate inference time
    inference_time = (t1_inf - t0_inf) * 1000  # Convert to milliseconds
    
    # Calculate annotation time
    annotation_time = (t1_ann - t0_ann) * 1000  # Convert to milliseconds

    # Calculate max FPS
    max_fps = 1 / loop_time if loop_time > 0 else 0

    # Add text inside the rectangle
    stats_text = (
    f"Inference time: {inference_time:.2f} ms | "
    f"Annotation time: {annotation_time:.2f} ms | "
    f"Max FPS: {max_fps:.2f}"
    f"      Press 'q' to quit | Press '+' to increase confidence | Press '-' to decrease confidence"
    )
        
    # Add a rectangle at the bottom of the frame
    h, w, _ = frame.shape  # Get frame dimensions
    cv2.rectangle(frame, (0, h - bar_height), (w, h), (0, 0, 0), -1)  # Black rectangle

    # Add text inside the rectangle
    text_color = (255, 255, 255)  # White text
    cv2.putText(frame, stats_text, (10, h - 10), cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, font_thickness)

## Test Webcam as input

In [7]:
## Set confidence and IOU threshold
#conf = 0.3
#iou = 0.3
#model = model_dict["v8n_mask"]
#
## Inference on video
#results = model.predict(source=0, stream=True, conf=conf, iou=iou)
#
## Create frame counter
#frame_count = 0
#
#for result in results:
#    # Extract the processed frame with predictions
#    t0 = time.time()    # Start time
#    frame = result.plot()  # Visualizes predictions on the frame
#    t1 = time.time()    # End time
#
#    # Calculate inference 
#    inference_time = (t1 - t0) * 1000  # Convert to milliseconds
#
#    # Calculate max FPS
#    max_fps = 1000 / inference_time
#
#    # Get RAM usage
#    ram_usage = get_ram_usage()
#
#    # Add text inside the rectangle
#    if frame_count % 20 == 0:
#        stats_text = (
#        f"Inference time: {inference_time:.2f} ms | "
#        f"RAM usage: {ram_usage:.2f} MB | "
#        f"Max FPS: {max_fps:.2f}"
#    )
#        # Reset frame count
#        frame_count = 0
#
#    # Add stats bar to the frame
#    put_stats_bar(frame, stats_text, 30, 0.5, 1)
#    
#    # Display the frame in a window
#    cv2.imshow('Live Predictions', frame)
#    
#    # Press 'q' to exit
#    if cv2.waitKey(1) & 0xFF == ord('q'):
#        break
#
#    frame_count += 1
#    
#
#cv2.destroyAllWindows()

## Test mp4 as input

In [8]:
# Set confidence and IOU threshold
conf_tresh = 0.3
iou_tresh = 0.3
pred_framerate = 2
model_person = v8n_person
model_mask = v8n_mask

# Initialize VideoCapture
cap = cv2.VideoCapture("data_sets/video_data/3205619-hd_1920_1080_25fps.mp4")

frame_count = 0
start_time = time.time()
loop_time = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    if frame_count % pred_framerate == 0:
        # Perform inference for person and mask models
        t0_inf = time.time()    # Start time
        result_person = model_person(frame, conf=conf_tresh, iou=iou_tresh) 
        result_mask = model_mask(frame, conf=conf_tresh, iou=iou_tresh)
        frame_count = 0 # Reset frame count
        t1_inf = time.time()    # End time

        loop_time = (time.time() - start_time)/pred_framerate
        start_time = time.time()

    t0_ann = time.time()    # Start time
    # Extract bounding boxes, confidences, and labels from both models
    for i in range(len(result_mask[0].boxes.cls)):
        x1, y1, x2, y2 = map(int, result_mask[0].boxes.xyxy[i])
        clss, conf = result_mask[0].boxes.cls[i], result_mask[0].boxes.conf[i]
        text = f"{model_mask.names[int(clss)]} ({conf:.2f})"

        if int(clss) == 0:
            colorcode = (255, 165, 0)
        elif int(clss) == 1:
            colorcode = (0, 0, 255)
        elif int(clss) == 2:
            colorcode = (0,255,0)
        cv2.rectangle(frame, (x1, y1), (x2, y2), colorcode, 2)  # Red for model 2
        cv2.putText(frame, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colorcode, 2)

    for i in range(len(result_person[0].boxes.cls)):
        x1, y1, x2, y2 = map(int, result_person[0].boxes.xyxy[i])
        clss, conf = result_person[0].boxes.cls[i], result_person[0].boxes.conf[i]
        text = f"{model_person.names[int(clss)]} ({conf:.2f})"

        colorcode = (255, 0, 0)
        cv2.rectangle(frame, (x1, y1), (x2, y2), colorcode, 2)  # Red for model 2
        cv2.putText(frame, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colorcode, 2)
    
    # Quit with 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):  # Quit with 'q'
        break
    t1_ann = time.time()    # End time

    # Add stats bar to the frame
    put_stats_bar(frame, t0_inf, t1_inf, t0_ann, t1_ann, loop_time, 30, 0.5, 1)

    # Show the frame with bounding boxes
    cv2.imshow("Detections", frame)        
    frame_count += 1

cap.release()
cv2.destroyAllWindows()





0: 384x640 5 Personas, 32.9ms
Speed: 1.1ms preprocess, 32.9ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 no_masks, 26.4ms
Speed: 1.1ms preprocess, 26.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Personas, 25.0ms
Speed: 1.4ms preprocess, 25.0ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 no_masks, 23.5ms
Speed: 1.2ms preprocess, 23.5ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Personas, 26.4ms
Speed: 1.1ms preprocess, 26.4ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 no_masks, 24.3ms
Speed: 1.3ms preprocess, 24.3ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Personas, 24.1ms
Speed: 1.0ms preprocess, 24.1ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 no_masks, 21.4ms
Speed: 1.2ms preprocess, 21.4ms inference, 0.3ms postprocess per image at

2025-01-27 19:50:53.286 Python[37748:2960555] +[IMKClient subclass]: chose IMKClient_Modern
2025-01-27 19:50:53.286 Python[37748:2960555] +[IMKInputSession subclass]: chose IMKInputSession_Modern



0: 384x640 5 Personas, 24.0ms
Speed: 1.1ms preprocess, 24.0ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 no_masks, 25.1ms
Speed: 1.4ms preprocess, 25.1ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Personas, 29.0ms
Speed: 1.1ms preprocess, 29.0ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 no_masks, 24.3ms
Speed: 1.2ms preprocess, 24.3ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Personas, 29.0ms
Speed: 1.2ms preprocess, 29.0ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 no_masks, 24.6ms
Speed: 1.1ms preprocess, 24.6ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 Personas, 23.9ms
Speed: 0.9ms preprocess, 23.9ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 no_masks, 22.8ms
Speed: 1.3ms preprocess, 22.8ms inference, 0.2ms postprocess per image at

In [9]:
import cv2

# Load the image
frame = cv2.imread("data_sets/image_data/mask_person_test/1.jpg")

# Perform inference with both models
result_person = model_person(f"data_sets/image_data/mask_person_test/1.jpg", conf=conf, iou=iou)
result_mask = model_mask(f"data_sets/image_data/mask_person_test/1.jpg", conf=conf, iou=iou)

for i in range(len(result_mask[0].boxes.cls)):
    x1, y1, x2, y2 = map(int, result_mask[0].boxes.xyxy[i])
    clss, conf = result_mask[0].boxes.cls[i], result_mask[0].boxes.conf[i]
    text = f"{model_mask.names[int(clss)]} ({conf:.2f})"

    if int(clss) == 0:
        colorcode = (255, 165, 0)
    elif int(clss) == 1:
        colorcode = (0, 0, 255)
    elif int(clss) == 2:
        colorcode = (0,255,0)
    cv2.rectangle(frame, (x1, y1), (x2, y2), colorcode, 2)  # Red for model 2
    cv2.putText(frame, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colorcode, 2)

for i in range(len(result_person[0].boxes.cls)):
    x1, y1, x2, y2 = map(int, result_person[0].boxes.xyxy[i])
    clss, conf = result_person[0].boxes.cls[i], result_person[0].boxes.conf[i]
    text = f"{model_mask.names[int(clss)]} ({conf:.2f})"

    colorcode = (255, 0, 0)
    cv2.rectangle(frame, (x1, y1), (x2, y2), colorcode, 2)  # Red for model 2
    cv2.putText(frame, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colorcode, 2)

im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# Add model name to the image
plt.title(model_name)

# Display the image using matplotlib
plt.imshow(im_rgb)
plt.axis("off")  # Turn off axes for a cleaner display
plt.show()

TypeError: 'conf=0.6986196637153625' is of invalid type Tensor. Valid 'conf' types are int (i.e. 'conf=0') or float (i.e. 'conf=0.5')