In [1]:
import os
import cv2
import torch
import json
from tqdm import tqdm
from IPython.display import Image
import time

from ultralytics import YOLO
from sahi.auto_model import AutoDetectionModel
from sahi.predict import get_sliced_prediction, get_prediction, predict
from sahi.utils.coco import Coco
from sahi.utils.cv import read_image
from sahi.utils.file import download_from_url
from sahi.utils.ultralytics import download_yolo11n_model, download_yolo11n_seg_model

import pandas as pd
import numpy as np
import itertools

In [None]:
# from roboflow import Roboflow
# rf = Roboflow(api_key="Y2nBN4Hv8Mp8887wHuR9")
# project = rf.workspace("my-projects-ztgl4").project("smoking-person-m1shy")
# version = project.version(3)
# dataset = version.download("yolov11")

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Smoking-person-3 to yolov11:: 100%|██████████| 44630/44630 [00:26<00:00, 1666.74it/s]





Extracting Dataset Version Zip to Smoking-person-3 in yolov11:: 100%|██████████| 1966/1966 [00:00<00:00, 2073.27it/s]


In [None]:
image_folder = r"Smoking-person-3\test\images" 
output_folder = r"Smoking-person-3\test\sahi predicted 8\k aug moi"
visualized_folder = r"Smoking-person-3\test\sahi predicted 8\visualized_k aug moi"

os.makedirs(output_folder, exist_ok=True)
os.makedirs(visualized_folder, exist_ok=True)
detection_model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path=r"models\yolov11_best.pt",
    confidence_threshold=0.5,
    device="cuda"
)

for filename in tqdm(os.listdir(image_folder)): 
    if filename.endswith(('.jpg')):   
        image_path = os.path.join(image_folder, filename) 
        image = cv2.imread(image_path)
        results = get_sliced_prediction( 
            image_path, 
            detection_model, 
            slice_height=440, 
            slice_width=440, 
            overlap_height_ratio=0.4,  
            overlap_width_ratio=0.4
        ) 
        
        
        txt_filename = os.path.splitext(filename)[0] + ".txt" 
        txt_path = os.path.join(output_folder, txt_filename) 

        with open(txt_path, "w") as f: 
            for obj in results.object_prediction_list: 
                category = obj.category.id   
                confidence = float(obj.score.value) 
                x, y, w, h = obj.bbox.to_xywh()
                
                f.write(f"{category} {x} {y} {w} {h} {confidence:.4f}\n")
                cv2.rectangle(image, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
                cv2.putText(image, f"{category}: {confidence:.2f}", (int(x), int(y) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        
        visualized_path = os.path.join(visualized_folder, filename)
        cv2.imwrite(visualized_path, image)

print("Processing complete. Predictions saved in:", output_folder)
print("Visualized images saved in:", visualized_folder)

In [4]:
def convert(x_center, y_center, w, h, img_width, img_height):
    x_min = (x_center - w / 2) * img_width
    y_min = (y_center - h / 2) * img_height
    x_max = (x_center + w / 2) * img_width
    y_max = (y_center + h / 2) * img_height
    return x_min, y_min, x_max, y_max

def compute_iou(box1, box2):
    x_min1, y_min1, x_max1, y_max1 = box1
    x_min2, y_min2, x_max2, y_max2 = box2

    inter_x_min = max(x_min1, x_min2)
    inter_y_min = max(y_min1, y_min2)
    inter_x_max = min(x_max1, x_max2)
    inter_y_max = min(y_max1, y_max2)

    inter_width = max(0, inter_x_max - inter_x_min)
    inter_height = max(0, inter_y_max - inter_y_min)
    inter_area = inter_width * inter_height

    area1 = (x_max1 - x_min1) * (y_max1 - y_min1)
    area2 = (x_max2 - x_min2) * (y_max2 - y_min2)
    union_area = area1 + area2 - inter_area

    return inter_area / union_area if union_area > 0 else 0

def compute_precision_recall(gt_boxes, pred_boxes, iou_threshold=0.5):
    tp, fp, fn = 0, 0, 0
    
    for gt_box in gt_boxes:
        matched = False
        for pred_box in pred_boxes:
            if compute_iou(gt_box, pred_box) >= iou_threshold:
                matched = True
                break
        if matched:
            tp += 1
        else:
            fn += 1
    
    fp = len(pred_boxes) - tp
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    return precision, recall

def compute_map50(gt_boxes_list, pred_boxes_list):
    ap_sum, count = 0, 0
    total_precision, total_recall = 0, 0
    
    for gt_boxes, pred_boxes in zip(gt_boxes_list, pred_boxes_list):
        precision, recall = compute_precision_recall(gt_boxes, pred_boxes)
        total_precision += precision
        total_recall += recall
        ap_sum += precision
        count += 1

    map50 = ap_sum / count if count > 0 else 0
    avg_precision = total_precision / count if count > 0 else 0
    avg_recall = total_recall / count if count > 0 else 0
    
    return map50, avg_precision, avg_recall

In [6]:
gt = r'Smoking-person-3\test\labels'
sahi = r'Smoking-person-3\test\sahi predicted 3\k aug moi'
img_width, img_height = 640, 640
iou_threshold = 0.5

gt_boxes_list, pred_boxes_list = [], []

for filename in os.listdir(gt):
    gt_file = os.path.join(gt, filename)
    pred_file = os.path.join(sahi, filename)
    
    if not os.path.exists(pred_file):
        print(f"Prediction file missing for {filename}")
        continue

    gt_boxes, pred_boxes = [], []
    
    with open(gt_file, "r") as f:
        for line in f:
            gt_data = line.strip().split()
            if len(gt_data) < 5:
                continue
            _, gt_x, gt_y, gt_w, gt_h = map(float, gt_data)
            gt_boxes.append(convert(gt_x, gt_y, gt_w, gt_h, img_width, img_height))
    
    with open(pred_file, "r") as f:
        for line in f:
            pred_data = line.strip().split()
            if len(pred_data) < 6:
                continue
            _, pred_x_min, pred_y_min, pred_w, pred_h, _ = map(float, pred_data)
            pred_x_max = pred_x_min + pred_w
            pred_y_max = pred_y_min + pred_h
            pred_boxes.append((pred_x_min, pred_y_min, pred_x_max, pred_y_max))
    
    gt_boxes_list.append(gt_boxes)
    pred_boxes_list.append(pred_boxes)

map50, avg_precision, avg_recall = compute_map50(gt_boxes_list, pred_boxes_list)
print(f"mAP@50: {map50:.4f}")
print(f"Precision: {avg_precision:.4f}")
print(f"Recall: {avg_recall:.4f}")

mAP@50: 0.9010
Precision: 0.9010
Recall: 0.9010


---
# Performance Metrics

### YOLOv11

In [2]:
model = YOLO(r'models\yolov11_best.pt')

In [3]:
img = r"Smoking-person-3\test\images\14_jpg.rf.32414b88753f9ea284468d4b06215549.jpg"  

start_time = time.time()
for _ in range(100):  # 100 Inferences
    results = model(img)
end_time = time.time()

fps = 100 / (end_time - start_time)
print(f"FPS: {fps:.2f}")


image 1/1 d:\Advanced_Python\Smoker Detection\Implementation\Smoking-person-3\test\images\14_jpg.rf.32414b88753f9ea284468d4b06215549.jpg: 640x640 1 cigarette, 29.8ms
Speed: 4.8ms preprocess, 29.8ms inference, 330.5ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 d:\Advanced_Python\Smoker Detection\Implementation\Smoking-person-3\test\images\14_jpg.rf.32414b88753f9ea284468d4b06215549.jpg: 640x640 1 cigarette, 28.3ms
Speed: 2.8ms preprocess, 28.3ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 d:\Advanced_Python\Smoker Detection\Implementation\Smoking-person-3\test\images\14_jpg.rf.32414b88753f9ea284468d4b06215549.jpg: 640x640 1 cigarette, 28.2ms
Speed: 2.0ms preprocess, 28.2ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 d:\Advanced_Python\Smoker Detection\Implementation\Smoking-person-3\test\images\14_jpg.rf.32414b88753f9ea284468d4b06215549.jpg: 640x640 1 cigarette, 27.5ms
Speed: 1.9ms preprocess, 27.5ms inferen

In [7]:
start_time = time.time()
results = model(img)  
end_time = time.time()

latency = (end_time - start_time) * 1000  
print(f"Latency per image: {latency:.2f} ms")


image 1/1 d:\Advanced_Python\Smoker Detection\Implementation\Smoking-person-3\test\images\14_jpg.rf.32414b88753f9ea284468d4b06215549.jpg: 640x640 1 cigarette, 187.6ms
Speed: 3.7ms preprocess, 187.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
Latency per image: 205.16 ms


In [8]:
image_folder = r'Smoking-person-3\throughput_testing'
image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]

start_time = time.time()
results = model(image_files) 
end_time = time.time()

throughput = len(image_files) / (end_time - start_time)
print(f"Throughput: {throughput:.2f} images/sec")


0: 640x640 1 cigarette, 34.5ms
1: 640x640 1 cigarette, 34.5ms
2: 640x640 2 cigarettes, 34.5ms
3: 640x640 1 cigarette, 34.5ms
4: 640x640 1 cigarette, 34.5ms
5: 640x640 1 cigarette, 34.5ms
6: 640x640 1 cigarette, 34.5ms
7: 640x640 1 cigarette, 34.5ms
8: 640x640 1 cigarette, 34.5ms
9: 640x640 1 cigarette, 34.5ms
10: 640x640 1 cigarette, 34.5ms
11: 640x640 1 cigarette, 34.5ms
12: 640x640 1 cigarette, 34.5ms
13: 640x640 1 cigarette, 34.5ms
14: 640x640 1 cigarette, 34.5ms
15: 640x640 1 cigarette, 34.5ms
16: 640x640 1 cigarette, 34.5ms
17: 640x640 1 cigarette, 34.5ms
18: 640x640 2 cigarettes, 34.5ms
19: 640x640 1 cigarette, 34.5ms
Speed: 11.0ms preprocess, 34.5ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)
Throughput: 18.41 images/sec


FPS: 13.23

Latency per image: 205.16 ms

Throughput: 18.41 images/sec

### SAHI

In [9]:
detection_model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path=r"models\yolov11_best.pt",
    confidence_threshold=0.5,
    device="cuda"
)

In [21]:
start_time = time.time()
for _ in range(100): 
    results = get_sliced_prediction(
        img, 
        detection_model, 
        slice_height=440, 
        slice_width=440, 
        overlap_height_ratio=0.4,  
        overlap_width_ratio=0.4
    )
end_time = time.time()
fps = 100 / (end_time - start_time)
print(f"FPS: {fps:.2f}")

Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing predictio

In [41]:
start_time = time.time()
results = get_sliced_prediction(
    img, 
    detection_model, 
    slice_height=440, 
    slice_width=440, 
    overlap_height_ratio=0.4,  
    overlap_width_ratio=0.4
)
end_time = time.time()

latency = (end_time - start_time) * 1000 
print(f"Latency per image: {latency:.2f} ms")

Performing prediction on 4 slices.
Latency per image: 175.32 ms


In [None]:
image_folder = r'Smoking-person-3\throughput_testing'
image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(('.jpg'))]

start_time = time.time()
for img_path in image_files:
    results = get_sliced_prediction(
        img_path, 
        detection_model, 
        slice_height=440, 
        slice_width=440, 
        overlap_height_ratio=0.4,  
        overlap_width_ratio=0.4
    )
end_time = time.time()

throughput = len(image_files) / (end_time - start_time)
print(f"Throughput: {throughput:.2f} images/sec")

Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Performing prediction on 4 slices.
Throughput (Sliced Model): 5.21 images/sec


FPS: 5.26

Latency per image: 175.32 ms

Throughput: 5.21 images/sec