<a href="https://colab.research.google.com/github/lorenzopaoria/Smoking-detection-and-distance-analysis/blob/main/person_cigarette_model_load.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Run a model for cigarette and person detection

In [2]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.78-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [3]:
from ultralytics import YOLO
import torch
from pathlib import Path
import cv2
import numpy as np
from tqdm.auto import tqdm
import os
import json

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [4]:
# Colab only: mount Google Drive at /content/drive so files stored there
# can be accessed through the regular filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
class InferenceVisualizer:
    """Run a YOLO detector over images and save annotated copies plus box data.

    For every successfully processed image two artifacts are written under the
    output directory: ``images/trained_<n>.jpg`` (the image with the boxes
    drawn on it) and ``coordinates/trained_<n>.json`` (the raw detections),
    where ``<n>`` is a running counter that restarts at 1 on every
    ``process_directory`` call.
    """

    # File extensions picked up by process_directory (compared case-insensitively).
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, model_path, conf_threshold=0.25):
        """Load the model and move it to the GPU when one is available.

        Args:
            model_path: Path to the trained weights file passed to ``YOLO``.
            conf_threshold: Minimum confidence forwarded to ``predict``.
        """
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = YOLO(model_path)
        self.model.to(self.device)
        self.conf_threshold = conf_threshold
        self.image_counter = 1

        # Box colour per class id, in OpenCV's BGR channel order.
        self.colors = {
            0: (0, 255, 0),    # green, bounding box for cigarettes
            1: (255, 0, 0),    # blue, bounding box for non-smokers
            2: (0, 0, 255),    # red, bounding box for smokers
        }

    def save_detection_data(self, image_name, boxes, classes, confidences, output_dir, sequence_number):
        """Write the detections of one image to ``coordinates/trained_<n>.json``.

        The JSON is saved so that distances between boxes can be computed later.

        Args:
            image_name: Original file name, stored under the ``image_name`` key.
            boxes: Iterable of boxes, each an iterable of numeric coordinates.
            classes: Iterable of class ids, parallel to ``boxes``.
            confidences: Iterable of confidence scores, parallel to ``boxes``.
            output_dir: Base directory; the ``coordinates`` folder is created inside it.
            sequence_number: Value used for ``<n>`` in the output file name.
        """
        # One plain-Python dict per detected object so json can serialise it.
        detections = [
            {
                'class': int(cls),
                'confidence': float(conf),
                'bbox': [float(x) for x in box],
            }
            for box, cls, conf in zip(boxes, classes, confidences)
        ]

        data = {
            'image_name': image_name,
            'detections': detections
        }

        # Create the folder for the coordinate files if it does not exist yet.
        coords_dir = Path(output_dir) / 'coordinates'
        coords_dir.mkdir(parents=True, exist_ok=True)

        json_path = coords_dir / f"trained_{sequence_number}.json"
        # Explicit encoding so the file does not depend on the platform default.
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    def process_image(self, image_path, output_dir):
        """Detect objects in one image and save the annotated image and its JSON.

        Args:
            image_path: Path of the image to read.
            output_dir: Base directory for the ``images`` and ``coordinates`` folders.

        Returns:
            True when both outputs were written; False when the image could
            not be read or the annotated image could not be written.
        """
        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Could not read image: {image_path}")
            return False

        results = self.model.predict(image, conf=self.conf_threshold)[0]
        annotated_image = image.copy()

        boxes = results.boxes.xyxy.cpu().numpy()  # box corner coordinates
        classes = results.boxes.cls.cpu().numpy()  # class id of each object
        confidences = results.boxes.conf.cpu().numpy()  # confidence of each object

        # Create the folder for the annotated images if it does not exist yet.
        images_dir = Path(output_dir) / 'images'
        images_dir.mkdir(parents=True, exist_ok=True)

        # Draw every box together with its label.
        for box, cls, conf in zip(boxes, classes, confidences):
            x1, y1, x2, y2 = box.astype(int)
            class_id = int(cls)
            color = self.colors.get(class_id, (0, 255, 0))  # unknown ids fall back to green

            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
            label = f"Class {class_id}: {conf:.2f}"  # class id and detection confidence

            text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
            text_x = x1
            # Put the label above the box unless it would leave the top of the image.
            text_y = y1 - 5 if y1 - 5 > text_size[1] else y1 + text_size[1]

            # Filled background in the class colour, then the text in white.
            cv2.rectangle(annotated_image, (text_x, text_y - text_size[1] - 4), (text_x + text_size[0], text_y), color, -1)
            cv2.putText(annotated_image, label, (text_x, text_y - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

        output_image_path = images_dir / f"trained_{self.image_counter}.jpg"
        # imwrite signals failure through its return value; do not report
        # success or leave a JSON file without a matching image in that case.
        if not cv2.imwrite(str(output_image_path), annotated_image):
            print(f"Could not write image: {output_image_path}")
            return False

        self.save_detection_data(str(Path(image_path).name), boxes, classes, confidences, output_dir, self.image_counter)

        self.image_counter += 1
        return True

    def process_directory(self, input_dir, output_dir):
        """Process every image directly inside ``input_dir``.

        Files are selected by extension (``IMAGE_EXTENSIONS``, any letter
        case) and handled in sorted path order so that the ``trained_<n>``
        numbering is the same on every run.

        Args:
            input_dir: Directory containing the images to process.
            output_dir: Directory that receives the annotated images and JSON files.
        """
        input_path = Path(input_dir)  # dataset location
        output_path = Path(output_dir)  # destination of images and coordinates
        output_path.mkdir(parents=True, exist_ok=True)

        self.image_counter = 1

        # A missing input directory is treated like an empty one.
        if input_path.is_dir():
            image_files = sorted(
                candidate
                for candidate in input_path.iterdir()
                if candidate.is_file() and candidate.suffix.lower() in self.IMAGE_EXTENSIONS
            )
        else:
            image_files = []

        total_images = len(image_files)
        print(f"Found {total_images} images")

        if total_images == 0:
            print("No images found in the input directory")
            return

        processed = 0
        failed = 0

        # Progress bar over the whole batch.
        with tqdm(total=total_images, desc="Processing images", unit="img", ncols=80) as pbar:
            for img_path in image_files:
                try:
                    if self.process_image(img_path, output_path):
                        processed += 1
                    else:
                        failed += 1
                except Exception as e:
                    # Best effort: one bad image must not abort the batch.
                    print(f"\nError processing {img_path.name}: {str(e)}")
                    failed += 1
                pbar.update(1)

        # Summary
        print("\nProcessing complete!")
        print(f"Successfully processed: {processed} images")
        if failed > 0:
            print(f"Failed to process: {failed} images")
        print(f"Results saved in: {output_dir}")

In [6]:
def main(
    model_path='/content/drive/MyDrive/pt_model_trained/run_20250211_142722/train/weights/best.pt',
    input_dir='/content/drive/MyDrive/Photo_dataset/test/images',
    output_dir='/content/drive/MyDrive/trained_photos',
    conf_threshold=0.4,
):
    """Run detection over a directory of images and save the results.

    All arguments default to the values this notebook was written for, so
    calling ``main()`` with no arguments behaves as before.

    Args:
        model_path: Trained weights file handed to ``InferenceVisualizer``.
        input_dir: Directory with the images to process.
        output_dir: Directory that receives the outputs.
        conf_threshold: Confidence threshold handed to ``InferenceVisualizer``.
    """
    visualizer = InferenceVisualizer(model_path, conf_threshold=conf_threshold)

    visualizer.process_directory(input_dir, output_dir)

    print("Elaborazione completata!")
    print(f"Le immagini elaborate sono state salvate in: {output_dir}")

# Entry point: call main() only when this code runs as the top-level module
# (__name__ is "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Found 73 images


Processing images:   0%|                                | 0/73 [00:00<?, ?img/s]


0: 576x1024 2 cigarettes, 1 nonSmoker, 2 smokers, 42.6ms
Speed: 18.0ms preprocess, 42.6ms inference, 81.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 2 cigarettes, 1 nonSmoker, 3 smokers, 10.9ms
Speed: 9.2ms preprocess, 10.9ms inference, 0.5ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 2 cigarettes, 1 nonSmoker, 3 smokers, 10.9ms
Speed: 6.2ms preprocess, 10.9ms inference, 0.7ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 1 cigarette, 1 nonSmoker, 13.4ms
Speed: 5.0ms preprocess, 13.4ms inference, 0.5ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 2 cigarettes, 1 nonSmoker, 3 smokers, 10.9ms
Speed: 6.4ms preprocess, 10.9ms inference, 0.5ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 1 cigarette, 1 nonSmoker, 2 smokers, 10.9ms
Speed: 4.1ms preprocess, 10.9ms inference, 0.5ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 1 cigarette, 6 nonSmokers, 1 smoker, 12.5ms
Speed: 6.0ms prepr