# Smoke detection in images

[Dataset](https://huggingface.co/datasets/pyronear/pyro-sdis)

Goal: detect smoke in photographs of forests, mountains... using bounding boxes

## Import libraries and select device

In [6]:
import plotly.express as px

import numpy as np

import spacy
from datasets import load_dataset
from torch.utils.data.dataloader import default_collate

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics
from torchinfo import summary
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint

from ultralytics import YOLO

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device {device}')
%env PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/home/theovld/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Using device cuda
env: PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True


# Dataset pipeline

## Transforming from Hugging Face to Ultralytics

We save the images from the dataset in the current folder, with the Ultralytics YOLO format for the program to read it properly. The corresponding .yaml file is already writen.

In [2]:
import os
from datasets import load_dataset

# Define paths
REPO_ID = "pyronear/pyro-sdis"
OUTPUT_DIR = "datasets/pyro-sdis"
IMAGE_DIR = os.path.join(OUTPUT_DIR, "images")
LABEL_DIR = IMAGE_DIR.replace("images", "labels")

# Create the directory structure
for split in ["train", "val"]:
    os.makedirs(os.path.join(IMAGE_DIR, split), exist_ok=True)
    os.makedirs(os.path.join(LABEL_DIR, split), exist_ok=True)

# Load the dataset from the Hugging Face Hub
dataset = load_dataset(REPO_ID)

In [3]:
def save_ultralytics_format(dataset_split, split):
    """
    Save a dataset split into the Ultralytics format.
    Args:
        dataset_split: The dataset split (e.g., dataset["train"])
        split: "train" or "val"
    """
    for example in dataset_split:
        # Save the image to the appropriate folder
        image = example["image"]  # PIL.Image.Image
        image_name = example["image_name"]  # Original file name
        output_image_path = os.path.join(IMAGE_DIR, split, image_name)

        # Save the image object to disk
        image.save(output_image_path)

        # Save label
        annotations = example["annotations"]
        label_name = image_name.replace(".jpg", ".txt").replace(".png", ".txt")
        output_label_path = os.path.join(LABEL_DIR, split, label_name)
        
        with open(output_label_path, "w") as label_file:
            label_file.write(annotations)

# Save train and validation splits
save_ultralytics_format(dataset["train"], "train")
save_ultralytics_format(dataset["val"], "val")

print("Dataset exported to Ultralytics format.")

Dataset exported to Ultralytics format.


In [5]:
from huggingface_hub import hf_hub_download
 
# Correctly set repo_id and repo_type
repo_id = "pyronear/pyro-sdis"
filename = "data.yaml"

# Download data.yaml to the current directory
yaml_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset", local_dir=".")
print(f"data.yaml downloaded to: {yaml_path}")

data.yaml:   0%|          | 0.00/187 [00:00<?, ?B/s]

data.yaml downloaded to: data.yaml


# YOLO fine-tuning

## Training

In [8]:
model = YOLO()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|██████████| 5.35M/5.35M [00:05<00:00, 987kB/s] 


In [None]:
results = model.train(data='data.yaml', epochs=25, batch=32, imgsz=720, dropout=0.5)

Ultralytics 8.3.70 🚀 Python-3.10.12 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce GTX 1050, 4034MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=data.yaml, epochs=25, time=None, patience=100, batch=32, imgsz=720, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.5, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True,

100%|██████████| 755k/755k [00:00<00:00, 2.32MB/s]


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      6640  ultralytics.nn.modules.block.C3k2            [32, 64, 1, False, 0.25]      
  3                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
  4                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  5                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  6                  -1  1     87040  ultralytics.nn.modules.block.C3k2            [128, 128, 1, True]           
  7                  -1  1    295424  ultralytics

[34m[1mtrain: [0mScanning /media/theovld/DATA1/Théo/Développement/Python/Frugal-AI/smoke-detection/datasets/pyro-sdis/labels/train... 11527 images, 2757 backgrounds, 8770 corrupt:  39%|███▉      | 11527/29537 [07:00<38:07,  7.87it/s]

## Adding data augmentation