In [60]:
import os
import pandas as pd
import yaml
from sklearn.model_selection import KFold
from ultralytics import YOLO

In [61]:
DATA_YAML_PATH = "datasets/Wildfire-Smoke/yolo8/data_fold0.yaml"
MODEL_PATH = "yolov8s.pt"
EPOCHS = 22
KFOLDS = 5
WEIGHT_DECAY = 0.0005
PATIENCE = 5

In [62]:
model = YOLO(MODEL_PATH)

In [63]:
annotations = pd.read_csv("datasets/Wildfire-Smoke/train/_annotations.csv")
filename = annotations["filename"].unique()

kf = KFold(n_splits=KFOLDS, shuffle=True, random_state=42)
results = []

In [64]:
for fold, (train_idx, val_idx) in enumerate(kf.split(filename)):
    print(f"\n--- Fold {fold + 1}/{KFOLDS} ---")

    # Подготовка файлов для обучения и валидации
    train_files = filename[train_idx]
    val_files = filename[val_idx]

    train_data = annotations[annotations["filename"].isin(train_files)]
    val_data = annotations[annotations["filename"].isin(val_files)]

    # Сохранение CSV для текущего фолда
    train_data.to_csv(f"datasets/Wildfire-Smoke/yolo8/train_fold{fold}.csv", index=False)
    val_data.to_csv(f"datasets/Wildfire-Smoke/yolo8/val_fold{fold}.csv", index=False)

    # Обновление data.yaml для текущего фолда
    fold_yaml_path = f"datasets/Wildfire-Smoke/yolo8/data_fold{fold}.yaml"
    train_path = os.path.abspath(f"datasets/Wildfire-Smoke/yolo8/train_fold{fold}.csv")
    val_path = os.path.abspath(f"datasets/Wildfire-Smoke/yolo8/val_fold{fold}.csv")
    with open(fold_yaml_path, "w") as yaml_file:
        yaml.dump({
            "train": train_path,
            "val": val_path,
            "nc": 1,
            "names": ["smoke"]
        }, yaml_file)

    # Проверка путей к изображениям
    missing_files = []
    for image_path in train_data["filename"].tolist() + val_data["filename"].tolist():
        full_path = os.path.join("datasets/Wildfire-Smoke/train", image_path)
        if not os.path.exists(full_path):
            missing_files.append(full_path)

    if missing_files:
        print("Отсутствуют следующие файлы:", missing_files)
        raise FileNotFoundError("Некоторые изображения отсутствуют.")

    # Настройка и запуск обучения
    results_dir = f"runs/detect/yolo8/fold{fold}"
    model.train(
        data=fold_yaml_path,
        epochs=EPOCHS,
        imgsz=640,
        project=results_dir,
        weight_decay=WEIGHT_DECAY,
        patience=PATIENCE,
        augment=True
    )

    # Сохранение метрик
    metrics_path = os.path.join(results_dir, "results.csv")
    if os.path.exists(metrics_path):
        metrics = pd.read_csv(metrics_path)
        metrics["fold"] = fold
        results.append(metrics)


--- Fold 1/5 ---
New https://pypi.org/project/ultralytics/8.3.49 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.39 🚀 Python-3.11.2 torch-2.2.2 CPU (Intel Core(TM) i5-8257U 1.40GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=datasets/Wildfire-Smoke/yolo8/data_fold0.yaml, epochs=22, time=None, patience=5, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=runs/detect/yolo8/fold0, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, c

FileNotFoundError: [34m[1mtrain: [0mError loading data from /Users/baiturtashbaev/Documents/Байтур/Projects/Course Arbeit/INAI_MedFire/datasets/Wildfire-Smoke/yolo8/train_fold0.csv
See https://docs.ultralytics.com/datasets for dataset formatting guidance.

In [26]:
all_results = pd.concat(results)
all_results.to_csv("runs/detect/yolo8/all_metrics.csv", index=False)

ValueError: No objects to concatenate

In [None]:
for metric in ["precision", "recall", "map50"]:
    plt.figure(figsize=(10, 6))
    for fold in range(KFOLDS):
        fold_data = all_results[all_results["fold"] == fold]
        plt.plot(fold_data["epoch"], fold_data[metric], label=f"Fold {fold}")
    plt.title(f"{metric.capitalize()} across folds")
    plt.xlabel("Epoch")
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.grid()
    plt.savefig(f"runs/{metric}_comparison.png")
    plt.show()

print("\nКросс-валидация завершена. Метрики сохранены в 'runs/all_metrics.csv'.")
