In [1]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.4.10-py3-none-any.whl.metadata (38 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Downloading opencv_python-4.13.0.90-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Downloading pyyaml-6.0.3-cp310-cp310-win_amd64.whl.metadata (2.4 kB)
Collecting polars>=0.20.0 (from ultralytics)
  Downloading polars-1.37.1-py3-none-any.whl.metadata (10 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Collecting polars-runtime-32==1.37.1 (from polars>=0.20.0->ultralytics)
  Downloading polars_runtime_32-1.37.1-cp310-abi3-win_amd64.whl.metadata (1.5 kB)
Downloading ultralytics-8.4.10-py3-none-any.whl (1.2 MB)
   ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
   ---------------------------------------- 1.2/1.2 MB 8.5 MB/s  0:00:00
Downloading opencv_python-4.13.0.90-cp37-abi3-win_amd64.whl (40.2 MB)
   -----

In [9]:
from pathlib import Path
import shutil
import random
import copy
import torch
from ultralytics import YOLO

# ====================
# Configuration
# ====================

# --- Filesystem layout ---
DATASET_DIR = "dataset"   # source data; expected to hold train/ and val/
CLIENTS_DIR = "clients"   # per-client data splits are written here
SERVER_DIR = "server"     # where the aggregated (global) model is stored

# --- Federated-learning schedule ---
NUM_CLIENTS = 3           # number of simulated clients; adjust as needed
ROUNDS = 5                # communication rounds
EPOCHS_PER_CLIENT = 1     # local epochs each client trains per round

# --- Training hyper-parameters ---
BATCH_SIZE = 4
IMG_SIZE = 640
YOLO_PRETRAINED = "yolov8n.pt"  # pretrained checkpoint used as the starting point

# Class index -> name; the list position is the class id written into dataset.yaml.
CLASS_NAMES = [
    "emergency_exit",
    "quarry_face",
    "explosion_hazard",
    "stop_authorized_only",
    "keep_right",
    "falling_rocks",
    "keep_left",
    "no_trespassing",
    "wear_mask",
    "wear_eye_glass",
    "wear_ear_protection",
    "wear_helmet",
    "blasting_ahead",
]


In [11]:
def split_dataset(dataset_dir, num_clients, output_dir=CLIENTS_DIR, seed=None):
    """Split a flat YOLO dataset into per-client folders, each with a dataset.yaml.

    Reads ``<dataset_dir>/train`` and ``<dataset_dir>/val`` (``*.jpg`` images
    with same-stem ``*.txt`` label files in the same folder), shuffles each
    split, deals the samples out to ``num_clients`` clients and writes, for
    client ``k``::

        <output_dir>/client_<k>/images/{train,val}/
        <output_dir>/client_<k>/labels/{train,val}/
        <output_dir>/client_<k>/dataset.yaml

    Args:
        dataset_dir: Root folder containing ``train/`` and ``val/``.
        num_clients: Number of client shards to create (must be >= 1).
        output_dir: Folder that receives the ``client_<k>`` directories.
        seed: Optional seed for the shuffle. ``None`` keeps using the global
            ``random`` state, so results differ between runs.

    Returns:
        List of ``Path`` objects, one per client directory, in client order.

    Raises:
        ValueError: If ``num_clients`` is smaller than 1.
    """
    if num_clients < 1:
        raise ValueError("num_clients must be >= 1")

    def _paired_samples(split_dir):
        """Return (image, label-or-None) tuples matched by file stem.

        Matching by stem (instead of zipping two independently sorted globs)
        keeps every image with its own label even when an image has no label
        file or the folder contains unrelated .txt files.
        """
        samples = []
        for img in sorted(split_dir.glob("*.jpg")):
            lbl = img.with_suffix(".txt")
            samples.append((img, lbl if lbl.exists() else None))
        return samples

    def _yaml_quote(text):
        """Single-quote a string for YAML so spaces/'#'/':' in paths are safe."""
        return "'" + text.replace("'", "''") + "'"

    out_root = Path(output_dir)
    out_root.mkdir(parents=True, exist_ok=True)

    train_samples = _paired_samples(Path(dataset_dir) / "train")
    val_samples = _paired_samples(Path(dataset_dir) / "val")

    # A dedicated RNG makes the split reproducible when a seed is given.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(train_samples)
    rng.shuffle(val_samples)

    clients = []

    for i in range(num_clients):
        client_dir = out_root / f"client_{i+1}"

        # Drop copies left by an earlier run; otherwise a re-run with a new
        # shuffle piles additional files on top and client shards overlap.
        for sub in ("images", "labels"):
            shutil.rmtree(client_dir / sub, ignore_errors=True)

        # Striding over the shuffled list gives shards whose sizes differ by
        # at most one sample.
        shards = {
            "train": train_samples[i::num_clients],
            "val": val_samples[i::num_clients],
        }

        # Train and val are kept in separate sub-folders so a client never
        # trains on validation images.
        for split_name, samples in shards.items():
            img_out = client_dir / "images" / split_name
            lbl_out = client_dir / "labels" / split_name
            img_out.mkdir(parents=True, exist_ok=True)
            lbl_out.mkdir(parents=True, exist_ok=True)
            for img, lbl in samples:
                shutil.copy(img, img_out / img.name)
                if lbl is not None:  # images without a label file are copied alone
                    shutil.copy(lbl, lbl_out / lbl.name)

        # If this client received no validation samples, validate on its
        # training images rather than pointing at an empty folder.
        val_entry = "images/val" if shards["val"] else "images/train"

        # An absolute, forward-slash root keeps the YAML independent of the
        # notebook's working directory and of the OS path separator.
        yaml_lines = [
            f"path: {_yaml_quote(client_dir.resolve().as_posix())}",
            "train: images/train",
            f"val: {val_entry}",
            "names:",
        ]
        yaml_lines.extend(f"  {idx}: {name}" for idx, name in enumerate(CLASS_NAMES))

        with open(client_dir / "dataset.yaml", "w", encoding="utf-8") as f:
            f.write("\n".join(yaml_lines) + "\n")

        clients.append(client_dir)

    print(f"[INFO] Dataset split among {num_clients} clients.")
    return clients

clients = split_dataset(DATASET_DIR, NUM_CLIENTS)


[INFO] Dataset split among 3 clients.


In [14]:
from ultralytics import YOLO
from pathlib import Path

# Location of the shared (global) checkpoint; SERVER_DIR comes from the config cell.
global_model_path = Path(SERVER_DIR)/"global_model.pt"

# Nothing else in the notebook creates the server folder, so make sure it
# exists before the checkpoint is written.
global_model_path.parent.mkdir(parents=True, exist_ok=True)

# Load the pretrained weights named in the config cell.
global_model = YOLO(YOLO_PRETRAINED)

# NOTE: the checkpoint saved here still carries the pretrained detection head.
# The class count is not changed at this point; it is taken from the
# dataset.yaml that is passed to .train() later on.
global_model.save(global_model_path)
print("[INFO] Global YOLOv8 model saved for federated learning")


[INFO] Global YOLOv8 model saved for federated learning


In [17]:
# Dataset description for the shared validation folder; both the train and
# val entries point at the same directory.
VAL_DIR = Path(DATASET_DIR) / "val"
val_yaml_path = Path("val_dataset.yaml")

# Header entries first, then one indented "index: name" row per class.
yaml_lines = [f"train: {VAL_DIR}", f"val: {VAL_DIR}", "names:"]
yaml_lines.extend(f"  {idx}: {name}" for idx, name in enumerate(CLASS_NAMES))
yaml_content = "\n".join(yaml_lines).strip()

val_yaml_path.write_text(yaml_content)

print("[INFO] Validation dataset.yaml created")


[INFO] Validation dataset.yaml created


In [None]:
def fedavg_state_dicts(state_dicts):
    """Return the element-wise mean of several model state dicts (uniform FedAvg).

    Args:
        state_dicts: Non-empty list of ``state_dict()`` mappings that share the
            same keys and tensor shapes.

    Returns:
        A new dict with one averaged tensor per key. Floating-point tensors are
        averaged in float32 and cast back to their original dtype. Non-float
        tensors (integer counters/buffers) are copied from the first entry,
        because a fractional mean cannot be stored in them.

    Raises:
        ValueError: If ``state_dicts`` is empty.
    """
    if not state_dicts:
        raise ValueError("fedavg_state_dicts needs at least one state dict")

    scale = 1.0 / len(state_dicts)
    averaged = {}
    for key, ref in state_dicts[0].items():
        if torch.is_floating_point(ref):
            acc = torch.zeros_like(ref, dtype=torch.float32)
            for sd in state_dicts:
                acc += sd[key].to(device=ref.device, dtype=torch.float32) * scale
            averaged[key] = acc.to(ref.dtype)
        else:
            averaged[key] = ref.clone()
    return averaged


for r in range(ROUNDS):
    print(f"\n=== FEDERATED ROUND {r+1}/{ROUNDS} ===")
    client_weights = []
    trained_template = None  # a trained client network, reused as the container for the average

    for client_dir in clients:
        print(f"[INFO] Training client: {client_dir.name}")

        # Every client starts the round from the current global checkpoint.
        local_model = YOLO(global_model_path)

        # Train locally on client data
        local_model.train(
            data=f"{client_dir}/dataset.yaml",
            epochs=EPOCHS_PER_CLIENT,
            imgsz=IMG_SIZE,
            batch=BATCH_SIZE,
            project=f"{client_dir}/runs",
            name=f"round_{r}"
        )

        # Snapshot the trained weights so they outlive `local_model`.
        client_weights.append(copy.deepcopy(local_model.model.state_dict()))

        # Keep one trained network as the architecture template. Training
        # adapts the detection head to the dataset's classes, so the client
        # tensors no longer match the shapes of the pretrained global network.
        if trained_template is None:
            trained_template = copy.deepcopy(local_model.model)

    # --- SERVER AGGREGATION (FedAvg, every client weighted equally) ---
    global_dict = fedavg_state_dicts(client_weights)

    # Load the average into a network with matching shapes and make that the
    # new global model. Loading into the original pretrained network would
    # fail with a size-mismatch error on the head layers.
    trained_template.load_state_dict(global_dict)
    global_model.model = trained_template
    global_model.save(global_model_path)
    print(f"[INFO] Round {r+1} aggregation complete. Global model updated!")

    # --- Evaluate global model on validation set ---
    print(f"[INFO] Evaluating global model after round {r+1}")
    metrics = global_model.val(data=str(val_yaml_path), imgsz=IMG_SIZE, batch=BATCH_SIZE)
    print("=== Evaluation Metrics ===")
    # The metrics object is not a dict; its key/value view is `results_dict`.
    for k, v in metrics.results_dict.items():
        print(f"{k}: {v}")



=== FEDERATED ROUND 1/5 ===
[INFO] Training client: client_1
Ultralytics 8.4.10  Python-3.10.19 torch-2.10.0+cu126 CUDA:0 (NVIDIA GeForce RTX 3070, 8192MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=clients\client_1/dataset.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, end2end=None, epochs=1, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=server\global_model.pt, momentum=0.937, mosaic=1.0, multi_scale=0.0, name=round_05, nbs=64