
# Food‑11 × YOLO11 — Training Notebook

This notebook trains **YOLO11** on the **Food‑11** dataset you placed at:

```
/home/kristoffel/datasets/dataset-03-Food-11/
├── training/
├── validation/
└── evaluation/
```

**Important:** Food‑11 is a **classification** dataset (each image belongs to one of 11 classes).  
If you intended **object detection**, you must provide bounding‑box labels in YOLO format; otherwise use the **classification path** below.

> Tip: Run the cells top‑to‑bottom. Any cell with a `← edit me` comment is a place where you can/should tweak settings.


In [9]:
# --- 1) Paths & switches -------------------------------------------------------
# ← edit me if your paths differ
import os
import pathlib

# Dataset root plus its three split folders (Food-11 folder naming).
DATASET_ROOT = "/home/kristoffel/datasets/dataset-03-Food-11"
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(DATASET_ROOT, split_name)
    for split_name in ("training", "validation", "evaluation")  # 'evaluation' is used as the 'test' split
)

# Weights, configs and run outputs all live here; created if it does not exist yet.
MODEL_DIR = "/home/kristoffel/models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Choose task: 'classify' (default, fits Food-11) or 'detect' (needs YOLO-format labels)
TASK = "classify"   # ← edit me: 'classify' or 'detect'

# Run / model names
RUN_NAME_CLS, RUN_NAME_DET = "food11_yolo11n_cls", "food11_yolo11n_det"

# Weights
CLASS_WEIGHTS = "yolo11n-cls.pt"                       # auto-downloads if not found
DET_WEIGHTS = os.path.join(MODEL_DIR, "yolo11n.pt")    # base detection weights (your file)

# Fail fast if any folder the later cells rely on is absent.
for required_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR, MODEL_DIR):
    assert os.path.isdir(required_dir), f"Missing directory: {required_dir}"
print("✅ Paths look good.")
print("Task:", TASK)
print("Model dir:", MODEL_DIR)

✅ Paths look good.
Task: classify
Model dir: /home/kristoffel/models


In [None]:

# --- 2) Install/Import Ultralytics & basic env info ----------------------------
# If Ultralytics is already installed, you can skip the pip line.
# !pip install -U ultralytics

import os
import sys
import platform

# NOTE(review): presumably a workaround for the duplicate-OpenMP-runtime abort
# seen with some PyTorch installs — confirm it is still needed on this machine.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

try:
    import torch
    import ultralytics
    from ultralytics import YOLO

    print("Ultralytics:", ultralytics.__version__)
    print("PyTorch:", torch.__version__)
    cuda_ready = torch.cuda.is_available()
    print("CUDA available:", cuda_ready)
    if cuda_ready:
        print("Device:", torch.cuda.get_device_name(0))
except Exception:
    # Give a hint, then let the original error surface with its traceback.
    print("⚠️ Import failed. If needed, un-comment the pip line above and re-run this cell.")
    raise


In [None]:

# --- 3) Sanity check splits & class names -------------------------------------
from collections import Counter

IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")

def list_classes(root):
    """Return the alphabetically sorted names of the sub-directories directly under *root*.

    Plain files are ignored; only immediate children are considered.
    """
    subdirs = (entry for entry in pathlib.Path(root).iterdir() if entry.is_dir())
    return sorted(entry.name for entry in subdirs)

def count_images_per_class(root, classes):
    """Count image files for every class folder under *root*.

    Each ``root/<class>`` folder is walked recursively and a file counts as an
    image when its lower-cased name ends with one of ``IMG_EXTS``.

    Returns:
        ``(total, counts)`` — the summed count and a ``{class_name: count}`` dict.
    """
    per_class = {}
    grand_total = 0
    for class_name in classes:
        class_dir = os.path.join(root, class_name)
        n_images = 0
        for _dirpath, _dirnames, filenames in os.walk(class_dir):
            n_images += sum(fname.lower().endswith(IMG_EXTS) for fname in filenames)
        per_class[class_name] = n_images
        grand_total += n_images
    return grand_total, per_class

# Class names are taken from the folder names of the training split.
classes = list_classes(TRAIN_DIR)
assert classes, "No class folders found in training/"
print(f"Found {len(classes)} classes:", classes)

# Check that val/test have same classes
reference = set(classes)
for name, split_dir in (("validation", VAL_DIR), ("evaluation", TEST_DIR)):
    present = set(list_classes(split_dir))
    missing, extra = reference - present, present - reference
    assert not (missing or extra), f"Class mismatch in {name}: missing={missing}, extra={extra}"
print("✅ Class folders match across splits.")

# One summary line per split: total number of image files found.
splits = {"training": TRAIN_DIR, "validation": VAL_DIR, "evaluation": TEST_DIR}
for name, split_dir in splits.items():
    tot, _per_class = count_images_per_class(split_dir, classes)
    print(f"{name:12s}: {tot:5d} images")


In [None]:

# --- 4) Build the classification dataset root (+ a YAML summary) ---------------
# Ultralytics *classification* does not read a dataset YAML: `data=` has to be a
# dataset root directory that contains `train/` and `val/` (optionally `test/`)
# sub-folders, each holding one folder per class. Food-11 names its splits
# training/validation/evaluation, so we expose them under the expected names
# through symlinks inside MODEL_DIR. The dataset itself is left untouched.
import yaml

cls_data_dir = os.path.join(MODEL_DIR, "food11-cls-data")
os.makedirs(cls_data_dir, exist_ok=True)
for split_name, split_src in (("train", TRAIN_DIR), ("val", VAL_DIR), ("test", TEST_DIR)):
    link_path = os.path.join(cls_data_dir, split_name)
    if os.path.islink(link_path):
        if os.path.realpath(link_path) == os.path.realpath(split_src):
            continue  # link from a previous run already points at this split
        os.unlink(link_path)  # stale link (e.g. DATASET_ROOT was changed)
    elif os.path.exists(link_path):
        raise FileExistsError(
            f"{link_path} exists and is not a symlink; remove it and re-run this cell."
        )
    os.symlink(split_src, link_path, target_is_directory=True)

# Human-readable record of the split locations and class names. It is kept for
# reference only; the classification trainer does not consume it.
cls_cfg_path = os.path.join(MODEL_DIR, "food11-cls.yaml")
cls_cfg = {
    "train": TRAIN_DIR,
    "val":   VAL_DIR,
    "test":  TEST_DIR,
    "names": classes,
}
with open(cls_cfg_path, "w") as f:
    yaml.safe_dump(cls_cfg, f, sort_keys=False)
print("Wrote:", cls_cfg_path)
with open(cls_cfg_path) as f:  # context manager so the handle is closed again
    print(f.read())

# The training/validation cells pass `cls_yaml_path` as `data=`. The name is kept
# so those cells keep working, but it must hold the dataset *directory*.
cls_yaml_path = cls_data_dir
print("Classification data root:", cls_yaml_path)


In [None]:

# --- 5) (Optional) Detection YAML (only if you have YOLO-format labels) -------
# Expecting structure:
# training/
#   images/<class or mix>/image.jpg
#   labels/<same_stem>.txt
# If no labels/ folders are present, we keep classification as the default route.

det_yaml_path = os.path.join(MODEL_DIR, "food11-det.yaml")
train_lbl = os.path.join(DATASET_ROOT, "training", "labels")
val_lbl   = os.path.join(DATASET_ROOT, "validation", "labels")

if not (os.path.isdir(train_lbl) and os.path.isdir(val_lbl)):
    print("No YOLO-format labels found. Staying with classification (recommended for Food-11).")
else:
    import yaml

    det_cfg = {
        "path": DATASET_ROOT,
        "train": os.path.join(DATASET_ROOT, "training", "images"),
        "val":   os.path.join(DATASET_ROOT, "validation", "images"),
    }
    # The test split is optional: only listed when evaluation/images exists.
    test_images = os.path.join(DATASET_ROOT, "evaluation", "images")
    if os.path.isdir(test_images):
        det_cfg["test"] = test_images
    # NOTE(review): `classes` was built in step 3 from the sub-folders of
    # training/; with an images/ + labels/ layout those folder names would be
    # "images"/"labels" rather than food classes — confirm before training.
    det_cfg["names"] = classes  # list of class names for detection

    with open(det_yaml_path, "w") as f:
        yaml.safe_dump(det_cfg, f, sort_keys=False)
    print("Detection YAML written to:", det_yaml_path)


In [None]:

# --- 6) Train — Classification (default) --------------------------------------
# Fine-tunes the classification weights on Food-11 and reports where the best
# checkpoint was written.
if TASK != "classify":
    print("Skipping classification training because TASK != 'classify'")
else:
    from ultralytics import YOLO
    import torch

    EPOCHS = 50     # ← edit me
    IMGSZ  = 224    # ← edit me (common: 224/256/320)
    BATCH  = 64     # ← edit me

    model = YOLO(CLASS_WEIGHTS)  # yolo11n-cls.pt
    results = model.train(
        data=cls_yaml_path,
        epochs=EPOCHS,
        imgsz=IMGSZ,
        batch=BATCH,
        lr0=1e-3,
        patience=20,
        project=MODEL_DIR,
        name=RUN_NAME_CLS,
        plots=True,
        verbose=True,
        device=0 if torch.cuda.is_available() else "cpu",
    )

    # When MODEL_DIR/RUN_NAME_CLS already exists, Ultralytics writes to a renamed
    # folder (numeric suffix) instead, so ask the trainer where it really saved
    # rather than assuming the path.
    expected_best = os.path.join(MODEL_DIR, RUN_NAME_CLS, "weights", "best.pt")
    best_weights = getattr(getattr(model, "trainer", None), "best", None) or expected_best
    print("Training complete. Best weights should be under:", best_weights)
    if os.path.abspath(str(best_weights)) != os.path.abspath(expected_best):
        print(
            "⚠️ The run folder was auto-renamed. Steps 7–8 load", expected_best,
            "— change RUN_NAME_CLS or remove the older run so they pick up this training.",
        )


In [None]:

# --- 7) Validate on 'val' and 'evaluation' (as test) ---------------------------
# Evaluates the trained classifier on the validation split and on Food-11's
# "evaluation" split (used as the test set).
if TASK != "classify":
    print("Skipping classification validation because TASK != 'classify'")
else:
    from ultralytics import YOLO
    import torch

    best_path = os.path.join(MODEL_DIR, RUN_NAME_CLS, "weights", "best.pt")
    if os.path.isfile(best_path):
        model = YOLO(best_path)
    else:
        # Keep the best-effort fallback, but say so: metrics from the generic
        # pretrained weights do not describe a Food-11 model.
        print(
            f"⚠️ {best_path} not found — validating the pretrained {CLASS_WEIGHTS} instead. "
            "Run step 6 first to get meaningful metrics."
        )
        model = YOLO(CLASS_WEIGHTS)

    IMGSZ = 224  # keep consistent with training
    device = 0 if torch.cuda.is_available() else "cpu"

    # Val split
    metrics_val = model.val(
        data=cls_yaml_path,
        split="val",
        imgsz=IMGSZ,
        project=MODEL_DIR,
        name=RUN_NAME_CLS + "_val",
        device=device,
    )
    # Test split (Food-11 "evaluation")
    metrics_test = model.val(
        data=cls_yaml_path,
        split="test",
        imgsz=IMGSZ,
        project=MODEL_DIR,
        name=RUN_NAME_CLS + "_test",
        device=device,
    )
    print("Done. Plots & metrics saved under:", MODEL_DIR)


In [None]:

# --- 8) Quick predictions ------------------------------------------------------
# Runs the classifier on a small random sample of evaluation images and saves
# annotated previews.
if TASK != "classify":
    print("Skipping classification prediction preview because TASK != 'classify'")
else:
    from ultralytics import YOLO
    import random, glob

    model_path = os.path.join(MODEL_DIR, RUN_NAME_CLS, "weights", "best.pt")
    if not os.path.isfile(model_path):
        # Best-effort fallback is kept, but made visible.
        print(f"⚠️ {model_path} not found — predicting with the pretrained {CLASS_WEIGHTS} instead.")
    model = YOLO(model_path if os.path.isfile(model_path) else CLASS_WEIGHTS)

    # Pick up to 16 random images from the evaluation split. Only regular files
    # with an image extension (same IMG_EXTS as the step-3 counts) qualify, so
    # stray files or sub-folders never reach the predictor.
    cand = []
    for c in classes:
        cand += [
            path
            for path in glob.glob(os.path.join(TEST_DIR, c, "*"))
            if os.path.isfile(path) and path.lower().endswith(IMG_EXTS)
        ]

    if not cand:
        print("No images found under", TEST_DIR, "— nothing to predict.")
    else:
        sample = random.sample(cand, k=min(16, len(cand)))

        out_name = RUN_NAME_CLS + "_preds"
        preds = model.predict(
            source=sample,
            save=True,
            project=MODEL_DIR,
            name=out_name
        )
        # The output folder is auto-renamed when it already exists, so prefer the
        # location recorded on the results over the assumed one.
        saved_to = getattr(preds[0], "save_dir", None) if preds else None
        print("Saved prediction previews to:", saved_to or os.path.join(MODEL_DIR, out_name))


In [None]:

# --- 9) Train — Detection (only if you truly have detection labels) -----------
# Fine-tunes the base detection weights using the YAML written in step 5.
if TASK != "detect":
    print("Skipping detection training because TASK != 'detect'")
else:
    det_yaml = os.path.join(MODEL_DIR, "food11-det.yaml")
    assert os.path.isfile(DET_WEIGHTS), f"Detection weights not found: {DET_WEIGHTS}"
    assert os.path.isfile(det_yaml), "Detection YAML missing. Build it in step 5."
    from ultralytics import YOLO
    import torch

    EPOCHS = 100    # ← edit me
    IMGSZ  = 640    # ← edit me
    BATCH  = 16     # ← edit me

    model = YOLO(DET_WEIGHTS)
    results = model.train(
        data=det_yaml,
        epochs=EPOCHS,
        imgsz=IMGSZ,
        batch=BATCH,
        lr0=1e-3,
        patience=50,
        project=MODEL_DIR,
        name=RUN_NAME_DET,
        plots=True,
        device=0 if torch.cuda.is_available() else "cpu",
        # Basic augmentation knobs (tune as needed)
        hsv_h=0.015, hsv_s=0.7, hsv_v=0.6,
        degrees=10.0, translate=0.1, scale=0.30, shear=3.0, perspective=0.002,
        flipud=0.0, fliplr=0.5, mosaic=0.5, mixup=0.1,
    )

    # When MODEL_DIR/RUN_NAME_DET already exists, Ultralytics writes to a renamed
    # folder (numeric suffix), so report the trainer's real checkpoint path.
    expected_best = os.path.join(MODEL_DIR, RUN_NAME_DET, "weights", "best.pt")
    best_weights = getattr(getattr(model, "trainer", None), "best", None) or expected_best
    print("Detection training complete. Best weights under:", best_weights)



## You're set!
- To **change hyper‑parameters**, edit the `EPOCHS`, `IMGSZ`, and `BATCH` variables in the training cells.
- All outputs (weights, plots) are written under your `MODEL_DIR` so they’re easy to find.
- If you truly need **detection**, set `TASK = "detect"` in step 1 and make sure the training and validation splits each contain YOLO‑format **images/** and **labels/** directories (see step 5).

> Troubleshooting tip: If you see any import issues, un-comment the `pip install` line in step 2, run that cell, and then restart the kernel.
