
# Food‚Äë101 √ó YOLO11 ‚Äî Classification Training

This notebook prepares **Food‚Äë101** (Kaggle: `dansbecker/food-101`) for YOLO‚Äëclassification and trains **YOLO11n‚Äëcls**.

**What it does:**
1. Reads Food‚Äë101 `meta/train.txt` and `meta/test.txt`.
2. Builds `train/`, `val/`, `test/` folders with per‚Äëclass subfolders by **symlinking** from the original `images/` (falls back to copy if needed).
3. Trains `yolo11n-cls.pt` on the prepared splits and evaluates on both **val** and **test**.

> Default validation split: **10% of the official train per class**, with a fixed random seed for reproducibility.


In [1]:

# --- 1) Paths & knobs ----------------------------------------------------------
import os, pathlib, random, shutil
from collections import defaultdict

DATASET_ROOT = "/home/kristoffel/datasets/dataset-02-Food-101"  # ‚Üê edit me if needed
IMAGES_DIR   = os.path.join(DATASET_ROOT, "images")
META_DIR     = os.path.join(DATASET_ROOT, "meta")

# Output splits (folders with class subfolders of symlinks/copies)
TRAIN_DIR = os.path.join(DATASET_ROOT, "train")
VAL_DIR   = os.path.join(DATASET_ROOT, "val")
TEST_DIR  = os.path.join(DATASET_ROOT, "test")

# Validation split ratio from the official train list
VAL_RATIO = 0.10  # ‚Üê edit me

# Training hyperparameters
EPOCHS = 50       # ‚Üê edit me
IMGSZ  = 224      # ‚Üê edit me (common: 224/256/320)
BATCH  = 64       # ‚Üê edit me

MODEL_DIR = "/home/kristoffel/models"
RUN_NAME  = "food101_yolo11n_cls"

# Reproducibility
SEED = 42
random.seed(SEED)

# Sanity checks
for p in (IMAGES_DIR, META_DIR):
    assert os.path.isdir(p), f"Missing: {p} ‚Äî run the download notebook first."
os.makedirs(MODEL_DIR, exist_ok=True)
print("Paths OK.")


Paths OK.


In [2]:

# --- 2) Read meta files: classes, train/test lists ----------------------------
from pathlib import Path

classes_txt = Path(META_DIR) / "classes.txt"
train_txt   = Path(META_DIR) / "train.txt"
test_txt    = Path(META_DIR) / "test.txt"

classes = [ln.strip() for ln in classes_txt.read_text().splitlines() if ln.strip()]
print(f"Found {len(classes)} classes.")
assert len(classes) == 101, f"Expected 101 classes, got {len(classes)}"

def read_list(p: Path):
    # Lines like: class_name/image_stem  (no .jpg extension)
    out = defaultdict(list)
    for ln in p.read_text().splitlines():
        ln = ln.strip()
        if not ln: 
            continue
        cls, stem = ln.split("/")
        out[cls].append(stem + ".jpg")
    return out

train_list = read_list(train_txt)
test_list  = read_list(test_txt)

# Basic cardinality checks (Food-101 official split: 750 train, 250 test per class)
n_train = sum(len(v) for v in train_list.values())
n_test  = sum(len(v) for v in test_list.values())
print(f"Train images listed: {n_train}; Test images listed: {n_test}")


Found 101 classes.
Train images listed: 75750; Test images listed: 25250


In [3]:

# --- 3) Build train/val/test folders using symlinks from images/ --------------
import os, errno

# Clean old splits
for d in (TRAIN_DIR, VAL_DIR, TEST_DIR):
    if os.path.isdir(d):
        shutil.rmtree(d)
    os.makedirs(d, exist_ok=True)

def safe_link(src, dst):
    try:
        os.symlink(src, dst)
    except (OSError, NotImplementedError) as e:
        # Fallback to copying if symlinks are not permitted
        shutil.copy2(src, dst)

# Create per-class dirs
for cls in classes:
    os.makedirs(os.path.join(TRAIN_DIR, cls), exist_ok=True)
    os.makedirs(os.path.join(VAL_DIR, cls),   exist_ok=True)
    os.makedirs(os.path.join(TEST_DIR, cls),  exist_ok=True)

# Allocate val from train per class deterministically
for cls in classes:
    pool = list(train_list[cls])
    random.shuffle(pool)
    k_val = max(1, int(len(pool) * VAL_RATIO))
    val_subset = set(pool[:k_val])
    train_subset = pool[k_val:]

    # Link training subset
    for fname in train_subset:
        src = os.path.join(IMAGES_DIR, cls, fname)
        dst = os.path.join(TRAIN_DIR,  cls, fname)
        if not os.path.isfile(src):
            raise FileNotFoundError(f"Missing image: {src}")
        safe_link(src, dst)

    # Link validation subset
    for fname in val_subset:
        src = os.path.join(IMAGES_DIR, cls, fname)
        dst = os.path.join(VAL_DIR,    cls, fname)
        if not os.path.isfile(src):
            raise FileNotFoundError(f"Missing image: {src}")
        safe_link(src, dst)

# Link official test set
for cls in classes:
    for fname in test_list[cls]:
        src = os.path.join(IMAGES_DIR, cls, fname)
        dst = os.path.join(TEST_DIR,   cls, fname)
        if not os.path.isfile(src):
            raise FileNotFoundError(f"Missing image: {src}")
        safe_link(src, dst)

print("Splits built:")
print(" train ->", TRAIN_DIR)
print(" val   ->", VAL_DIR)
print(" test  ->", TEST_DIR)

# Remove any stale Ultralytics caches if present
for split in ("train","val","test"):
    cache = Path(DATASET_ROOT) / f"{split}.cache"
    if cache.exists():
        cache.unlink()
        print("removed cache:", cache)


Splits built:
 train -> /home/kristoffel/datasets/dataset-02-Food-101/train
 val   -> /home/kristoffel/datasets/dataset-02-Food-101/val
 test  -> /home/kristoffel/datasets/dataset-02-Food-101/test


In [4]:

# --- 4) Quick counts and verification -----------------------------------------
import os
def count_imgs(root):
    tot = 0
    for cls in classes:
        cdir = os.path.join(root, cls)
        n = sum(1 for _r, _d, fs in os.walk(cdir) for _f in fs if _f.lower().endswith(('.jpg','.jpeg','.png','.webp','.bmp','.tif','.tiff')))
        tot += n
    return tot

print("train images:", count_imgs(TRAIN_DIR))
print("val images  :", count_imgs(VAL_DIR))
print("test images :", count_imgs(TEST_DIR))

# Ensure class sets match across splits
def list_class_dirs(root):
    return sorted([d for d in os.listdir(root) if os.path.isdir(os.path.join(root,d))])
assert list_class_dirs(TRAIN_DIR) == list_class_dirs(VAL_DIR) == list_class_dirs(TEST_DIR) == sorted(classes), "Class folder mismatch across splits"
print("‚úÖ Class folders align across all splits.")


train images: 68175
val images  : 7575
test images : 25250
‚úÖ Class folders align across all splits.


In [5]:

# --- 5) Train YOLO11n-cls -----------------------------------------------------
# Make sure ultralytics is installed (uncomment pip if needed)
# !pip install -U ultralytics

import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

from ultralytics import YOLO
import ultralytics, torch

print("Ultralytics:", ultralytics.__version__)
print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))

model = YOLO("yolo11n-cls.pt")  # classification weights
results = model.train(
    data=DATASET_ROOT,       # directory with train/val/test
    epochs=EPOCHS,
    imgsz=IMGSZ,
    batch=BATCH,
    lr0=1e-3,
    patience=10,
    project=MODEL_DIR,
    name=RUN_NAME,
    plots=True,
    device=0 if torch.cuda.is_available() else "cpu",
)
print("Training run saved to:", results.save_dir)


Ultralytics: 8.3.203
PyTorch: 2.2.0a0+81ea7a4
CUDA available: True
Device: Tesla V100-SXM3-32GB
New https://pypi.org/project/ultralytics/8.3.223 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics 8.3.203 üöÄ Python-3.10.12 torch-2.2.0a0+81ea7a4 CUDA:0 (Tesla V100-SXM3-32GB, 32494MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=64, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/kristoffel/datasets/dataset-02-Food-101, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio



[K[34m[1mtrain: [0mScanning /home/kristoffel/datasets/dataset-02-Food-101/train... 68175 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 68175/68175 3.6Kit/s 18.8s<0.1s
[34m[1mtrain: [0mNew cache created: /home/kristoffel/datasets/dataset-02-Food-101/train.cache
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 1197.6¬±532.3 MB/s, size: 58.6 KB)
[K[34m[1mval: [0mScanning /home/kristoffel/datasets/dataset-02-Food-101/val... 7575 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 7575/7575 2.9Kit/s 2.6s0.1s
[34m[1mval: [0mNew cache created: /home/kristoffel/datasets/dataset-02-Food-101/val.cache
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.001' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 39 weight(decay=0.0), 40 weight(decay=0.0005), 40 bias(decay=0.0)
Image sizes 224 train, 224 val
Using 8 datal

In [6]:

# --- 6) Evaluate on val and test ----------------------------------------------
from ultralytics import YOLO
import glob, os, torch

RUN_PREFIX = RUN_NAME
cands = glob.glob(os.path.join(MODEL_DIR, RUN_PREFIX + "*", "weights", "best.pt"))
assert cands, f"No best.pt found under {MODEL_DIR}/{RUN_PREFIX}*/weights/"
best_path = max(cands, key=os.path.getmtime)
print("Using best:", best_path)

IMGSZ = IMGSZ  # keep consistent

model = YOLO(best_path)

metrics_val = model.val(
    data=DATASET_ROOT,
    split="val",
    imgsz=IMGSZ,
    project=MODEL_DIR,
    name=RUN_PREFIX + "_val",
    device=0 if torch.cuda.is_available() else "cpu",
)

metrics_test = model.val(
    data=DATASET_ROOT,
    split="test",
    imgsz=IMGSZ,
    project=MODEL_DIR,
    name=RUN_PREFIX + "_test",
    device=0 if torch.cuda.is_available() else "cpu",
)
print("Done. Check metrics and plots under:", MODEL_DIR)


Using best: /home/kristoffel/models/food101_yolo11n_cls/weights/best.pt
Ultralytics 8.3.203 üöÄ Python-3.10.12 torch-2.2.0a0+81ea7a4 CUDA:0 (Tesla V100-SXM3-32GB, 32494MiB)
YOLO11n-cls summary (fused): 47 layers, 1,655,405 parameters, 0 gradients, 3.3 GFLOPs
[34m[1mtrain:[0m /home/kristoffel/datasets/dataset-02-Food-101/train... found 68175 images in 101 classes ‚úÖ 
[34m[1mval:[0m /home/kristoffel/datasets/dataset-02-Food-101/val... found 7575 images in 101 classes ‚úÖ 
[34m[1mtest:[0m /home/kristoffel/datasets/dataset-02-Food-101/test... found 25250 images in 101 classes ‚úÖ 
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 1279.2¬±262.2 MB/s, size: 58.6 KB)
[K[34m[1mval: [0mScanning /home/kristoffel/datasets/dataset-02-Food-101/val... 7575 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 7575/7575 12.7Mit/s 0.0s
[K               classes   top1_acc   top5_acc: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 474/474 96.0it/s 4.9s<0.1s
            



[K[34m[1mtest: [0mScanning /home/kristoffel/datasets/dataset-02-Food-101/test... 25250 images, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 25250/25250 3.6Kit/s 7.0s0.1s
[34m[1mtest: [0mNew cache created: /home/kristoffel/datasets/dataset-02-Food-101/test.cache
[K               classes   top1_acc   top5_acc: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 1579/1579 100.9it/s 15.6s<0.0s
                   all      0.838      0.963
Speed: 0.1ms preprocess, 0.3ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1m/home/kristoffel/models/food101_yolo11n_cls_test[0m
Done. Check metrics and plots under: /home/kristoffel/models


In [7]:

# --- 7) Quick prediction preview ----------------------------------------------
from ultralytics import YOLO
import random, glob, os

best_weights = best_path  # from previous cell
model = YOLO(best_weights)

# Sample up to 16 test images
cand = []
for cls in classes:
    cand += glob.glob(os.path.join(TEST_DIR, cls, "*.jpg"))
sample = random.sample(cand, k=min(16, len(cand))) if cand else []

out_name = RUN_NAME + "_preds"
if sample:
    _ = model.predict(
        source=sample,
        save=True,
        project=MODEL_DIR,
        name=out_name
    )
    print("Saved prediction previews to:", os.path.join(MODEL_DIR, out_name))
else:
    print("No test images found to preview.")



0: 224x224 lobster_roll_sandwich 1.00, club_sandwich 0.00, pulled_pork_sandwich 0.00, bruschetta 0.00, eggs_benedict 0.00, 0.3ms
1: 224x224 panna_cotta 0.97, chocolate_mousse 0.01, chocolate_cake 0.00, cheesecake 0.00, foie_gras 0.00, 0.3ms
2: 224x224 caprese_salad 0.59, bruschetta 0.39, beet_salad 0.01, beef_carpaccio 0.00, tuna_tartare 0.00, 0.3ms
3: 224x224 garlic_bread 0.98, apple_pie 0.01, bruschetta 0.00, pizza 0.00, french_toast 0.00, 0.3ms
4: 224x224 huevos_rancheros 1.00, breakfast_burrito 0.00, tacos 0.00, nachos 0.00, omelette 0.00, 0.3ms
5: 224x224 french_onion_soup 0.49, macaroni_and_cheese 0.20, clam_chowder 0.11, lasagna 0.07, gnocchi 0.03, 0.3ms
6: 224x224 pulled_pork_sandwich 1.00, prime_rib 0.00, hot_dog 0.00, grilled_cheese_sandwich 0.00, hamburger 0.00, 0.3ms
7: 224x224 red_velvet_cake 1.00, carrot_cake 0.00, chocolate_cake 0.00, churros 0.00, tiramisu 0.00, 0.3ms
8: 224x224 crab_cakes 0.49, gnocchi 0.13, grilled_salmon 0.05, macaroni_and_cheese 0.05, shrimp_and_gr