# Food-101 Download & Setup

This notebook downloads the Food-101 dataset and prepares a local folder layout that the training notebook can use. It also draws a reproducible (seeded) random subset of 30–50 classes and saves it, together with the dataset location, to a reusable `config.json` in the project root.


In [None]:
import os, json, zipfile
from pathlib import Path

# Root folder under which data/ and config.json are placed.
# Set the FOOD101_PROJECT_ROOT environment variable to point the notebook at
# another location without editing this cell; when it is unset (or empty) the
# default below is used, so change it to match your own machine.
_DEFAULT_PROJECT_ROOT = r"C:/Users/Name/OneDrive - The University of Texas at Austin/UT Austin-DEVICE/Deep L/Mini Project/Image_Classification_DL"
PROJECT_ROOT = Path(os.environ.get("FOOD101_PROJECT_ROOT") or _DEFAULT_PROJECT_ROOT)

DATA_DIR = PROJECT_ROOT / "data"          # download / extraction target
FOOD101_DIR = DATA_DIR / "food-101"       # expected dataset folder inside data/
CONFIG_PATH = PROJECT_ROOT / "config.json"  # written by the last cell

# Create data/ (and any missing parents) up front; harmless if it already exists.
DATA_DIR.mkdir(parents=True, exist_ok=True)
print("Project root:", PROJECT_ROOT)
print("Data dir:", DATA_DIR)


In [None]:
# Download using torchvision (guarded)
from pathlib import Path

def has_images_meta(p: Path) -> bool:
    """Return True when *p* contains both an ``images`` and a ``meta`` directory.

    Both entries must be real directories: a stray file that merely carries
    one of those names does not count as a usable dataset layout.
    """
    return (p / "images").is_dir() and (p / "meta").is_dir()

# Only hit the network when the expected layout is missing under FOOD101_DIR.
if has_images_meta(FOOD101_DIR):
    print("Food-101 already present; skipping download.")
else:
    # Imported lazily so torchvision is only needed when a download is required.
    from torchvision.datasets import Food101 as TVFood101

    print("Downloading Food-101 via torchvision (first run may take a while)...")
    # Constructed with download=True purely for the download; the dataset
    # object itself is not used afterwards.
    _ = TVFood101(root=str(DATA_DIR), download=True)


In [None]:
# Resolve root with images/ and meta/
from pathlib import Path

def has_images_meta(p: Path) -> bool:
    """Tell whether *p* looks like a Food-101 root.

    A root qualifies only if it holds an ``images`` directory and a ``meta``
    directory; plain files with those names are rejected.
    """
    return all((p / name).is_dir() for name in ("images", "meta"))

def _iter_root_candidates():
    """Yield possible Food-101 roots lazily, the expected location first."""
    yield FOOD101_DIR
    # Fallback: any folder named food-101 anywhere below the data dir. This is
    # a generator on purpose: the recursive glob has to walk the whole data
    # tree, so it should only run when the expected location did not match.
    yield from DATA_DIR.glob("**/food-101")


# First candidate that has both images/ and meta/ wins.
ROOT = None
for cand in _iter_root_candidates():
    if has_images_meta(cand):
        ROOT = cand
        break

if ROOT is None:
    raise FileNotFoundError(
        "Could not find 'food-101' with images/ and meta/ under data dir after download attempts.\n"
        f"Please place it manually under: {DATA_DIR}"
    )

print("Resolved FOOD-101 root:", ROOT)
print("Subdirs:", os.listdir(ROOT))


In [None]:
# Select subset of classes and write config.json
import random
# One seed drives both the sampling below and the "seed" entry in the config,
# so the recorded value always matches the one actually used.
SEED = 42
random.seed(SEED)

# Each class is one sub-directory of images/. Stray files and hidden entries
# (e.g. .DS_Store) are not classes and must not end up in the config.
all_classes = sorted(
    entry.name
    for entry in (ROOT / "images").iterdir()
    if entry.is_dir() and not entry.name.startswith(".")
)
if not all_classes:
    # Fail loudly instead of writing a config with an empty class list.
    raise FileNotFoundError(f"No class folders found under: {ROOT / 'images'}")

subset_size = 30  # change to 30–50 as needed
subset_size = min(subset_size, len(all_classes))  # never ask for more than exist
# Sample without replacement, then sort so the class order in the config is stable.
selected_classes = sorted(random.sample(all_classes, k=subset_size))

config = {
    "dataset_root": str(ROOT),
    "class_names": selected_classes,
    "image_size": 128,
    "split_protocol": "standard",
    "seed": SEED,
}
with open(CONFIG_PATH, "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)

print("Wrote config:", CONFIG_PATH)
print(config)
