In [4]:

ROOT_DIR     = r".\Nutrition5K\Nutrition5K\train"                 # Data root directory (original note: contains ./train/...); main() scans color/, depth_color/ and depth_raw/ under it
LABELS_CSV   = r".\Nutrition5K\Nutrition5K\nutrition5k_train.csv"       # Label CSV; load_labels() reads its "ID" and "Value" columns
OUT_CSV      = "./train_index.csv" # Path of the index CSV written by main()
# =====================================

from pathlib import Path
import csv

# File suffixes accepted as images; compared against the lower-cased
# suffix of each candidate file, so entries must be lower-case with the dot.
IMG_EXTS = {".png"}

def norm_id(x: str) -> str:
    """Return *x* in canonical ``dish_<id>`` form.

    Surrounding whitespace is stripped first. A value that already starts
    with ``dish_`` is returned as-is; otherwise ``dish_`` is prepended, and
    a purely numeric value is left-padded with zeros to at least 4 digits.
    """
    cleaned = x.strip()
    if not cleaned.startswith("dish_"):
        # Only all-digit identifiers are zero-padded; anything else is kept verbatim.
        suffix = cleaned.zfill(4) if cleaned.isdigit() else cleaned
        return f"dish_{suffix}"
    return cleaned

def first_image(p: Path) -> str:
    """Return the first image file inside directory *p*, as a string path.

    Entries are visited in sorted order and the first regular file whose
    lower-cased suffix is in ``IMG_EXTS`` wins. An empty string is returned
    when *p* is not a directory or holds no matching file.
    """
    if p.is_dir():
        images = (
            str(entry)
            for entry in sorted(p.iterdir())
            if entry.is_file() and entry.suffix.lower() in IMG_EXTS
        )
        return next(images, "")
    return ""

def pick_with_basename(dirpath: Path, basename: str) -> str:
    """Pick the image in *dirpath* named ``<basename><ext>``, else any image.

    Each extension in ``IMG_EXTS`` is tried with *basename*; the first
    existing file is returned as a string path. When none exists the result
    of ``first_image(dirpath)`` is returned instead. A *dirpath* that is not
    a directory yields an empty string.
    """
    if dirpath.is_dir():
        candidates = (dirpath / f"{basename}{ext}" for ext in IMG_EXTS)
        preferred = next((c for c in candidates if c.is_file()), None)
        if preferred is not None:
            return str(preferred)
        # No file with the expected name: fall back to the first image present.
        return first_image(dirpath)
    return ""

def load_labels(csv_path: Path) -> dict:
    """Read the label CSV and return a ``{dish_id: label}`` mapping.

    The file must have ``ID`` and ``Value`` columns. Each ID is passed
    through ``norm_id`` and each value is stripped of surrounding
    whitespace. Later rows overwrite earlier rows with the same ID.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Dict mapping normalised dish id to its label string. A row whose
        ``Value`` field is missing maps to an empty string.

    Raises:
        KeyError: If the ``ID`` or ``Value`` column is absent.
        OSError: If the file cannot be opened.
    """
    labels = {}
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise be glued to the first header name ("\ufeffID").
    with csv_path.open("r", newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            value = row["Value"]
            # DictReader fills fields missing from a short row with None;
            # keep those as "" rather than the literal text "None".
            labels[norm_id(row["ID"])] = "" if value is None else str(value).strip()
    return labels

def main():
    """Build the index CSV that joins dish ids, labels and image paths.

    Dish ids are collected from the sub-directory names under ``color``,
    ``depth_color`` and ``depth_raw`` in ``ROOT_DIR`` and merged with the
    ids found in ``LABELS_CSV``. One row per id, in sorted order, is written
    to ``OUT_CSV``; a missing label or image is recorded as an empty string.
    """
    base = Path(ROOT_DIR)
    color_dir = base / "color"
    depth_color_dir = base / "depth_color"
    depth_raw_dir = base / "depth_raw"

    # Every dish folder seen under any of the three modality directories.
    seen_on_disk = {
        entry.name
        for modality_dir in (color_dir, depth_color_dir, depth_raw_dir)
        if modality_dir.is_dir()
        for entry in modality_dir.iterdir()
        if entry.is_dir()
    }

    label_by_id = load_labels(Path(LABELS_CSV))

    # Union with the labelled ids so dishes lacking images still get a row.
    records = [
        {
            "dish_id": dish,
            "label": label_by_id.get(dish, ""),
            "rgb_path": pick_with_basename(color_dir / dish, "rgb"),
            "depth_color_path": pick_with_basename(depth_color_dir / dish, "depth_color"),
            "depth_raw_path": pick_with_basename(depth_raw_dir / dish, "depth_raw"),
        }
        for dish in sorted(seen_on_disk.union(label_by_id))
    ]

    columns = ["dish_id", "label", "rgb_path", "depth_color_path", "depth_raw_path"]
    destination = Path(OUT_CSV)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(records)

    print(f"[OK] wrote {len(records)} rows -> {destination}")


# Build the index immediately when this cell/script is executed.
main()


[OK] wrote 3301 rows -> train_index.csv
