In [4]:
from pathlib import Path
import re
import pandas as pd

In [6]:
from pathlib import Path
import re
import pandas as pd

# Run this script from the repo root (the folder that contains "objects").
# The two values below name the same folder: BASE_DIR_FS is used to read the
# disk, BASE_DIR_WEB is the prefix of the paths written into the CSV.
BASE_DIR_FS = Path("objects")          # filesystem path (relative to where you run the script)
BASE_DIR_WEB = "objects"               # what goes into the CSV

# Suffixes accepted as images; they are compared against the lower-cased file suffix.
IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".tif", ".tiff", ".gif")

# Accumulates one dict per CSV row (parent rows and child image rows alike).
rows = []


def get_index_from_filename(fname: str) -> int:
    """Return the ordering index encoded at the end of a file name's stem.

    Two numbering styles are recognised, tried in this order: a trailing
    parenthesised number (``scan (3).jpg``) and a trailing underscore number
    (``scan_3.jpg``). A name with neither gets 999_999 so that it sorts
    after every numbered file.
    """
    name_stem = Path(fname).stem

    for suffix_pattern in (r"\((\d+)\)$", r"_(\d+)$"):
        found = re.search(suffix_pattern, name_stem)
        if found is not None:
            return int(found.group(1))

    # unnumbered files go at the end
    return 999_999


def standardize_filename(fname: str) -> str:
    """Convert a trailing ``(N)`` numbering into the ``_N`` convention.

    ``"page (2).jpg"`` becomes ``"page_2.jpg"``; whitespace left in front of
    the parentheses is dropped. Every other name, including one whose stem
    already ends in ``_N``, is returned unchanged.
    """
    as_path = Path(fname)
    paren_number = re.search(r"\((\d+)\)$", as_path.stem)

    if paren_number is None:
        # Already in "_N" form, or not numbered at all: nothing to rewrite.
        return fname

    stem_without_number = as_path.stem[:paren_number.start()].rstrip()
    return f"{stem_without_number}_{paren_number.group(1)}{as_path.suffix}"


# Walk every folder directly under BASE_DIR_FS, normalise its image file names
# on disk ("name (N).ext" -> "name_N.ext"), and append to `rows` one parent row
# for the folder followed by one child row per image.
for folder_path in sorted(BASE_DIR_FS.iterdir()):
    if not folder_path.is_dir():
        continue

    folder_name = folder_path.name

    # Folders with these names are skipped entirely.
    if folder_name.lower() in ("small", "thumb", "thumbs"):
        continue

    # iterdir() yields entries in arbitrary order; sorting keeps the rename
    # pass and its log output reproducible from run to run.
    files = sorted(
        p.name for p in folder_path.iterdir()
        if p.is_file() and p.suffix.lower() in IMAGE_EXTS
    )

    if not files:
        continue

    indexed_files = []

    for fname in files:
        new_name = standardize_filename(fname)

        if new_name != fname:
            old_path = folder_path / fname
            new_path = folder_path / new_name

            if new_path.exists():
                # Path.rename() would silently replace the existing file on
                # POSIX and raise on Windows, so keep the old name instead.
                print(f"WARNING: not renaming {old_path}: {new_path} already exists")
                new_name = fname
            else:
                print(f"Renaming: {old_path} -> {new_path}")
                old_path.rename(new_path)

        indexed_files.append(new_name)

    # Order by numeric index; the name breaks ties so that files sharing an
    # index (e.g. all unnumbered ones) come out in a stable order.
    files = sorted(
        indexed_files,
        key=lambda name: (get_index_from_filename(name), name),
    )

    parent_id = f"{folder_name}_Original"
    first_file = files[0]

    # PARENT ROW: represents the whole folder and points at its first image.
    rows.append({
        "object_id": parent_id,
        "article_id": folder_name,
        "image_display_template": "compound_object",
        "image_parent_id": "",
        "image_file": first_file,
        "image_object_location": f"{BASE_DIR_WEB}/{folder_name}/{first_file}",
    })

    # CHILD IMAGE ROWS: one per image, linked to the parent via image_parent_id.
    seen_child_ids = set()

    for f in files:
        idx = get_index_from_filename(f)
        child_id = f"{folder_name}_img{idx}"

        # Two files with the same index produce the same object_id; the row is
        # still written, but the clash is reported so it can be fixed by hand.
        if child_id in seen_child_ids:
            print(f"WARNING: duplicate object_id {child_id} for {folder_path / f}")
        seen_child_ids.add(child_id)

        rows.append({
            "object_id": child_id,
            "article_id": folder_name,
            "image_display_template": "image",
            "image_parent_id": parent_id,
            "image_file": f,
            "image_object_location": f"{BASE_DIR_WEB}/{folder_name}/{f}",
        })


# Build the metadata table from the collected rows. Passing the column list
# explicitly fixes the column order of the CSV.
csv_columns = [
    "object_id",
    "article_id",
    "image_display_template",
    "image_parent_id",
    "image_file",
    "image_object_location",
]
df = pd.DataFrame(rows, columns=csv_columns)

# index=False keeps the DataFrame's row index out of the written file.
df.to_csv("image_metadata_auto.csv", index=False)
print("DONE. Wrote image_metadata_auto.csv")


DONE. Wrote image_metadata_auto.csv
