In [1]:
# Mount Google Drive at /content/drive; the dataset paths used later in this
# notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd

In [3]:
CSV_IN = "/content/drive/MyDrive/datasets/FFHQaging/FFHQaging.csv"
assert os.path.exists(CSV_IN), f"CSV not found at {CSV_IN}"

# Load the raw label table; the columns read below are "image_number" and "age_group".
df = pd.read_csv(CSV_IN)

# fname: the image number zero-padded to five digits, e.g. 42 -> "00042.png"
df["fname"] = df["image_number"].apply(lambda x: f"{int(x):05d}.png")

# one-hot age bins, listed in the column order used for the output
bins_order = ["0-2","3-6","7-9","10-14","15-19",
              "20-29","30-39","40-49","50-69","70-120"]

# Derive every column name once so the loop and the final column selection
# cannot drift apart.                            # → agebin_0_2 …
bin_cols = [f"agebin_{b.replace('-','_')}" for b in bins_order]

# Fail loudly on labels outside bins_order (NaN included): such rows would
# otherwise end up with an all-zero one-hot vector without any warning.
unknown_groups = df.loc[~df["age_group"].isin(bins_order), "age_group"]
assert unknown_groups.empty, (
    f"{len(unknown_groups)} rows have an age_group not in bins_order: "
    f"{unknown_groups.unique().tolist()[:10]}"
)

for b, col in zip(bins_order, bin_cols):
    df[col] = (df["age_group"] == b).astype(int)

# train / test split: image numbers below N_TRAIN → train, everything else → test
N_TRAIN = 60000
df["is_train"] = df["image_number"] < N_TRAIN

# keep only the columns CUSP cares about; .copy() makes df_clean independent of
# df so later edits to it do not raise chained-assignment warnings
wanted_cols = ["fname"] + bin_cols + ["is_train"]
df_clean = df[wanted_cols].copy()

In [5]:
# Destination on Drive for the cleaned label table (new home).
OUT_ROOT    = "/content/drive/MyDrive/datasets/FFHQaging_LS"
csv_out = OUT_ROOT + "/FFHQaging.csv"

# Create the folder if it is missing, then save without the index column.
os.makedirs(OUT_ROOT, exist_ok=True)
df_clean.to_csv(csv_out, index=False)

# Report the destination, the frame's shape and its column names.
out_shape = df_clean.shape
out_columns = df_clean.columns.tolist()
print("CSV written:", csv_out, "  →", out_shape, "rows, columns", out_columns)

CSV written: /content/drive/MyDrive/datasets/FFHQaging_LS/FFHQaging.csv   → (70000, 12) rows, columns ['fname', 'agebin_0_2', 'agebin_3_6', 'agebin_7_9', 'agebin_10_14', 'agebin_15_19', 'agebin_20_29', 'agebin_30_39', 'agebin_40_49', 'agebin_50_69', 'agebin_70_120', 'is_train']
