In [1]:
# Colab-only step: mount Google Drive at /content/drive. The paths configured
# in the next cell (PROJECT_ROOT) live under /content/drive/MyDrive, so this
# cell has to run before anything else.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
from pathlib import Path
import json
import pandas as pd
from collections import defaultdict

# Root of the project folder on the mounted Drive.
PROJECT_ROOT = Path("/content/drive/MyDrive/SkinCare_AI_Component")

# Inputs (per-split image folders) and outputs (generated metadata files).
SPLITS_DIR = PROJECT_ROOT.joinpath("data", "10_images", "splits")
META_DIR = PROJECT_ROOT.joinpath("data", "10_images", "metadata")

# Order matters: the position of a class in this list becomes its integer
# label id when the label map is built with enumerate(CLASSES).
CLASSES = [
    "acne",
    "dark_spots",
    "wrinkles",
    "redness_prone",
    "dry_irritated",
    "normal",
]
SPLITS = ["train", "val", "test"]
# File suffixes (lower-case, with the leading dot) treated as images.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".webp"}


def is_img(p: Path) -> bool:
    """Return True when ``p`` is an existing regular file with an image suffix.

    The suffix comparison is case-insensitive (``photo.JPG`` counts).
    Directories and paths that do not exist yield False.
    """
    if not p.is_file():
        return False
    extension = p.suffix.lower()
    return extension in IMG_EXTS

# Fail fast when the dataset folder is not where we expect it. Without this
# guard, mkdir(parents=True) below would create the whole META_DIR path from
# scratch (for example when Drive is not mounted or PROJECT_ROOT is wrong),
# and the later cells would count zero images and write their outputs into
# that freshly created tree.
if not SPLITS_DIR.is_dir():
    raise FileNotFoundError(
        f"SPLITS_DIR not found: {SPLITS_DIR} "
        "(is Google Drive mounted and is PROJECT_ROOT correct?)"
    )

# Output folder for the generated label map and image index.
META_DIR.mkdir(parents=True, exist_ok=True)

print("SPLITS_DIR exists:", SPLITS_DIR.exists())
print("META_DIR:", META_DIR)


SPLITS_DIR exists: True
META_DIR: /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/metadata


In [3]:
# Count image files per (split, class). A class folder that does not exist
# is recorded as 0 rather than raising.
counts = defaultdict(dict)

for split_name in SPLITS:
    for class_name in CLASSES:
        folder = SPLITS_DIR / split_name / class_name
        if folder.exists():
            counts[split_name][class_name] = len(
                [entry for entry in folder.iterdir() if is_img(entry)]
            )
        else:
            counts[split_name][class_name] = 0

# Report: one section per split, one line per class, then the split total.
for split_name in SPLITS:
    per_class = counts[split_name]
    print(f"\n{split_name.upper()}:")
    for class_name in CLASSES:
        print(f"  {class_name:15s} {per_class[class_name]}")
    total = sum(per_class[class_name] for class_name in CLASSES)
    print(f"  {'TOTAL':15s} {total}")



TRAIN:
  acne            2097
  dark_spots      725
  wrinkles        756
  redness_prone   835
  dry_irritated   213
  normal          1805
  TOTAL           6431

VAL:
  acne            459
  dark_spots      165
  wrinkles        172
  redness_prone   235
  dry_irritated   72
  normal          400
  TOTAL           1503

TEST:
  acne            461
  dark_spots      167
  wrinkles        172
  redness_prone   185
  dry_irritated   90
  normal          435
  TOTAL           1510


In [4]:
# Class name -> integer id, numbered by position in CLASSES.
label_map = dict(zip(CLASSES, range(len(CLASSES))))
label_map_path = META_DIR / "label_map_concerns.json"

# Persist the mapping as pretty-printed JSON in the metadata folder.
label_map_path.write_text(json.dumps(label_map, indent=2))

print("✅ Saved:", label_map_path)
label_map


✅ Saved: /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/metadata/label_map_concerns.json


{'acne': 0,
 'dark_spots': 1,
 'wrinkles': 2,
 'redness_prone': 3,
 'dry_irritated': 4,
 'normal': 5}

In [5]:
# Build the image index: one row per image file found under
# SPLITS_DIR/<split>/<class>, then save it as CSV in META_DIR.
index_columns = ["image_path", "label_name", "label_id", "split"]

rows = []
for sp in SPLITS:
    for cls in CLASSES:
        cls_dir = SPLITS_DIR / sp / cls
        if not cls_dir.exists():
            # Missing class folders are skipped, matching the counting cell.
            continue

        for img_path in cls_dir.iterdir():
            if not is_img(img_path):
                continue

            rows.append({
                # Stored relative to PROJECT_ROOT so the index stays valid if
                # the project folder moves. as_posix() always uses "/" as the
                # separator, so the CSV does not depend on the OS that wrote it.
                "image_path": img_path.relative_to(PROJECT_ROOT).as_posix(),
                "label_name": cls,
                "label_id": label_map[cls],
                "split": sp,
            })

# Pass the columns explicitly: with an empty `rows` list a bare
# pd.DataFrame(rows) has no columns at all and sort_values() raises KeyError.
df = (
    pd.DataFrame(rows, columns=index_columns)
    .sort_values(["split", "label_id", "image_path"])
    .reset_index(drop=True)
)

if df.empty:
    print("⚠️ No images found under:", SPLITS_DIR)

index_path = META_DIR / "image_index_concerns.csv"
df.to_csv(index_path, index=False)

print("✅ Saved:", index_path)
print("Rows:", len(df))
df.head()


✅ Saved: /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/metadata/image_index_concerns.csv
Rows: 9444


Unnamed: 0,image_path,label_name,label_id,split
0,data/10_images/splits/test/acne/07Acne081101.jpg,acne,0,test
1,data/10_images/splits/test/acne/07Acne0811011 ...,acne,0,test
2,data/10_images/splits/test/acne/07RosaceaFulFA...,acne,0,test
3,data/10_images/splits/test/acne/07SteroidPerio...,acne,0,test
4,data/10_images/splits/test/acne/1.png,acne,0,test


In [6]:
# Sanity check 1: how many rows repeat an image_path that appeared earlier.
dup = df["image_path"].duplicated().sum()
print("Duplicate image_path rows:", dup)

# Sanity check 2: spot-check that indexed files exist on disk. Only a
# fixed-seed random sample (at most 200 rows) is probed, not the whole index.
sample_n = min(200, len(df))
sampled_paths = df["image_path"].sample(sample_n, random_state=42)
missing = sum(1 for rel_path in sampled_paths if not (PROJECT_ROOT / rel_path).exists())
print(f"Missing files in sample {sample_n}:", missing)


Duplicate image_path rows: 0
Missing files in sample 200: 0
