In [None]:
# Mount Google Drive at /content/drive so the following cells can reach the
# project folder through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from pathlib import Path

# Root of the mounted Drive; every project path below hangs off this.
MYDRIVE = Path("/content/drive/MyDrive")
assert MYDRIVE.exists(), "❌ Drive not mounted correctly"

# Top-level Drive folders whose name mentions the project, in sorted order.
candidates = sorted(
    entry
    for entry in MYDRIVE.iterdir()
    if entry.is_dir() and "SkinCare_AI_Component" in entry.name
)

print("Project folder candidates found:")
for candidate in candidates:
    print(" -", candidate)

# The project root is the first candidate that actually contains the
# curated image tree; None when no candidate qualifies.
PROJECT_ROOT = next(
    (
        candidate
        for candidate in candidates
        if (candidate / "data" / "10_images" / "curated_by_class").exists()
    ),
    None,
)

if PROJECT_ROOT is None:
    raise FileNotFoundError("❌ Could not find your project folder containing data/10_images/curated_by_class")

print("\n✅ Using PROJECT_ROOT:", PROJECT_ROOT)
print("✅ curated_by_class exists:", (PROJECT_ROOT / "data" / "10_images" / "curated_by_class").exists())


Project folder candidates found:
 - /content/drive/MyDrive/SkinCare_AI_Component

✅ Using PROJECT_ROOT: /content/drive/MyDrive/SkinCare_AI_Component
✅ curated_by_class exists: True


In [None]:
from pathlib import Path

# Source tree of curated images and the destination folder for the splits,
# both located under the project's data/10_images directory.
CURATED_DIR = PROJECT_ROOT / "data" / "10_images" / "curated_by_class"
SPLITS_DIR  = PROJECT_ROOT / "data" / "10_images" / "splits"

SPLITS_DIR.mkdir(parents=True, exist_ok=True)

# Drop a marker file so the splits folder is easy to recognise when browsing Drive.
proof = SPLITS_DIR / "WHERE_AM_I.txt"
# The message contains a non-ASCII character, so pin the encoding instead of
# depending on the platform's locale default.
proof.write_text("If you see this file, this is the correct splits folder ✅", encoding="utf-8")

print("✅ CURATED_DIR:", CURATED_DIR)
print("✅ SPLITS_DIR :", SPLITS_DIR)
print("✅ Wrote proof file:", proof)
print("Exists:", proof.exists())

# List what is currently inside the splits folder (names only).
print("\nSPLITS_DIR contents now:")
for p in SPLITS_DIR.iterdir():
    print(" -", p.name)


✅ CURATED_DIR: /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/curated_by_class
✅ SPLITS_DIR : /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/splits
✅ Wrote proof file: /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/splits/WHERE_AM_I.txt
Exists: True

SPLITS_DIR contents now:
 - train
 - val
 - test
 - WHERE_AM_I.txt


In [None]:
import random, shutil, re

# Class labels; each one is used as a subfolder name under CURATED_DIR and
# under every split folder.
CLASSES = ["acne","dark_spots","wrinkles","redness_prone","dry_irritated","normal"]
# Extensions (lower-case, leading dot) that count as image files; suffixes are
# lower-cased before being compared against this set.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".webp"}

# Fractions handed to split_group_keys_with_min. They are applied to the groups
# left over after the minimum val/test groups are reserved, not to the whole
# class. NOTE(review): TEST_RATIO is passed through but the splitter never
# reads it; test simply receives whatever train and val do not take.
TRAIN_RATIO = 0.70
VAL_RATIO   = 0.15
TEST_RATIO  = 0.15

# Number of groups reserved up front for val and for test in each class
# (reduced automatically by the splitter when a class has too few groups).
MIN_VAL_GROUPS  = 10
MIN_TEST_GROUPS = 10

# Seed the global `random` module; the split cell's shuffle draws from it.
SEED = 42
random.seed(SEED)

def list_images_recursive(folder: Path):
    """Return every image file found anywhere below ``folder``, sorted by path.

    A file counts as an image when its extension, compared case-insensitively,
    is a member of ``IMG_EXTS``.

    Args:
        folder: Directory to search recursively.

    Returns:
        A sorted list of ``Path`` objects; an empty list when ``folder`` does
        not exist.
    """
    if not folder.exists():
        return []
    # rglob yields entries in whatever order the filesystem reports them.
    # Sorting gives callers a stable order, which matters because the result
    # is later fed into a seeded shuffle.
    return sorted(
        p for p in folder.rglob("*")
        if p.is_file() and p.suffix.lower() in IMG_EXTS
    )

# Matches a ".rf.<alphanumeric id>" marker inside a file stem.
# NOTE(review): presumably the suffix an export tool appends to augmented
# copies of the same image; confirm against the dataset source.
_rf_pattern = re.compile(r"\.rf\.[A-Za-z0-9]+")
def base_key(filename: str) -> str:
    """Build the grouping key for a file name.

    Every ".rf.<id>" marker is removed from the stem and the extension is
    lower-cased, so names that differ only by such a marker (or by extension
    case) map to the same key.

    Args:
        filename: File name (any directory part is ignored).

    Returns:
        The cleaned stem followed by the lower-cased extension.
    """
    name = Path(filename)
    cleaned_stem = _rf_pattern.sub("", name.stem)
    return f"{cleaned_stem}{name.suffix.lower()}"

def group_by_base(files):
    """Bucket paths by the ``base_key`` of their file name.

    Args:
        files: Iterable of path objects exposing a ``.name`` attribute.

    Returns:
        A dict mapping each base key to the list of paths that share it, with
        keys and list entries in first-seen order.
    """
    grouped = {}
    for path in files:
        key = base_key(path.name)
        if key in grouped:
            grouped[key].append(path)
        else:
            grouped[key] = [path]
    return grouped

# Show how many image files each class folder holds (subfolders included).
print("=== CURATED COUNTS (recursive) ===")
for c in CLASSES:
    n_images = len(list_images_recursive(CURATED_DIR / c))
    print(f"{c:15s} -> {n_images} images")


=== CURATED COUNTS (recursive) ===
acne            -> 3017 images
dark_spots      -> 1057 images
wrinkles        -> 1100 images
redness_prone   -> 1255 images
dry_irritated   -> 375 images
normal          -> 2640 images


In [None]:
# Throw away any previous split output so stale files cannot linger.
if SPLITS_DIR.exists():
    shutil.rmtree(SPLITS_DIR)

# Recreate the empty <split>/<class> folder grid.
split_class_dirs = [
    SPLITS_DIR / split_name / class_name
    for split_name in ("train", "val", "test")
    for class_name in CLASSES
]
for class_dir in split_class_dirs:
    class_dir.mkdir(parents=True, exist_ok=True)

print("✅ Fresh splits folders created at:", SPLITS_DIR)



✅ Fresh splits folders created at: /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/splits


In [None]:
def split_group_keys_with_min(keys, train_ratio, val_ratio, test_ratio, min_val, min_test):
    """Partition a list of group keys into train / val / test key lists.

    Keys are consumed in the order given (shuffle them beforehand for a random
    split). The first ``min_test`` keys are reserved for test and the next
    ``min_val`` for val; the remaining keys are then divided by ratio. The
    reservations come *on top of* the ratio share, so small classes end up
    with a train fraction below ``train_ratio``.

    Args:
        keys: Sequence of group keys (must support slicing).
        train_ratio: Fraction of the non-reserved keys given to train.
        val_ratio: Fraction of the non-reserved keys additionally given to val.
        test_ratio: Accepted for signature symmetry but not read; test
            receives every non-reserved key that train and val did not take.
        min_val: Number of keys reserved for val before the ratio split.
        min_test: Number of keys reserved for test before the ratio split.

    Returns:
        A ``(train_keys, val_keys, test_keys)`` tuple of disjoint slices that
        together contain every input key exactly once.
    """
    n = len(keys)

    # Too few groups to honour both minimums and still leave one for train:
    # shrink the reservations, test first and then val.
    if n < (min_val + min_test + 1):
        min_test = min(min_test, max(0, n - 2))
        min_val = min(min_val, max(0, n - 1 - min_test))

    test_keys = keys[:min_test]
    val_keys = keys[min_test:min_test + min_val]
    remaining = keys[min_test + min_val:]

    n_rem = len(remaining)
    n_train = int(n_rem * train_ratio)
    # int() truncation can round a tiny remainder down to zero training keys
    # (e.g. a single leftover key at ratio 0.70), which would hand the key to
    # test and leave train empty. Keep at least one key for train whenever
    # any remain and training is requested at all.
    if n_rem > 0 and n_train == 0 and train_ratio > 0:
        n_train = 1
    n_val = int(n_rem * val_ratio)

    train_more = remaining[:n_train]
    val_more = remaining[n_train:n_train + n_val]
    # Whatever is left after train and val (including rounding leftovers).
    test_more = remaining[n_train + n_val:]

    train_keys = train_more
    val_keys = val_keys + val_more
    test_keys = test_keys + test_more
    return train_keys, val_keys, test_keys


In [None]:
# Per-class result: (train images, val images, test images, number of groups).
summary = {}

# Dedicated RNG seeded right here, so re-running only this cell reproduces the
# same split no matter how much the global `random` state has been consumed.
split_rng = random.Random(SEED)

for c in CLASSES:
    files = list_images_recursive(CURATED_DIR / c)
    if len(files) == 0:
        raise ValueError(f"❌ Class '{c}' has 0 images. Add images first.")

    # Files sharing a base key stay together so they never straddle two splits.
    groups = group_by_base(files)
    # Directory listing order is filesystem-dependent; sort before shuffling
    # so the seed alone determines the outcome.
    keys = sorted(groups)
    split_rng.shuffle(keys)

    train_keys, val_keys, test_keys = split_group_keys_with_min(
        keys, TRAIN_RATIO, VAL_RATIO, TEST_RATIO, MIN_VAL_GROUPS, MIN_TEST_GROUPS
    )

    def copy_groups(keys_list, split_name):
        """Copy every file of the given groups into SPLITS_DIR/<split_name>/<class>.

        Returns the number of files written. Files are gathered recursively,
        so two sources from different subfolders can share a file name; the
        later one gets a "__dupN" suffix instead of overwriting the earlier.
        """
        dst_dir = SPLITS_DIR / split_name / c
        used_names = set()
        copied = 0
        for k in keys_list:
            # Sorted so that which duplicate gets renamed is deterministic.
            for src in sorted(groups[k]):
                dst_name = src.name
                dup = 1
                while dst_name in used_names:
                    dst_name = f"{src.stem}__dup{dup}{src.suffix}"
                    dup += 1
                used_names.add(dst_name)
                shutil.copy2(src, dst_dir / dst_name)
                copied += 1
        return copied

    tr = copy_groups(train_keys, "train")
    va = copy_groups(val_keys, "val")
    te = copy_groups(test_keys, "test")

    summary[c] = (tr, va, te, len(keys))

print("\n✅ SPLIT SUMMARY (train/val/test images, and groups)")
for c in CLASSES:
    tr, va, te, g = summary[c]
    print(f"{c:15s} train={tr:5d} val={va:5d} test={te:5d} | groups={g}")



✅ SPLIT SUMMARY (train/val/test images, and groups)
acne            train= 2097 val=  459 test=  461 | groups=3017
dark_spots      train=  725 val=  165 test=  167 | groups=1057
wrinkles        train=  756 val=  172 test=  172 | groups=1100
redness_prone   train=  835 val=  235 test=  185 | groups=195
dry_irritated   train=  213 val=   72 test=   90 | groups=91
normal          train= 1805 val=  400 test=  435 | groups=1245


In [None]:
def count_split(split_name):
    """Print the number of image files per class, and the total, for one split.

    Only files directly inside ``SPLITS_DIR/<split_name>/<class>`` are counted,
    and only those whose lower-cased extension is in ``IMG_EXTS``.

    Args:
        split_name: Name of the split folder, e.g. "train".
    """
    print(f"\n{split_name.upper()}:")
    per_class = []
    for c in CLASSES:
        class_dir = SPLITS_DIR / split_name / c
        n = sum(
            1
            for entry in class_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in IMG_EXTS
        )
        per_class.append(n)
        print(f"  {c:15s} {n}")
    print(f"  {'TOTAL':15s} {sum(per_class)}")

# Report the per-class image counts actually present on disk for each split.
count_split("train")
count_split("val")
count_split("test")



TRAIN:
  acne            2097
  dark_spots      725
  wrinkles        756
  redness_prone   835
  dry_irritated   213
  normal          1805
  TOTAL           6431

VAL:
  acne            459
  dark_spots      165
  wrinkles        172
  redness_prone   235
  dry_irritated   72
  normal          400
  TOTAL           1503

TEST:
  acne            461
  dark_spots      167
  wrinkles        172
  redness_prone   185
  dry_irritated   90
  normal          435
  TOTAL           1510
