In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!pip -q install timm pandas pyarrow tqdm

In [3]:
from pathlib import Path
import json
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

import timm

PROJECT_ROOT = Path("/content/drive/MyDrive/SkinCare_AI_Component")

# Vision model
MODEL_PATH = PROJECT_ROOT / "models" / "vision" / "concerns_best.pt"

# Text datasets
TEXT_RAW = PROJECT_ROOT / "data" / "20_text" / "raw"
ING_PATH  = TEXT_RAW / "ingredient_knowledge_10000.csv"
ALL_PATH  = TEXT_RAW / "allergy_sensitivity_10000.csv"
ROUT_PATH = TEXT_RAW / "routine_mapping_10000.csv"
BEAUTY_PARQUET = TEXT_RAW / "beauty.parquet"  # optional

# Outputs (optional)
OUT_DIR = PROJECT_ROOT / "results" / "demo_outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

print("MODEL_PATH exists:", MODEL_PATH.exists(), MODEL_PATH)
print("TEXT_RAW:", TEXT_RAW, "| exists:", TEXT_RAW.exists())
print("ingredient_knowledge exists:", ING_PATH.exists())
print("allergy_sensitivity exists:", ALL_PATH.exists())
print("routine_mapping exists:", ROUT_PATH.exists())
print("beauty.parquet exists:", BEAUTY_PARQUET.exists())


MODEL_PATH exists: True /content/drive/MyDrive/SkinCare_AI_Component/models/vision/concerns_best.pt
TEXT_RAW: /content/drive/MyDrive/SkinCare_AI_Component/data/20_text/raw | exists: True
ingredient_knowledge exists: True
allergy_sensitivity exists: True
routine_mapping exists: True
beauty.parquet exists: True


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

ckpt = torch.load(MODEL_PATH, map_location=device)
label_map = ckpt.get("label_map", None)

if label_map is None:
    raise ValueError("label_map not found inside concerns_best.pt. Re-save model with label_map included.")

id_to_label = {v:k for k,v in label_map.items()}
num_classes = len(label_map)

model = timm.create_model("deit_tiny_patch16_224", pretrained=False, num_classes=num_classes)
model.load_state_dict(ckpt["model_state"])
model = model.to(device)
model.eval()

print("✅ Loaded model with classes:", [id_to_label[i] for i in range(num_classes)])


Device: cpu
✅ Loaded model with classes: ['acne', 'dark_spots', 'wrinkles', 'redness_prone', 'dry_irritated', 'normal']


In [5]:
IMG_SIZE = 224
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

eval_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])


In [6]:
def predict_concern_from_path(image_path: str):
    """
    image_path can be:
      - absolute path (/content/...)
      - drive-relative under PROJECT_ROOT (data/10_images/...)
    """
    p = Path(image_path)
    if not p.exists():
        p = PROJECT_ROOT / image_path
    if not p.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    img = Image.open(p).convert("RGB")
    x = eval_tfms(img).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(x)
        probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    pred_id = int(np.argmax(probs))
    return {
        "image_path": str(p),
        "pred_label": id_to_label[pred_id],
        "confidence": float(probs[pred_id]),
        "probs": {id_to_label[i]: float(probs[i]) for i in range(num_classes)}
    }


In [7]:
def load_csv_safe(path: Path):
    if not path.exists():
        return None
    df = pd.read_csv(path)
    # normalize column names
    df.columns = [c.strip().lower() for c in df.columns]
    return df

ingredient_df = load_csv_safe(ING_PATH)
allergy_df    = load_csv_safe(ALL_PATH)
routine_df    = load_csv_safe(ROUT_PATH)

print("ingredient_df:", None if ingredient_df is None else ingredient_df.shape)
print("allergy_df   :", None if allergy_df is None else allergy_df.shape)
print("routine_df   :", None if routine_df is None else routine_df.shape)


ingredient_df: (10000, 10)
allergy_df   : (10000, 7)
routine_df   : (10000, 9)


In [8]:
def find_col(df, candidates):
    if df is None: return None
    for c in candidates:
        if c in df.columns:
            return c
    return None

# ingredient knowledge columns
COL_ING_NAME   = find_col(ingredient_df, ["ingredient", "ingredient_name", "name"])
COL_ING_BENEF  = find_col(ingredient_df, ["benefit", "benefits", "skin_benefit", "target"])
COL_ING_CONCERN= find_col(ingredient_df, ["concern", "skin_concern", "condition", "label"])

# allergy columns
COL_ALL_ING    = find_col(allergy_df, ["ingredient", "ingredient_name", "name"])
COL_ALL_RISK   = find_col(allergy_df, ["risk", "risk_level", "severity", "reaction"])

# routine mapping columns
COL_ROUT_CONCERN = find_col(routine_df, ["concern", "skin_concern", "condition", "label"])
COL_ROUT_AM      = find_col(routine_df, ["am_routine", "am", "morning"])
COL_ROUT_PM      = find_col(routine_df, ["pm_routine", "pm", "night"])

print("Ingredient cols:", COL_ING_NAME, COL_ING_BENEF, COL_ING_CONCERN)
print("Allergy cols   :", COL_ALL_ING, COL_ALL_RISK)
print("Routine cols   :", COL_ROUT_CONCERN, COL_ROUT_AM, COL_ROUT_PM)


Ingredient cols: ingredient_name None skin_concern
Allergy cols   : None severity
Routine cols   : None am_routine pm_routine


In [9]:
def normalize_text(s):
    return str(s).strip().lower()

def get_avoid_list(user_allergies, allergy_df):
    """
    user_allergies: list[str] ingredient names user says they react to / want to avoid.
    We'll also include mapped risky ingredients if allergy_df exists.
    """
    avoid = set(normalize_text(a) for a in (user_allergies or []) if str(a).strip())

    # if allergy_df exists, optionally expand avoid list by "high risk" items
    # (only if user mentions allergy categories / you want strict safety)
    return sorted(avoid)

def recommend_ingredients(concern_label, ingredient_df, top_k=6):
    """
    Picks ingredients from ingredient_knowledge that match the concern.
    If dataset doesn't have concern column, falls back to random sample.
    """
    if ingredient_df is None or COL_ING_NAME is None:
        return []

    df = ingredient_df.copy()

    # filter by concern if possible
    if COL_ING_CONCERN and COL_ING_CONCERN in df.columns:
        df["_concern"] = df[COL_ING_CONCERN].astype(str).str.lower()
        df = df[df["_concern"].str.contains(normalize_text(concern_label), na=False)]

    # keep only needed columns
    out = []
    for _, r in df.head(200).iterrows():
        ing = r.get(COL_ING_NAME, "")
        ben = r.get(COL_ING_BENEF, "") if COL_ING_BENEF else ""
        if str(ing).strip():
            out.append({"ingredient": str(ing).strip(), "benefit": str(ben).strip()})
        if len(out) >= top_k:
            break

    # fallback if nothing found
    if not out:
        sample = ingredient_df.dropna(subset=[COL_ING_NAME]).sample(min(top_k, len(ingredient_df)), random_state=42)
        for _, r in sample.iterrows():
            out.append({
                "ingredient": str(r[COL_ING_NAME]).strip(),
                "benefit": str(r[COL_ING_BENEF]).strip() if COL_ING_BENEF else ""
            })
    return out

def get_routine(concern_label, routine_df):
    """
    Returns AM/PM routine strings from routine_mapping.
    Falls back to generic safe routine if no match.
    """
    generic_am = "Gentle cleanser → Lightweight moisturizer → Sunscreen (SPF 30+)"
    generic_pm = "Gentle cleanser → Moisturizer (barrier-supporting)"

    if routine_df is None or COL_ROUT_CONCERN is None:
        return generic_am, generic_pm

    df = routine_df.copy()
    df["_concern"] = df[COL_ROUT_CONCERN].astype(str).str.lower()
    match = df[df["_concern"].str.contains(normalize_text(concern_label), na=False)]

    if len(match) == 0:
        return generic_am, generic_pm

    row = match.iloc[0]
    am = str(row.get(COL_ROUT_AM, generic_am)).strip() if COL_ROUT_AM else generic_am
    pm = str(row.get(COL_ROUT_PM, generic_pm)).strip() if COL_ROUT_PM else generic_pm

    # keep it cosmetic-safe: if empty, use generic
    if not am: am = generic_am
    if not pm: pm = generic_pm
    return am, pm

def build_advice(pred, user_profile):
    """
    pred: output from predict_concern_from_path
    user_profile: dict with keys like skin_type, allergies, preferences
    """
    concern = pred["pred_label"]
    conf = pred["confidence"]

    allergies = user_profile.get("allergies", [])
    skin_type = user_profile.get("skin_type", "unknown")
    preferences = user_profile.get("preferences", [])

    avoid_list = get_avoid_list(allergies, allergy_df)
    rec_ings = recommend_ingredients(concern, ingredient_df, top_k=6)

    # remove recommended ingredients that user wants to avoid
    rec_ings = [x for x in rec_ings if normalize_text(x["ingredient"]) not in set(avoid_list)]

    am, pm = get_routine(concern, routine_df)

    # add small safety note
    safety_note = ("This is cosmetic guidance only (not a medical diagnosis). "
                   "Patch-test new products and avoid ingredients you know you react to.")

    return {
        "predicted_concern": concern,
        "confidence": conf,
        "user_skin_type": skin_type,
        "avoid_ingredients": avoid_list,
        "recommended_ingredients": rec_ings,
        "routine_am": am,
        "routine_pm": pm,
        "note": safety_note
    }


In [10]:
# ✅ CHANGE THIS to any image you want to test
DEMO_IMAGE = "data/10_images/splits/test/acne/"  # <-- put a real filename after this

# If you don't know filename, we'll auto-pick one from splits:
from glob import glob

def pick_any_test_image():
    base = PROJECT_ROOT / "data/10_images/splits/test"
    files = []
    for c in ["acne","dark_spots","wrinkles","redness_prone","dry_irritated","normal"]:
        files.extend(list((base / c).glob("*.jpg")))
        files.extend(list((base / c).glob("*.png")))
        files.extend(list((base / c).glob("*.jpeg")))
        if len(files) > 0:
            return str(files[0])
    return None

picked = pick_any_test_image()
print("Picked test image:", picked)

pred = predict_concern_from_path(picked)

user_profile = {
    "skin_type": "combination",                 # user input
    "allergies": ["fragrance"],                 # user input (example)
    "preferences": ["budget-friendly", "gentle"]# user input (example)
}

advice = build_advice(pred, user_profile)

pred, advice


Picked test image: /content/drive/MyDrive/SkinCare_AI_Component/data/10_images/splits/test/acne/2 (51).jpg


({'image_path': '/content/drive/MyDrive/SkinCare_AI_Component/data/10_images/splits/test/acne/2 (51).jpg',
  'pred_label': 'acne',
  'confidence': 0.997979462146759,
  'probs': {'acne': 0.997979462146759,
   'dark_spots': 0.001270425389520824,
   'wrinkles': 3.296184877399355e-05,
   'redness_prone': 0.0004270707140676677,
   'dry_irritated': 0.0002571230579633266,
   'normal': 3.2936302886810154e-05}},
 {'predicted_concern': 'acne',
  'confidence': 0.997979462146759,
  'user_skin_type': 'combination',
  'avoid_ingredients': ['fragrance'],
  'recommended_ingredients': [{'ingredient': 'sulfur', 'benefit': ''},
   {'ingredient': 'niacinamide', 'benefit': ''},
   {'ingredient': 'azelaic acid', 'benefit': ''},
   {'ingredient': 'zinc PCA', 'benefit': ''},
   {'ingredient': 'salicylic acid', 'benefit': ''},
   {'ingredient': 'zinc PCA', 'benefit': ''}],
  'routine_am': 'Gentle cleanser → Lightweight moisturizer → Sunscreen (SPF 30+)',
  'routine_pm': 'Gentle cleanser → Moisturizer (barrier-

In [11]:
def print_advice(advice):
    print("===== SkinCare AI Component Output =====")
    print(f"Predicted concern : {advice['predicted_concern']}  (confidence: {advice['confidence']:.3f})")
    print(f"User skin type    : {advice['user_skin_type']}")
    print()

    print("Avoid ingredients (user-reported):")
    if advice["avoid_ingredients"]:
        for a in advice["avoid_ingredients"]:
            print(" -", a)
    else:
        print(" - None")
    print()

    print("Recommended ingredients:")
    if advice["recommended_ingredients"]:
        for r in advice["recommended_ingredients"]:
            if r["benefit"]:
                print(f" - {r['ingredient']}  → {r['benefit']}")
            else:
                print(f" - {r['ingredient']}")
    else:
        print(" - No recommendations found (check ingredient dataset columns).")
    print()

    print("AM Routine:")
    print(" -", advice["routine_am"])
    print("\nPM Routine:")
    print(" -", advice["routine_pm"])
    print()

    print("Note:")
    print(" -", advice["note"])

print_advice(advice)


===== SkinCare AI Component Output =====
Predicted concern : acne  (confidence: 0.998)
User skin type    : combination

Avoid ingredients (user-reported):
 - fragrance

Recommended ingredients:
 - sulfur
 - niacinamide
 - azelaic acid
 - zinc PCA
 - salicylic acid
 - zinc PCA

AM Routine:
 - Gentle cleanser → Lightweight moisturizer → Sunscreen (SPF 30+)

PM Routine:
 - Gentle cleanser → Moisturizer (barrier-supporting)

Note:
 - This is cosmetic guidance only (not a medical diagnosis). Patch-test new products and avoid ingredients you know you react to.


In [12]:
out_path = OUT_DIR / "demo_output_example.json"
with open(out_path, "w") as f:
    json.dump(advice, f, indent=2)

print("✅ Saved demo output:", out_path)


✅ Saved demo output: /content/drive/MyDrive/SkinCare_AI_Component/results/demo_outputs/demo_output_example.json
