In [None]:
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m109.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1


In [None]:
# imports

import os
import re

import numpy as np
import pandas as pd

from tqdm.auto import tqdm

from sentence_transformers import SentenceTransformer
import faiss

from google.colab import drive


In [None]:
# mount ggl drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
#  file paths

# dataset csvs
REVIEWS_PATH = '/content/drive/MyDrive/sephora/cleaned_reviews.csv'
PRODUCTS_PATH = '/content/drive/MyDrive/sephora/cleaned_products.csv'

# embeddings + faiss index
EMB_DIR = '/content/drive/MyDrive/sephora2'

# make sure directory exists
os.makedirs(EMB_DIR, exist_ok=True)


In [None]:
# load data

reviews_df = pd.read_csv(REVIEWS_PATH)
products_df = pd.read_csv(PRODUCTS_PATH)

print("reviews shape:", reviews_df.shape)
print("products shape:", products_df.shape)


  reviews_df = pd.read_csv(REVIEWS_PATH)


reviews shape: (119312, 22)
products shape: (8494, 37)


In [None]:
# merge reviews with product metadata on product_id

df = reviews_df.merge(
    products_df,
    on="product_id",
    how="inner",
    suffixes=("_review", "_product"),
)

print("merged shape:", df.shape)
df.head(3)


merged shape: (119312, 58)


Unnamed: 0,author_id,rating_review,is_recommended,helpfulness,total_feedback_count,total_neg_feedback_count,total_pos_feedback_count,submission_time,review_text,review_title,...,brand_name_clean,variation_type_clean,variation_value_clean,highlights_clean,ingredients_clean,primary_category_clean,secondary_category_clean,tertiary_category_clean,text_nlp,cleanliness_score
0,8554483509,2,0,0,0,0,0,2023-03-21,This was gifted by Supergoop! in exchange for ...,Nice packaging but easy to overuse,...,supergoop,['color'],['translucent'],"[""['clean at sephora'"", ""'spf']""]","[""['zinc oxide 24"", '7%', 'calcium aluminum bo...",skincare,sunscreen,face_sunscreen,re setting 100 mineral powder sunscreen spf 35...,0.971
1,24710523057,2,0,2,2,0,2,2023-03-07,I didn’t like it; too much product comes out w...,Packaging is not suits le,...,supergoop,['color'],['translucent'],"[""['clean at sephora'"", ""'spf']""]","[""['zinc oxide 24"", '7%', 'calcium aluminum bo...",skincare,sunscreen,face_sunscreen,re setting 100 mineral powder sunscreen spf 35...,0.971
2,8429283179,5,1,32,34,2,32,2023-03-01,Y’all….I’m begging for everyone to read instru...,PLS READ THIS LOL,...,supergoop,['color'],['translucent'],"[""['clean at sephora'"", ""'spf']""]","[""['zinc oxide 24"", '7%', 'calcium aluminum bo...",skincare,sunscreen,face_sunscreen,re setting 100 mineral powder sunscreen spf 35...,0.971


In [None]:
# rich text field per review for embeddings and inspection

def build_review_document(row):
    parts = []

    # product-level context
    parts.append(f"product name: {row.get('product_name', '')}")
    parts.append(f"brand: {row.get('brand_name', '')}")
    parts.append(
        f"category: {row.get('primary_category', '')} / {row.get('secondary_category', '')}"
    )
    parts.append(f"price: {row.get('price_usd', '')}")

    # highlights / ingredients
    highlights = row.get("highlights")
    ingredients = row.get("ingredients")

    if isinstance(highlights, str) and highlights.strip():
        parts.append(f"highlights: {highlights}")
    if isinstance(ingredients, str) and ingredients.strip():
        parts.append(f"ingredients: {ingredients}")

    # reviewer metadata
    parts.append(f"reviewer skin type: {row.get('skin_type', 'unknown')}")
    parts.append(f"reviewer skin tone: {row.get('skin_tone', 'unknown')}")
    parts.append(f"reviewer age range: {row.get('age_range', 'unknown')}")

    # review text + rating
    parts.append(f"review rating: {row.get('rating_review', '')} / 5")
    if row.get("is_recommended") is not None:
        parts.append(f"recommended by reviewer: {row.get('is_recommended')}")
    if isinstance(row.get("review_title"), str) and row["review_title"].strip():
        parts.append(f"review title: {row['review_title']}")
    if isinstance(row.get("review_text"), str) and row["review_text"].strip():
        parts.append(f"review text: {row['review_text']}")

    return "\n".join(parts)


In [None]:
# apply to all rows (this does not change the order of df)

tqdm.pandas()
df["doc_text"] = df.progress_apply(build_review_document, axis=1)

df[["product_id", "doc_text"]].head(1)["doc_text"].iloc[0]


  0%|          | 0/119312 [00:00<?, ?it/s]

"product name: \nbrand: \ncategory: Skincare / Sunscreen\nprice: \nhighlights: ['Clean at Sephora', 'SPF']\ningredients: ['Zinc Oxide 24.7%, Calcium Aluminum Borosilicate, Silica, Trimethylsiloxysilicate, Calcium Sodium Borosilicate, Polymethyl Methacrylate, Lauroyl Lysine, Polyglyceryl-10 Pentaisostearate, Boron Nitride, Triethoxycaprylylsilane, Ethylhexylglycerin, Nylon-6/12, Sodium Dehyrdoacetate, Olive Glycerides, Ascorbyl Palmitate, Ceramide 3, May contain Iron Oxides (CI 77492, 77491, 77499).']\nreviewer skin type: Combination\nreviewer skin tone: Light\nreviewer age range: unknown\nreview rating: 2 / 5\nrecommended by reviewer: 0\nreview title: Nice packaging but easy to overuse\nreview text: This was gifted by Supergoop! in exchange for an honest review.  The packaging is nice and seems like it will prevent any powder from getting out. My issue is that it’s very hard to know how much powder you are using.  Looking at the powder directly, it looks quite shimmery/glittery.  On th

In [None]:
from sentence_transformers import SentenceTransformer
embedder = SentenceTransformer("all-MiniLM-L6-v2")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
import os
import numpy as np
import faiss

EMB_DIR = "/content/drive/MyDrive/sephora"

# use the REAL filenames in that folder
EMB_PATH = os.path.join(EMB_DIR, "embeddings.npy")
INDEX_PATH = os.path.join(EMB_DIR, "faiss_index.bin")

embeddings = np.load(EMB_PATH)
index = faiss.read_index(INDEX_PATH)

print("loaded embeddings shape:", embeddings.shape)
print("loaded faiss index vectors:", index.ntotal)


loaded embeddings shape: (119312, 384)
loaded faiss index vectors: 119312


In [None]:
reviews_df.columns


Index(['author_id', 'rating', 'is_recommended', 'helpfulness',
       'total_feedback_count', 'total_neg_feedback_count',
       'total_pos_feedback_count', 'submission_time', 'review_text',
       'review_title', 'skin_tone', 'eye_color', 'skin_type', 'hair_color',
       'product_id', 'product_name', 'brand_name', 'price_usd',
       'review_text_processed', 'review_title_processed', 'full_review',
       'full_review_processed'],
      dtype='object')

In [None]:
products_df.columns


Index(['product_id', 'product_name', 'brand_id', 'brand_name', 'loves_count',
       'rating', 'reviews', 'size', 'variation_type', 'variation_value',
       'variation_desc', 'ingredients', 'price_usd', 'value_price_usd',
       'sale_price_usd', 'limited_edition', 'new', 'online_only',
       'out_of_stock', 'sephora_exclusive', 'highlights', 'primary_category',
       'secondary_category', 'tertiary_category', 'child_count',
       'child_max_price', 'child_min_price', 'brand_name_clean',
       'variation_type_clean', 'variation_value_clean', 'highlights_clean',
       'ingredients_clean', 'primary_category_clean',
       'secondary_category_clean', 'tertiary_category_clean', 'text_nlp',
       'cleanliness_score'],
      dtype='object')

In [None]:
# build product-level aggregates (ratings, review count, price, highlights)

product_agg = products_df.groupby("product_id").agg(
    product_name=("product_name", "first"),
    brand_name=("brand_name", "first"),
    primary_category=("primary_category", "first"),
    secondary_category=("secondary_category", "first"),
    price_usd=("price_usd", "first"),
    rating_product=("rating", "first"),   # product-level rating
    n_reviews=("reviews", "first"),       # number of reviews on the site
    highlights=("highlights", "first")
).reset_index()

print(product_agg.head())


  product_id                        product_name                   brand_name  \
0  P01018539       master mattes liquid eyeliner              makeup by mario   
1  P02318798  master metallics eyeshadow palette              makeup by mario   
2  P04141879       liquid touch foundation brush  rare beauty by selena gomez   
3  P04456664                 master eye prep set              makeup by mario   
4  P04546871            body lava body luminizer      fenty beauty by rihanna   

  primary_category     secondary_category  price_usd  rating_product  \
0           Makeup                    Eye       24.0          3.8428   
1           Makeup                    Eye       50.0          3.4063   
2           Makeup  Brushes & Applicators       29.0          4.5796   
3           Makeup                    Eye       30.0          3.5946   
4      Bath & Body      Body Moisturizers       59.0          3.8327   

   n_reviews                                         highlights  
0      159.0  

In [None]:
product_agg.head()


Unnamed: 0,product_id,product_name,brand_name,primary_category,secondary_category,price_usd,rating_product,n_reviews,highlights
0,P01018539,master mattes liquid eyeliner,makeup by mario,Makeup,Eye,24.0,3.8428,159.0,"['Matte Finish', 'Waterproof', 'Long-wearing',..."
1,P02318798,master metallics eyeshadow palette,makeup by mario,Makeup,Eye,50.0,3.4063,96.0,
2,P04141879,liquid touch foundation brush,rare beauty by selena gomez,Makeup,Brushes & Applicators,29.0,4.5796,490.0,"['Vegan', 'Best for Dry, Combo, Normal Skin']"
3,P04456664,master eye prep set,makeup by mario,Makeup,Eye,30.0,3.5946,111.0,"['Light Coverage', 'Matte Finish', 'Pressed Po..."
4,P04546871,body lava body luminizer,fenty beauty by rihanna,Bath & Body,Body Moisturizers,59.0,3.8327,819.0,


In [None]:
# helper to turn a user profile + free-text need into a query string

def build_user_query_text(user_profile, user_query):
    parts = []

    if user_profile.get("skin_type"):
        parts.append(f"skin type: {user_profile['skin_type']}")
    if user_profile.get("skin_tone"):
        parts.append(f"skin tone: {user_profile['skin_tone']}")
    if user_profile.get("age_range"):
        parts.append(f"age range: {user_profile['age_range']}")
    if user_profile.get("preferences"):
        parts.append(f"preferences: {user_profile['preferences']}")

    if user_query:
        parts.append(f"need: {user_query}")

    return "\n".join(parts)


In [None]:
# retrieve top-k similar reviews from faiss using the query embedding

def retrieve_similar_reviews(user_profile, user_query, top_k=300):
    query_text = build_user_query_text(user_profile, user_query)
    query_emb = embedder.encode([query_text], convert_to_numpy=True)
    query_emb = query_emb.astype("float32")
    faiss.normalize_L2(query_emb)

    distances, indices = index.search(query_emb, top_k)
    idxs = indices[0]
    sims = distances[0]

    retrieved = df.iloc[idxs].copy()
    retrieved["similarity_score"] = sims

    return retrieved


In [None]:
# simple helper: count how many preference words appear in the product highlights

def compute_highlight_match(highlights, user_prefs):
    if not isinstance(highlights, str) or not isinstance(user_prefs, str):
        return 0

    highlights = highlights.lower()
    user_prefs = user_prefs.lower()

    pref_words = re.findall(r"\b[\w-]+\b", user_prefs)

    score = 0
    for w in pref_words:
        if len(w) > 2 and w in highlights:
            score += 1
    return score


In [None]:
def rank_products_from_reviews(
    review_rows,
    user_profile,
    top_n=5,
    target_primary=None,
    target_secondary=None,
):
    budget = user_profile.get("budget")

    if review_rows.empty:
        return product_agg.head(0)

    # aggregate similarity per product using review-level info
    agg = review_rows.groupby("product_id").agg(
        avg_sim=("similarity_score", "mean"),
        avg_rating_review=("rating_review", "mean"),
        n_reviews_hit=("product_id", "count"),
    ).reset_index()

    # join with product-level info (price, highlights, categories, etc.)
    merged = agg.merge(product_agg, on="product_id", how="left")

    # budget filter using product-level price
    if budget is not None and "price_usd" in merged.columns:
        merged = merged[merged["price_usd"] <= budget]

    # optional category filters
    if target_primary:
        merged = merged[
            merged["primary_category"].str.lower() == target_primary.lower()
        ]
    if target_secondary:
        merged = merged[
            merged["secondary_category"].str.lower() == target_secondary.lower()
        ]

    if merged.empty:
        return product_agg.head(0)

    # fill missing product rating with avg review rating if needed
    merged["rating_product"] = merged["rating_product"].fillna(
        merged["avg_rating_review"]
    )
    merged["n_reviews"] = merged["n_reviews"].fillna(0)

    # compute highlight match from product highlights and user prefs
    user_prefs = user_profile.get("preferences") or ""
    merged["highlight_match"] = merged.apply(
        lambda row: compute_highlight_match(row.get("highlights", ""), user_prefs),
        axis=1,
    )

    # scoring: mostly similarity, plus rating and highlights
    merged["score"] = (
        0.7 * merged["avg_sim"] +
        0.2 * merged["rating_product"] +
        0.1 * merged["highlight_match"]
    )

    merged = merged.sort_values("score", ascending=False)
    merged = merged.drop_duplicates(subset="product_id")

    return merged.head(top_n)


In [None]:
from openai import OpenAI

# OpenAI API key
os.environ["OPENAI_API_KEY"] = XXXXXXXX
client = OpenAI()

In [None]:
# turn ranked products into a small context list for the llm

def get_context_for_products(ranked_products):
    rows = []
    for _, row in ranked_products.iterrows():
        rows.append(
            {
                "product_name": row["product_name"],
                "brand_name": row["brand_name"],
                "primary_category": row["primary_category"],
                "secondary_category": row["secondary_category"],
                "price_usd": row["price_usd"],
                "rating_product": row["rating_product"],
            }
        )
    return rows


In [None]:
# generate a natural language recommendation explanation

def generate_recommendation_explanation(user_profile, user_query, ranked_products):
    context = get_context_for_products(ranked_products)

    sys_msg = (
        "you are a sephora beauty advisor. "
        "you are helping a user choose between a small set of products. "
        "only talk about the products listed in the recommendations field. "
        "do not invent or mention any products that are not in that list. "
        "for each recommended product, explain briefly why it matches the user's "
        "skin type, concerns, preferences, and budget. "
        "mention concrete attributes like category, key benefits, and price. "
        "keep the tone friendly but informative and avoid marketing buzzwords."
    )

    # format output
    user_msg = {
        "user_profile": user_profile,
        "user_query": user_query,
        "instructions": (
            "write 1–2 sentences of overview, then a short bullet list with one bullet "
            "per product: '- product name ($price): reason why it fits'."
        ),
        "recommendations": context,
    }

    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": sys_msg},
            {"role": "user", "content": str(user_msg)},
        ],
        temperature=0.3,   # a bit lower = more consistent and grounded
        max_tokens=350,
    )

    return response.choices[0].message.content.strip()


In [None]:
# quick rule-based intent parser for category detection
def infer_desired_secondary(user_query):
    q = user_query.lower()

    # sunscreen-related
    if "sunscreen" in q or "spf" in q:
        return "Sunscreen"

    # cleansers
    if "cleanser" in q or "face wash" in q or "wash" in q:
        return "Cleansers"

    # moisturizers
    if "moisturizer" in q or "moisturiser" in q or "cream" in q or "gel cream" in q:
        return "Moisturizers"

    # serums
    if "serum" in q or "essence" in q:
        return "Treatments"

    # makeup categories
    if "concealer" in q:
        return "Face"
    if "foundation" in q:
        return "Face"
    if "mascara" in q:
        return "Eye"
    if "eyeshadow" in q:
        return "Eye"
    if "lip" in q:
        return "Lip"

    return None


In [None]:
def recommend_products(
    user_profile,
    user_query,
    n_products=5,
    target_primary=None,
    target_secondary=None,
):
    # retrieve similar reviews from the faiss index
    retrieved_reviews = retrieve_similar_reviews(
        user_profile,
        user_query,
        top_k=300,
    )

    # infer likely product-type intent from user query
    auto_secondary = infer_desired_secondary(user_query)

    # if user didn’t specify filters manually, use the inferred one
    effective_primary = target_primary
    effective_secondary = target_secondary or auto_secondary

    # rank products using review similarities and category filters
    ranked_products = rank_products_from_reviews(
        review_rows=retrieved_reviews,
        user_profile=user_profile,
        top_n=n_products,
        target_primary=effective_primary,
        target_secondary=effective_secondary,
    )

    if ranked_products is None or ranked_products.empty:
        return ranked_products, "no products found for this query."

    # generate llm explanation for the ranked products
    explanation = generate_recommendation_explanation(
        user_profile,
        user_query,
        ranked_products,
    )

    return ranked_products, explanation


In [None]:
user_profile = {
    "skin_type": "combination",
    "age_range": "25-34",
    "preferences": "lightweight, natural finish",
    "budget": 50,
}

test_query = "I need a sunscreen that works under makeup for combination skin"

ranked, expl = recommend_products(
    user_profile=user_profile,
    user_query=test_query,
    n_products=5,
)

ranked[["product_name", "primary_category", "secondary_category", "price_usd", "score"]]


Unnamed: 0,product_name,primary_category,secondary_category,price_usd,score
7,luxury sun ritual pore smoothing sunscreen spf 30,Skincare,Sunscreen,38.0,1.569682
4,full spectrum 360 refreshing water cream organ...,Skincare,Sunscreen,48.0,1.344809
5,mineral sunscreen zinc oxide broad spectrum sp...,Skincare,Sunscreen,28.0,1.308678
6,n 41 facial sunscreen mist with spf 41,Skincare,Sunscreen,30.0,1.30674
3,re setting 100 mineral powder sunscreen spf 35 pa,Skincare,Sunscreen,35.0,1.214579


In [None]:
def precision_at_k(pred_ids, true_ids, k):
    if k == 0:
        return 0.0
    pred_top = pred_ids[:k]
    if not pred_top:
        return 0.0
    inter = len(set(pred_top) & set(true_ids))
    return inter / len(pred_top)


In [None]:
def recall_at_k(pred_ids, true_ids, k):
    if not true_ids:
        return 0.0
    pred_top = pred_ids[:k]
    inter = len(set(pred_top) & set(true_ids))
    return inter / len(true_ids)


In [None]:
def jaccard_at_k(pred_ids, true_ids, k):
    pred_top = set(pred_ids[:k])
    true_set = set(true_ids)
    if not pred_top and not true_set:
        return 0.0
    inter = len(pred_top & true_set)
    union = len(pred_top | true_set)
    return inter / union if union > 0 else 0.0


In [None]:
def get_relevant_products_by_category(products_df, primary_cat, secondary_cat=None):
    mask = products_df["primary_category"].str.lower() == primary_cat.lower()
    if secondary_cat:
        mask &= products_df["secondary_category"].str.lower() == secondary_cat.lower()
    return products_df.loc[mask, "product_id"].tolist()


In [None]:
def recommend_products_no_llm(user_profile, user_query, n_products=5):
    retrieved = retrieve_similar_reviews(user_profile, user_query, top_k=300)
    ranked = rank_products_from_reviews(retrieved, user_profile, top_n=n_products)
    return ranked


In [None]:
eval_scenarios = [
   {
    "name": "skincare – sunscreen with no white cast for daily wear",
    "user_profile": {
        "skin_type": "combination",
        "skin_tone": "tan",
        "age_range": "25-34",
        "budget": 35,
        "preferences": "lightweight, no white cast, non-greasy",
    },
    "user_query": "i need a sunscreen that blends into tan skin, feels lightweight, and works under makeup.",
    "primary_category": "Skincare",
    "secondary_category": "Sunscreen",
},

   {
    "name": "skincare – acne treatment with salicylic acid",
    "user_profile": {
        "skin_type": "oily",
        "skin_tone": "",
        "age_range": "18-24",
        "budget": 30,
        "preferences": "salicylic acid, exfoliating, acne control",
    },
    "user_query": "i want an acne treatment with salicylic acid that helps reduce breakouts without over-drying.",
    "primary_category": "Skincare",
    "secondary_category": "Treatments",
},

   {
    "name": "skincare – barrier repair + deep hydration",
    "user_profile": {
        "skin_type": "dry",
        "skin_tone": "fair",
        "age_range": "35-44",
        "budget": 60,
        "preferences": "barrier-repair, ceramides, hydrating",
    },
    "user_query": "my skin barrier feels damaged. i need something deeply hydrating with ceramides.",
    "primary_category": "Skincare",
    "secondary_category": "Moisturizers",
},

   {
    "name": "skincare – fragrance-free moisturizer for sensitive skin",
    "user_profile": {
        "skin_type": "sensitive",
        "skin_tone": "",
        "age_range": "45-54",
        "budget": 50,
        "preferences": "fragrance-free, calming, gentle",
    },
    "user_query": "i need a gentle moisturizer that is completely fragrance-free and won’t irritate my sensitive skin.",
    "primary_category": "Skincare",
    "secondary_category": "Moisturizers",
},

   {
    "name": "skincare – beginner retinol (low irritation)",
    "user_profile": {
        "skin_type": "normal",
        "skin_tone": "",
        "age_range": "35-44",
        "budget": 45,
        "preferences": "retinol, gentle formula",
    },
    "user_query": "i want a gentle retinol for anti-aging that won’t irritate my skin.",
    "primary_category": "Skincare",
    "secondary_category": "Treatments",
}

]

In [None]:
def evaluate_scenarios(scenarios, products_df, k=5):
    rows = []
    for sc in scenarios:
        user_profile = sc["user_profile"]
        user_query = sc["user_query"]

        relevant_ids = get_relevant_products_by_category(
            products_df,
            sc["primary_category"],
            sc.get("secondary_category"),
        )

        ranked = recommend_products_no_llm(user_profile, user_query, n_products=k)
        pred_ids = ranked["product_id"].tolist()

        p = precision_at_k(pred_ids, relevant_ids, k)
        r = recall_at_k(pred_ids, relevant_ids, k)
        j = jaccard_at_k(pred_ids, relevant_ids, k)

        rows.append(
            {
                "scenario": sc["name"],
                "primary_category": sc["primary_category"],
                "secondary_category": sc.get("secondary_category"),
                "k": k,
                "precision@k": round(p, 3),
                "recall@k": round(r, 3),
                "jaccard": round(j, 3),
            }
        )

    return pd.DataFrame(rows)


In [None]:
eval_results = evaluate_scenarios(eval_scenarios, products_df, k=5)
eval_results


Unnamed: 0,scenario,primary_category,secondary_category,k,precision@k,recall@k,jaccard
0,skincare – sunscreen with no white cast for da...,Skincare,Sunscreen,5,0.6,0.028,0.027
1,skincare – acne treatment with salicylic acid,Skincare,Treatments,5,0.4,0.004,0.004
2,skincare – barrier repair + deep hydration,Skincare,Moisturizers,5,0.6,0.005,0.005
3,skincare – fragrance-free moisturizer for sens...,Skincare,Moisturizers,5,0.2,0.002,0.002
4,skincare – beginner retinol (low irritation),Skincare,Treatments,5,0.333,0.002,0.002


In [None]:
!pip install -q gradio
import gradio as gr


In [None]:
def gr_recommend(
    skin_type,
    budget,
    preferences,
    user_query,
    n_products,
):
    # build a clean user profile
    user_profile = {
        "skin_type": skin_type or None,
        "preferences": preferences.strip() if preferences else "",
        "budget": float(budget) if budget not in ("", None) else None,
    }

    # call recommendation pipeline
    ranked, explanation = recommend_products(
        user_profile=user_profile,
        user_query=user_query,
        n_products=int(n_products),
    )

    # if nothing came back
    if ranked is None or ranked.empty:
        clean = pd.DataFrame(
            columns=[
                "rank", "product", "brand", "price ($)",
                "category", "subcategory", "score",
            ]
        )
        return clean, "no products were found for this query."

    # attach price from products_df if needed
    ranked_display = ranked.merge(
        products_df[["product_id", "price_usd"]],
        on="product_id",
        how="left",
        suffixes=("", "_raw"),
    )

    # prefer raw price if available
    ranked_display["price_display"] = ranked_display["price_usd_raw"].fillna(
        ranked_display["price_usd"]
    )

    # final columns
    cols = [
        "product_name",
        "brand_name",
        "price_display",
        "primary_category",
        "secondary_category",
        "score",
    ]
    clean = ranked_display[cols].copy()

    clean = clean.rename(
        columns={
            "product_name": "product",
            "brand_name": "brand",
            "price_display": "price ($)",
            "primary_category": "category",
            "secondary_category": "subcategory",
            "score": "score",
        }
    )

    # clean formatting
    clean["price ($)"] = clean["price ($)"].astype(float).round(2)
    clean["score"] = clean["score"].round(3)

    # add rank
    clean.insert(0, "rank", range(1, len(clean) + 1))

    return clean, explanation


In [None]:
skin_type_choices = ["", "oily", "dry", "combination", "normal"]

with gr.Blocks() as demo:
    gr.Markdown("## sephora nlp recommender")

    with gr.Row():
        skin_type = gr.Dropdown(
            label="skin type",
            choices=skin_type_choices,
            value="",
        )
        budget = gr.Number(
            label="budget (usd)",
            value=50,
        )
        n_products = gr.Slider(
            label="number of products",
            minimum=1,
            maximum=10,
            step=1,
            value=5,
        )

    preferences = gr.Textbox(
        label="preferences",
        lines=2,
        placeholder="e.g. matte, non-comedogenic, fragrance-free",
    )

    user_query = gr.Textbox(
        label="describe what you're looking for",
        lines=3,
        placeholder="e.g. i want a daily moisturizer that controls shine and works well under makeup.",
    )

    run_button = gr.Button("recommend")

    output_table = gr.Dataframe(
        label="recommended products",
        interactive=False,
    )
    output_explanation = gr.Textbox(
        label="explanation",
        lines=10,
    )

    run_button.click(
        fn=gr_recommend,
        inputs=[
            skin_type,
            budget,
            preferences,
            user_query,
            n_products,
        ],
        outputs=[output_table, output_explanation],
    )

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d5ef187ccceb7035c6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Experiment

## 1. Build Two Corpora (Full vs. Ablated)

In [None]:
import pandas as pd

# --- FULL VERSION: with highlights ---
def build_full_text(row):
    text = f"""
    PRODUCT NAME: {row['product_name']}
    DESCRIPTION: {row.get('description', '')}
    CATEGORY: {row.get('primary_category', '')}
    HIGHLIGHTS: {row.get('highlights', '')}
    PROS: {row.get('pros', '')}
    CONS: {row.get('cons', '')}
    """
    return text.strip()

products_df["full_text"] = products_df.apply(build_full_text, axis=1)

# --- ABLATED: remove highlights and engineered fields ---
def build_ablation_text(row):
    text = f"""
    PRODUCT NAME: {row['product_name']}
    DESCRIPTION: {row.get('description', '')}
    CATEGORY: {row.get('primary_category', '')}
    """
    return text.strip()

products_df["ablated_text"] = products_df.apply(build_ablation_text, axis=1)

## 2. Embed and Index Two Versions

In [None]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

model = SentenceTransformer("all-MiniLM-L6-v2")

# full corpus
full_corpus = (
    products_df["full_text"].tolist() +
    reviews_df["review_text"].tolist()
)

# ablated corpus
ablated_corpus = (
    products_df["ablated_text"].tolist() +
    reviews_df["review_text"].tolist()
)

# embed both
full_embeddings = model.encode(full_corpus, show_progress_bar=True)
ablated_embeddings = model.encode(ablated_corpus, show_progress_bar=True)

# FAISS indexes
d = full_embeddings.shape[1]
full_index = faiss.IndexFlatIP(d)
ablated_index = faiss.IndexFlatIP(d)

full_index.add(full_embeddings)
ablated_index.add(ablated_embeddings)

Batches:   0%|          | 0/3994 [00:00<?, ?it/s]

Batches:   0%|          | 0/3994 [00:00<?, ?it/s]

## 3. Define Retrieval and RAG Answer Functions

In [None]:
def retrieve(query, index, corpus, top_k=5):
    q_emb = model.encode([query])
    scores, idxs = index.search(q_emb, top_k)
    retrieved = [corpus[i] for i in idxs[0]]
    return retrieved


def llm_answer(query, retrieved_chunks):
    """Replace with LLM (OpenAI, Claude, etc.)"""
    context = "\n\n".join(retrieved_chunks)
    prompt = f"""
    Use ONLY the information below to answer the query.

    CONTEXT:
    {context}

    QUESTION:
    {query}

    ANSWER:
    """
    return "< LLM call here>"  # fill this in


def rag_pipeline(query, index, corpus):
    chunks = retrieve(query, index, corpus)
    answer = llm_answer(query, chunks)
    return answer, chunks


## 4. Create Evaluation Queries

In [None]:
evaluation_queries = [
    "What are the key features of the Sony WH-1000XM5?",
    "Do users think the battery life is good for the Bose QC45?",
    "Which is better for gaming, the G733 or the Kraken?",
    "Is the Apple Watch Series 9 good for runners?",
    "Which laptop under $800 is best for college students?",
    # add 20–50 queries total
]


## 5. LLM-as-a-**Judge**

In [None]:
def judge_answer(query, answer_full, answer_ablated):
    judge_prompt = f"""
    Evaluate two answers to the user's question.

    QUESTION:
    {query}

    ANSWER A (Full RAG):
    {answer_full}

    ANSWER B (Ablated RAG):
    {answer_ablated}

    Score each from 1–5 on:
    - Accuracy
    - Completeness
    - Grounding
    - Lack of hallucination

    Then state which answer is better and why.
    Respond in JSON with keys:
    accuracy_a, accuracy_b, completeness_a, completeness_b, grounding_a, grounding_b, hallucinations_a, hallucinations_b, winner.
    """

    return "< LLM call with judge_prompt>"


## 6. Run the Ablation Experiment

In [None]:
results = []

for query in evaluation_queries:
    full_answer, _ = rag_pipeline(query, full_index, full_corpus)
    ablated_answer, _ = rag_pipeline(query, ablated_index, ablated_corpus)

    evaluation = judge_answer(query, full_answer, ablated_answer)

    results.append({
        "query": query,
        "full_answer": full_answer,
        "ablated_answer": ablated_answer,
        "evaluation": evaluation
    })


## 7. Analyze Results

In [None]:
import json
import pandas as pd
import numpy as np

# Convert evaluation JSON strings into dictionaries
for r in results:
    if isinstance(r["evaluation"], str):
        r["evaluation"] = json.loads(r["evaluation"])

df_results = pd.DataFrame(results)

# Extract winner
df_results["winner"] = df_results["evaluation"].apply(lambda x: x["winner"])
print(df_results["winner"].value_counts())

# Helper to compute average metric
def avg_score(key):
    return np.mean([e["evaluation"][key] for e in results])

print("Accuracy (Full):", avg_score("accuracy_a"))
print("Accuracy (Ablated):", avg_score("accuracy_b"))



JSONDecodeError: Expecting value: line 1 column 1 (char 0)