# In [None]:
import pandas as pd
import numpy as np
from transformers import BertTokenizer, BertModel
import torch

# Load and merge CSV files
# Inner-join the two tables on product_id so each look row carries its product name.
looks_df = pd.read_csv("looks.csv")  # Columns: look_id, category, product_id
products_df = pd.read_csv("products.csv")  # Columns: product_id, product_name
merged_df = pd.merge(looks_df, products_df, how="inner", on="product_id")

# Create look descriptions
# Each look is flattened into one string, "<category> look: <name>, <name>, ...",
# keyed by look_id.
look_descriptions = {}
for look_id, group in merged_df.groupby("look_id"):
    category = group["category"].iloc[0]  # Assume category is same for all products in a look
    # str.join raises TypeError on non-string items such as NaN (a missing name)
    # or numbers, so drop missing names and coerce the rest to str first.
    product_names = ", ".join(group["product_name"].dropna().astype(str))
    description = f"{category} look: {product_names}"
    look_descriptions[look_id] = description

# Load BERT model and tokenizer
# Tokenizer and model are both loaded from the same checkpoint name.
_BERT_CHECKPOINT = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = BertModel.from_pretrained(_BERT_CHECKPOINT)

# Function to get BERT embedding
def get_embedding(text):
    """Embed ``text`` with BERT and return its CLS vector as a NumPy array.

    The text is tokenized (truncated to at most 128 tokens), passed through
    the model with gradient tracking disabled, and the hidden state at
    sequence position 0 is returned with size-1 dimensions squeezed away.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    with torch.no_grad():  # inference only; no autograd graph needed
        hidden_states = model(**encoded).last_hidden_state
    cls_vectors = hidden_states[:, 0, :]  # position 0 of each sequence (CLS token)
    return cls_vectors.squeeze().numpy()

# Precompute embeddings for all looks
# Every description is embedded once here; the result maps look_id -> vector,
# in the same order as look_descriptions.
look_embeddings = dict(
    zip(look_descriptions, map(get_embedding, look_descriptions.values()))
)

# Recommendation function
def recommend_look(user_input):
    """Return the look whose description is most similar to ``user_input``.

    The request text is embedded with ``get_embedding`` and compared, by
    cosine similarity, against every precomputed vector in
    ``look_embeddings``.

    Args:
        user_input: Text describing the outfit the user wants.

    Returns:
        A ``(description, product_names)`` tuple: the best-scoring entry of
        ``look_descriptions`` and the list of ``product_name`` values from
        the ``merged_df`` rows that belong to that look.

    Raises:
        ValueError: If ``look_embeddings`` is empty, so there is nothing to
            recommend.
    """
    if not look_embeddings:
        # max() over an empty dict would raise a bare "empty sequence"
        # ValueError; keep the exception type but make the cause obvious.
        raise ValueError("No looks available to recommend: look_embeddings is empty")

    user_embedding = get_embedding(user_input)
    # The request's norm is identical for every comparison, so compute it
    # once rather than once per look.
    user_norm = np.linalg.norm(user_embedding)
    similarities = {
        look_id: np.dot(user_embedding, emb) / (user_norm * np.linalg.norm(emb))
        for look_id, emb in look_embeddings.items()
    }
    best_look_id = max(similarities, key=similarities.get)
    matching_rows = merged_df[merged_df["look_id"] == best_look_id]
    return look_descriptions[best_look_id], matching_rows["product_name"].tolist()

# Example usage
# Request a recommendation for one sample query and print what came back.
user_request = "I want a casual outfit with a blue top"
recommendation = recommend_look(user_request)
recommended_description, recommended_products = recommendation
print(f"Recommended: {recommended_description}")
print("Products:", recommended_products)