In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# ----------------------
# 1. Data Preparation
# ----------------------
# Read the two input tables from the working directory.
looks_df = pd.read_csv('looks.csv')  # Columns used below: look_id, category, product_id
products_df = pd.read_csv('products.csv')  # Columns used below: product_id, product_name

# Left join keeps every row of looks_df, even when its product_id has no
# match in products_df (product_name is then missing for that row).
merged_df = looks_df.merge(products_df, on='product_id', how='left')


def _describe_look(look_rows):
    """Build the text description for the rows belonging to a single look.

    The category is read from the first row only, i.e. it is assumed to be
    the same for every row of the look. Missing product names are skipped.
    """
    category = look_rows['category'].iloc[0]
    product_names = ', '.join(look_rows['product_name'].dropna())
    return f"{category} look: {product_names}"


# Map each look_id to its "<category> look: <name>, <name>, ..." description.
look_descriptions = {
    look_id: _describe_look(look_rows)
    for look_id, look_rows in merged_df.groupby('look_id')
}

# ----------------------
# 2. Generate Embeddings
# ----------------------
# Load the sentence-embedding model (fetched on first use, as the download
# progress output of this cell shows).
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Encode all look descriptions in a single call so the model can batch them,
# instead of running one forward pass per look. The result is still a dict
# mapping look_id -> 1-D embedding vector.
_look_ids = list(look_descriptions)
if _look_ids:
    _look_vectors = model.encode(
        [look_descriptions[look_id] for look_id in _look_ids],
        convert_to_numpy=True,
    )
    look_embeddings = dict(zip(_look_ids, _look_vectors))
else:
    # Nothing to encode; keep the same (empty) mapping the per-look loop produced.
    look_embeddings = {}

# ----------------------
# 3. Recommendation Function
# ----------------------
def recommend_look(user_input):
    """Return the look whose description is most similar to a text request.

    The request is embedded with the module-level ``model`` and compared, by
    cosine similarity, against every vector in ``look_embeddings``; the look
    with the highest similarity wins.

    Args:
        user_input: Free-text description of what the user is looking for.

    Returns:
        A ``(description, products)`` tuple.

        * On success, ``description`` is the winning entry of
          ``look_descriptions`` and ``products`` is the list of non-missing
          product names for that look (or a single placeholder message when
          the look has none).
        * On failure (invalid input, no looks available, or an encoding
          error), ``description`` is an ``"Error..."`` message and
          ``products`` is an empty list. No exception is raised for these
          cases.
    """
    # Check the type first: truth-testing an arbitrary object (for example a
    # NumPy array) can itself raise, which would bypass the error return.
    if not isinstance(user_input, str) or not user_input.strip():
        return "Error: Please provide a valid input.", []

    # np.stack raises ValueError on an empty sequence, so report an empty
    # catalog the same way other failures are reported.
    if not look_embeddings:
        return "Error: No looks available to recommend.", []

    # Encoding failures are deliberately reported to the caller as a message
    # rather than propagated.
    try:
        user_embedding = model.encode(user_input, convert_to_numpy=True)
    except Exception as e:
        return f"Error encoding input: {str(e)}", []

    # Stack the per-look vectors into one matrix so all similarities are
    # computed in a single call; look_ids keeps the matching row order.
    look_ids = list(look_embeddings.keys())
    embeddings = np.stack(list(look_embeddings.values()))
    similarities = cosine_similarity([user_embedding], embeddings)[0]

    # Pick the look with the highest similarity score.
    best_look_id = look_ids[int(np.argmax(similarities))]

    # Retrieve the description and the product names of the chosen look.
    description = look_descriptions.get(best_look_id, "No description available")
    products = merged_df[merged_df['look_id'] == best_look_id]['product_name'].dropna().tolist()

    if not products:
        return description, ["No products found for this look."]

    return description, products

# ----------------------
# 4. Example Usage
# ----------------------
if __name__ == "__main__":
    # Sample free-text request used to demonstrate the recommender.
    user_request = "I want a casual outfit with a blue top"

    # recommend_look returns a (description, product list) pair.
    recommended_description, recommended_products = recommend_look(user_input=user_request)

    # Show the chosen look's description, then its products.
    print(f"Recommended: {recommended_description}")
    print("Products:", recommended_products)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.51k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Recommended: formal look: blue nylon slim-fit dress shirt, blue skirt, brown denim oversized heels
Products: ['blue nylon slim-fit dress shirt', 'blue skirt', 'brown denim oversized heels']
