In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# ----------------------
# 1. Data Preparation
# ----------------------
# Read the look -> product assignments and the product catalogue
looks_df = pd.read_csv('looks.csv')  # Columns: look_id, category, product_id
products_df = pd.read_csv('products.csv')  # Columns: product_id, product_name

# Left join on product_id: every row of looks_df is kept, with the matching
# product_name attached where the catalogue has one
merged_df = looks_df.merge(products_df, how='left', on='product_id')

# Collapse to one row per look: product names and ids are gathered into lists,
# and the category is taken from the first row of each group
look_products = (
    merged_df
    .groupby('look_id')
    .agg(
        product_name=('product_name', list),
        category=('category', 'first'),
        product_id=('product_id', list),
    )
    .reset_index()
)

# ----------------------
# 2. Generate Embeddings
# ----------------------
# Initialize sentence transformer model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every product name in one batched call. Calling encode() once per row
# through .apply() pushes a single sentence through the model at a time, which
# is needlessly slow for anything but a tiny catalogue.
product_name_embeddings = embedding_model.encode(
    products_df['product_name'].tolist(),
    convert_to_numpy=True,
)

# Store one 1-D vector per product row (assignment is positional, so the order
# of the encoded names must match the order of products_df rows, which it does)
products_df['product_embedding'] = list(product_name_embeddings)

# Create look embeddings by averaging product embeddings
def get_look_embedding(product_ids):
    """Return the mean embedding of the catalogue products listed in a look.

    Rows of ``products_df`` whose ``product_id`` appears in ``product_ids``
    are selected and their ``product_embedding`` vectors are averaged
    element-wise. When no row matches, a zero vector of length 384 is
    returned instead.
    """
    belongs_to_look = products_df['product_id'].isin(product_ids)
    vectors = products_df[belongs_to_look]['product_embedding'].tolist()
    if len(vectors) == 0:
        # Nothing to average: fall back to an all-zero vector
        return np.zeros(384)  # Default size for all-MiniLM-L6-v2
    stacked = np.stack(vectors)
    return stacked.mean(axis=0)

# Attach one averaged embedding per look, computed from that look's product ids
look_products['look_embedding'] = look_products['product_id'].map(get_look_embedding)

# ----------------------
# 3. PyTorch Model
# ----------------------
class RecommendationModel(nn.Module):
    """Score query/look pairs by cosine similarity of learned projections.

    Two separate encoders with the same layer layout (Linear -> ReLU ->
    Linear) project the query vector and the look vector; ``forward``
    returns the cosine similarity between the two projections.
    """

    def __init__(self, input_dim=384, hidden_dim=256, output_dim=128):
        super().__init__()
        layer_sizes = (input_dim, hidden_dim, output_dim)
        # The query encoder is created before the look encoder, so parameters
        # are instantiated in the same order as before.
        self.query_encoder = self._build_encoder(*layer_sizes)
        self.look_encoder = self._build_encoder(*layer_sizes)

    @staticmethod
    def _build_encoder(in_features, mid_features, out_features):
        """Build one Linear -> ReLU -> Linear projection stack."""
        return nn.Sequential(
            nn.Linear(in_features, mid_features),
            nn.ReLU(),
            nn.Linear(mid_features, out_features),
        )

    def forward(self, query, look):
        """Return the cosine similarity, along dim 1, of the encoded inputs."""
        projected_query = self.query_encoder(query)
        projected_look = self.look_encoder(look)
        return nn.functional.cosine_similarity(projected_query, projected_look, dim=1)

# Use the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the model with its default layer sizes and move its parameters to `device`
model = RecommendationModel()
model.to(device)

# Note: Training is skipped here. The SentenceTransformer embeddings are pretrained,
# but the encoder layers of `model` keep their random initial weights until trained.
# If fine-tuning is desired, use paired query-look data and train as below:
"""
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()
look_embeddings = torch.FloatTensor(np.stack(look_products['look_embedding'])).to(device)
for epoch in range(10):
    model.train()
    optimizer.zero_grad()
    outputs = model(look_embeddings, look_embeddings)
    loss = criterion(outputs, torch.ones_like(outputs))
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')
"""

# ----------------------
# 5. Recommendation System
# ----------------------
def get_recommendations(user_query, top_k=5, max_products=10):
    """Return product names drawn from the looks that best match a text query.

    The query is embedded with ``embedding_model`` and scored against every
    look embedding by ``model``. The product names of the ``top_k``
    highest-scoring looks are then merged in ranking order, with duplicates
    and missing names dropped.

    Args:
        user_query: Free-text description to encode.
        top_k: Number of best-scoring looks to draw products from.
        max_products: Maximum number of names to return. Defaults to 10, the
            limit that used to be hard-coded; ``None`` returns every name.

    Returns:
        A list of unique product names. Empty when there are no looks or
        ``top_k`` is not positive.
    """
    # np.stack fails on an empty sequence and torch.topk rejects a negative k,
    # so answer the degenerate cases up front.
    if len(look_products) == 0 or top_k <= 0:
        return []

    # Encode user query
    query_embedding = embedding_model.encode(user_query, convert_to_numpy=True)
    query_tensor = torch.FloatTensor(query_embedding).unsqueeze(0).to(device)

    # Prepare look embeddings
    look_embeddings = torch.FloatTensor(
        np.stack(look_products['look_embedding'].tolist())
    ).to(device)

    # NOTE(review): scores come from `model` as-is. If its weights were never
    # trained, the two encoders are random projections and the resulting
    # ranking is arbitrary -- confirm the model is trained before relying on it.
    model.eval()
    with torch.no_grad():
        # expand() broadcasts the single query row across all looks as a view,
        # avoiding the per-look copy that repeat() makes.
        similarities = model(
            query_tensor.expand(len(look_embeddings), -1),
            look_embeddings,
        )

    # Get top-k looks
    _, indices = torch.topk(similarities, k=min(top_k, len(look_products)), dim=0)
    recommended_looks = look_products.iloc[indices.cpu().numpy()]

    # Collect names in ranking order; a dict keeps first-seen order while
    # dropping duplicates. Names can be NaN when a look references a product
    # that has no match in the catalogue, so those are skipped.
    unique_names = {}
    for names in recommended_looks['product_name']:
        for name in names:
            if pd.notna(name):
                unique_names.setdefault(name)

    return list(unique_names)[:max_products]

# ----------------------
# 6. Example Usage
# ----------------------
if __name__ == "__main__":
    # Demo run: fetch recommendations for a sample query and list them,
    # numbered from 1
    user_query = "casual summer outfit"
    recommendations = get_recommendations(user_query)

    print(f"\nRecommendations for '{user_query}':")
    for idx, product in enumerate(recommendations, start=1):
        print(f"{idx}. {product}")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Recommendations for 'casual summer outfit':
1. red printed polo shirt
2. gray silk plain skirt
3. brown printed flats
4. gray cotton polo shirt
5. white silk skirt
6. white printed flats
7. green leather polo shirt
8. pink wool striped skirt
9. pink nylon slim-fit polo shirt
10. black wool printed skirt
