In [1]:
# Model Training Notebook
# We'll build a simple semantic-retrieval-based recommendation system using pretrained sentence embeddings.
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the product catalogue and build one text field per product for embedding.
DATA_PATH = "../backend/app/data/products.csv"
df = pd.read_csv(DATA_PATH)

# Missing values become empty strings before the three fields are joined
# with single spaces.
title_text, description_text, categories_text = (
    df[column].fillna('') for column in ('title', 'description', 'categories')
)
df['combined_text'] = title_text + ' ' + description_text + ' ' + categories_text


In [3]:
# Load the 'all-MiniLM-L6-v2' sentence encoder and embed every product's
# combined text; the result is returned as a NumPy array.
model = SentenceTransformer('all-MiniLM-L6-v2')
sample_texts = df['combined_text'].tolist()
embs = model.encode(
    sample_texts,
    convert_to_numpy=True,
    show_progress_bar=True,
)
print("Embeddings shape:", embs.shape)


Batches: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]

Embeddings shape: (312, 384)





In [4]:
# simple recommend function
def recommend_for_text(query, top_k=5):
    """Return the product rows whose embeddings are most similar to ``query``.

    Relies on notebook-level state created in earlier cells: ``model`` (the
    text encoder), ``embs`` (the embeddings computed from ``df``) and ``df``
    (the product table).

    Args:
        query: Free-text search string to embed.
        top_k: Maximum number of rows to return. ``None`` returns every row,
            ranked by similarity. Must not be negative.

    Returns:
        The matching rows of ``df`` restricted to the columns ``uniq_id``,
        ``title``, ``brand``, ``price`` and ``categories``, ordered from most
        to least similar.

    Raises:
        ValueError: If ``top_k`` is negative. A negative value would otherwise
            be treated as a slice bound and silently drop the *last* rows.
    """
    if top_k is not None and top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    q_emb = model.encode([query], convert_to_numpy=True)[0]
    sims = cosine_similarity([q_emb], embs)[0]
    # Stable sort keeps equally-similar items in catalogue order so repeated
    # runs return the same ranking.
    ranked = np.argsort(-sims, kind="stable")
    idxs = ranked[:top_k]
    return df.iloc[idxs][['uniq_id', 'title', 'brand', 'price', 'categories']]

# Spot-check retrieval quality by printing the top matches for a few
# hand-written queries.
queries = [
    "small wooden side table for living room",
    "ergonomic office chair with armrest",
    "queen size bed with storage",
]
for query_text in queries:
    print("Query:", query_text)
    top_matches = recommend_for_text(query_text, top_k=5)
    print(top_matches)
    print("----------")


Query: small wooden side table for living room
                                  uniq_id  \
36   7df6de67-891e-567d-b8a5-3d184440a707   
102  0f7db59b-10e3-5fce-a0c4-380f718befe1   
246  487adf3a-9485-5500-9c98-bcc391eda169   
30   fb5af385-aee6-568c-a22f-e6b90ef92dac   
163  07b9d03a-02bc-5bc9-9133-a8e7b706cbc4   

                                                 title                  brand  \
36   FLYJOE Narrow Side Table with PU Leather Magaz...                 FLYJOE   
102  Get Set Style Black Glass Side Table, Square G...    Get Set Style Store   
246  3-Tier Side Table,Narrow End Table with Storag...              HomeToDou   
30   Flash Furniture Webb Commercial Grade 24" Roun...  Flash Furniture Store   
163  FurnitureR 27''H Round Drawer 2 Tiers Endtable...             FurnitureR   

       price                                         categories  
36    $49.99  ['Home & Kitchen', 'Furniture', 'Living Room F...  
102   $59.99  ['Home & Kitchen', 'Furniture', 'Living Room F...

In [5]:
# Use a torchvision pretrained model to extract features for a sample image (if available)
from torchvision import models, transforms
import torch
from PIL import Image

# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` enum API (IMAGENET1K_V1 is the equivalent checkpoint). Fall back
# to the legacy flag when the enum is not available on an older install.
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    resnet = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    resnet = models.resnet18(pretrained=True)
resnet.eval()  # evaluation mode for inference

# Preprocessing: resize to 224x224, convert to a tensor, then normalise each
# channel with the mean/std documented for torchvision's ImageNet models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# If dataset includes images locally, pick one sample path
# img = Image.open('path_to_sample_image.jpg').convert('RGB')
# x = transform(img).unsqueeze(0)
# with torch.no_grad():  # no gradients needed for feature extraction
#     feat = resnet(x)
# print(feat.shape)




In [6]:
# Persist the embedding matrix as a .npy file so the backend can load it quickly.
EMBEDDINGS_PATH = '../backend/app/data/embeddings.npy'
np.save(EMBEDDINGS_PATH, embs)
print("Saved embeddings.")


Saved embeddings.
