In [None]:
#Match each cleaned search query to the most relevant products from the fashion catalog.

In [1]:
import pandas as pd

# Load cleaned search history and product catalog
df_search = pd.read_csv('cleaned_search_queries.csv')
df_catalog = pd.read_csv('fashion_catalog_cleaned.csv')


In [3]:
# Fill missing fields and create a combined description
df_catalog.fillna('', inplace=True)

# Create a combined field for matching
df_catalog['combined_text'] = (
    df_catalog['SHORT_DESCRIPTION'].astype(str).str.lower() + ' ' +
    df_catalog['CATEGORY'].astype(str).str.lower() + ' ' +
    df_catalog['LONG_DESCRIPTION'].astype(str).str.lower()
)

In [5]:
# Sample for speed: use only first 10 queries and first 500 products
df_catalog_sample = df_catalog.head(500).copy()
search_queries_sample = df_search['query'].astype(str).head(10).tolist()

# Rebuild combined text just in case
df_catalog_sample['combined_text'] = (
    df_catalog_sample['SHORT_DESCRIPTION'].astype(str).str.lower() + ' ' +
    df_catalog_sample['CATEGORY'].astype(str).str.lower() + ' ' +
    df_catalog_sample['LONG_DESCRIPTION'].astype(str).str.lower()
)


In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Vectorize product descriptions
vectorizer = TfidfVectorizer(stop_words='english')
product_tfidf = vectorizer.fit_transform(df_catalog_sample['combined_text'])

# Vectorize search queries
search_tfidf = vectorizer.transform(search_queries_sample)

# Match each query to top 3 products
matches = []
for i, query in enumerate(search_queries_sample):
    sim_scores = cosine_similarity(search_tfidf[i], product_tfidf).flatten()
    top_indices = sim_scores.argsort()[-3:][::-1]  # Top 3 matches

    for idx in top_indices:
        matches.append({
            'search_query': query,
            'matched_product': df_catalog_sample.iloc[idx]['SHORT_DESCRIPTION'],
            'category': df_catalog_sample.iloc[idx]['CATEGORY'],
            'match_score': round(sim_scores[idx], 3),
            'product_url': df_catalog_sample.iloc[idx]['LYST_PRODUCT_URL']
        })


In [9]:
# Convert results to DataFrame
df_matches = pd.DataFrame(matches)

# Preview
df_matches.head()


Unnamed: 0,search_query,matched_product,category,match_score,product_url
0,elon musk shivon zilis,Metropolis Mini Top Handle -- Light Handbag Le...,top handle bags,0.0,https://www.lyst.com/bags/furla-metropolis-min...
1,elon musk shivon zilis,Paisley Dress With Ruffles,dresses,0.0,https://www.lyst.com/clothing/mango-paisley-dr...
2,elon musk shivon zilis,Mules & Clogs,flats,0.0,https://www.lyst.com/shoes/ugg-tasman-shearlin...
3,bank station fire alert,Metropolis Mini Top Handle -- Light Handbag Le...,top handle bags,0.0,https://www.lyst.com/bags/furla-metropolis-min...
4,bank station fire alert,Paisley Dress With Ruffles,dresses,0.0,https://www.lyst.com/clothing/mango-paisley-dr...


In [11]:
df_matches.to_csv('product_recommendations_sample.csv', index=False)
print("Saved to product_recommendations_sample.csv")


Saved to product_recommendations_sample.csv
