In [1]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the cleaned transaction log.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists — consider a configurable data dir.
df = pd.read_csv(
    "/Users/naveenapaleti/Projects/ShopTrack360/data/cleaned_ecommerce_data.csv"
)

# Customer x product matrix: one row per CustomerID, one column per StockCode,
# each cell the total Quantity for that pair. Pairs that never occur come out
# of unstack() as NaN and are filled with 0.
user_product = (
    df.groupby(by=['CustomerID', 'StockCode'])['Quantity']
    .sum()
    .unstack()
    .fillna(0)
)

In [3]:
# Factorise the customer x product matrix into 20 latent components.
# random_state is pinned so repeated runs give the same decomposition.
svd = TruncatedSVD(random_state=42, n_components=20)

# One row per customer (same order as user_product.index), one column per component.
user_embeddings = svd.fit_transform(X=user_product)

In [4]:
# Worked example: take the first customer in the matrix.
# (This is a deterministic choice — index[0] — not a random draw.)
sample_customer = user_product.index[0]

# Look up that customer's latent vector by row position, then project it back
# through the SVD components to get one score per product column.
customer_vec = user_embeddings[user_product.index.get_loc(sample_customer)]
product_scores = svd.components_.T @ customer_vec

# Products with a positive total quantity for this customer are excluded;
# the remaining products are ranked by score, highest first.
already_bought = user_product.columns[
    user_product.loc[sample_customer].gt(0).to_numpy()
]
recommended = (
    pd.Series(product_scores, index=user_product.columns)
    .drop(index=already_bought)
    .sort_values(ascending=False)
)

print("Top 5 recommended products for Customer", sample_customer)
print(recommended.head(5))


Top 5 recommended products for Customer 12346
StockCode
17084R    4.658302
21169     3.755300
23230     3.716835
21166     3.417694
21668     3.036946
dtype: float64


In [5]:
# Product catalogue: distinct (StockCode, Description) pairs with no missing values.
# The index is NOT reset, so `products` keeps the row labels of `df`.
products = (
    df.loc[:, ['StockCode', 'Description']]
    .drop_duplicates()
    .dropna()
)

# TF-IDF representation of each description (English stop words removed).
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(raw_documents=products['Description'])

# Pairwise cosine similarity between all descriptions.
# Row/column i of this matrix corresponds to the i-th ROW POSITION of
# `products` (0..len-1), not to the labels in products.index.
cosine_sim = cosine_similarity(X=tfidf_matrix, Y=tfidf_matrix)


In [6]:
# Map each product description to its row POSITION in `products` (0..len-1).
# `cosine_sim` is indexed by position, while `products.index` still carries the
# original `df` row labels (non-contiguous after drop_duplicates/dropna), so the
# labels must not be used to index the similarity matrix.
indices = pd.Series(range(len(products)), index=products['Description'])

def recommend_similar(desc, n=5, catalog=None, similarity=None):
    """Return the `n` products whose descriptions are most similar to `desc`.

    Parameters
    ----------
    desc : str
        Exact product description to look up in the catalogue.
    n : int, default 5
        Maximum number of similar products to return.
    catalog : pandas.DataFrame, optional
        Frame with 'StockCode' and 'Description' columns. Defaults to the
        notebook-level `products` frame.
    similarity : 2-D array-like, optional
        Square similarity matrix whose row/column i corresponds to the i-th
        row position of `catalog`. Defaults to the notebook-level `cosine_sim`.

    Returns
    -------
    pandas.DataFrame
        The 'StockCode' and 'Description' columns of the top matches, ordered
        from most to least similar. The queried product itself is excluded.

    Raises
    ------
    KeyError
        If `desc` does not occur in the catalogue.
    """
    catalog = products if catalog is None else catalog
    similarity = cosine_sim if similarity is None else similarity

    # Resolve the description to a row POSITION (0..len-1). The similarity
    # matrix is positional, whereas the catalogue index may hold arbitrary
    # labels, so labels must never be used to index the matrix.
    matches = (catalog['Description'] == desc).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise KeyError(desc)
    # A description may appear more than once (e.g. under several stock
    # codes); use the first occurrence as the query row.
    query_pos = int(matches[0])

    row = similarity[query_pos]
    # Exclude the query explicitly instead of assuming it sorts first.
    candidates = [pos for pos in range(len(row)) if pos != query_pos]
    # sorted() is stable, so ties keep catalogue order.
    top = sorted(candidates, key=lambda pos: row[pos], reverse=True)[:n]
    return catalog.iloc[top][['StockCode', 'Description']]

# Example: the five catalogue items closest to this description
recommend_similar(desc='WHITE HANGING HEART T-LIGHT HOLDER', n=5)


Unnamed: 0,StockCode,Description
284212,22804,PINK HANGING HEART T-LIGHT HOLDER
83,21733,RED HANGING HEART T-LIGHT HOLDER
215,84970S,HANGING HEART ZINC T-LIGHT HOLDER
390137,85123A,CREAM HANGING HEART T-LIGHT HOLDER
788,21814,HEART T-LIGHT HOLDER
