In [None]:
#Project: Product Recommender System with Retailrocket Dataset

In [None]:
#Step 1: Loadin and Exploring Dataset

In [None]:
# Step 2: Show the different event types in the dataset

In [8]:
import pandas as pd

# Load the events.csv dataset
events = pd.read_csv('events.csv')

# Show the first few rows
events.head()


Unnamed: 0,timestamp,visitorid,event,itemid,transactionid
0,1433221332117,257597,view,355908,
1,1433224214164,992329,view,248676,
2,1433221999827,111016,view,318965,
3,1433221955914,483717,view,253185,
4,1433221337106,951259,view,367447,


In [14]:
import pandas as pd

events = pd.read_csv('events.csv')
transactions = events[events['event'] == 'transaction'].copy()
transactions.drop_duplicates(subset=['visitorid', 'itemid'], inplace=True)
transactions.rename(columns={'visitorid': 'user_id', 'itemid': 'item_id'}, inplace=True)
transactions['rating'] = 1
transactions = transactions[['user_id', 'item_id', 'rating']]


In [9]:

events['event'].value_counts()


event
view           2664312
addtocart        69332
transaction      22457
Name: count, dtype: int64

In [None]:
# Step 3: Prepare the Data for Recommender Model

In [15]:
# Filter to only transaction events
transactions = events[events['event'] == 'transaction'].copy()

# Drop duplicates (same user buying same item multiple times)
transactions.drop_duplicates(subset=['visitorid', 'itemid'], inplace=True)

# Rename columns for clarity
transactions.rename(columns={
    'visitorid': 'user_id',
    'itemid': 'item_id'
}, inplace=True)

# Assign rating = 1 to all transactions (implicit feedback)
transactions['rating'] = 1

# Keep only the necessary columns
transactions = transactions[['user_id', 'item_id', 'rating']]

# Display final data
transactions.head()


Unnamed: 0,user_id,item_id,rating
130,599528,356475,1
304,121688,15335,1
418,552148,81345,1
814,102019,150318,1
843,189384,310791,1


In [None]:
# Step 4: Build and Train the Collaborative Filtering Model

In [11]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Format the data for surprise
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(transactions[['user_id', 'item_id', 'rating']], reader)

# Split the data
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Train the model using SVD
model = SVD()
model.fit(trainset)

# Predict on the test set
predictions = model.test(testset)

# Evaluate with RMSE
rmse = accuracy.rmse(predictions)
print(f"RMSE: {rmse:.3f}")


RMSE: 0.0331
RMSE: 0.033


In [16]:
# Function to get top N recommendations
def get_top_recommendations(user_id, model, transactions_df, n=20):
    all_items = transactions_df['item_id'].unique()
    bought_items = transactions_df[transactions_df['user_id'] == int(user_id)]['item_id'].tolist()
    unseen_items = [str(i) for i in all_items if i not in bought_items]

    predictions = []
    for item_id in unseen_items[:1000]:  # limit for speed
        pred = model.predict(str(user_id), item_id)
        predictions.append((item_id, pred.est))

    top_n = sorted(predictions, key=lambda x: x[1], reverse=True)[:n]
    return top_n


In [None]:
# Step 5: Generate Recommendations for a Specific User

In [20]:
# Choose a target user
target_user = str(transactions['user_id'].iloc[0])

# Top Picks (based on CF model)
top_picks = get_top_recommendations(target_user, model, transactions, n=10)

# Recently Viewed (mocked from events log)
recently_viewed = events[(events['visitorid'] == int(target_user)) & (events['event'] == 'view')]
recently_viewed = recently_viewed.sort_values(by='timestamp', ascending=False)['itemid'].unique()[:5]

# Customers Also Bought (co-occurrence logic)
user_bought = transactions[transactions['user_id'] == int(target_user)]['item_id']
related_users = transactions[transactions['item_id'].isin(user_bought)]['user_id'].unique()
related_items = transactions[transactions['user_id'].isin(related_users)]['item_id']
also_bought = pd.Series(related_items).value_counts().head(10).index.tolist()


In [18]:
print(f"📦 Top Picks for You ({target_user}):")
for item_id, score in top_picks:
    print(f"  Product ID: {item_id}, Predicted Score: {score:.2f}")

print(f"\n🕵️ Recently Viewed:")
for item in recently_viewed:
    print(f"  Product ID: {item}")

print(f"\n🛍️ Customers Also Bought:")
for item in also_bought:
    print(f"  Product ID: {item}")


📦 Top Picks for You (599528):
  Product ID: 15335, Predicted Score: 1.00
  Product ID: 81345, Predicted Score: 1.00
  Product ID: 150318, Predicted Score: 1.00
  Product ID: 310791, Predicted Score: 1.00
  Product ID: 54058, Predicted Score: 1.00
  Product ID: 284871, Predicted Score: 1.00
  Product ID: 150100, Predicted Score: 1.00
  Product ID: 243566, Predicted Score: 1.00
  Product ID: 245400, Predicted Score: 1.00
  Product ID: 336832, Predicted Score: 1.00

🕵️ Recently Viewed:
  Product ID: 64279
  Product ID: 356475

🛍️ Customers Also Bought:
  Product ID: 356475
  Product ID: 27926
  Product ID: 414755
  Product ID: 187649
  Product ID: 63543
  Product ID: 268883
  Product ID: 230842
  Product ID: 395849
  Product ID: 17478
  Product ID: 128990


In [19]:
# Pick a user from the dataset
target_user = str(transactions['user_id'].iloc[0])

# All products
all_products = transactions['item_id'].unique()

# Products the user already bought
bought_products = transactions[transactions['user_id'] == int(target_user)]['item_id'].tolist()

# Filter out bought products
unseen_products = [str(i) for i in all_products if i not in bought_products]

# Predict ratings for unseen products
predicted_ratings = []
for product in unseen_products[:500]:  # Sample for speed
    pred = model.predict(target_user, product)
    predicted_ratings.append((product, pred.est))

# Top 5 recommended products
top_5 = sorted(predicted_ratings, key=lambda x: x[1], reverse=True)[:5]

# Show recommendations
print(f"Top 5 recommendations for user {target_user}:")
for item_id, score in top_5:
    print(f"Product ID: {item_id}, Predicted Rating: {score:.2f}")


Top 5 recommendations for user 599528:
Product ID: 15335, Predicted Rating: 1.00
Product ID: 81345, Predicted Rating: 1.00
Product ID: 150318, Predicted Rating: 1.00
Product ID: 310791, Predicted Rating: 1.00
Product ID: 54058, Predicted Rating: 1.00
