In [1]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv('cartItemsWithRating.csv') # If you are loading from a file

# 1. Handle missing values
# Dropping rows where essential columns like user_id, product_id, or rating are missing
df_cleaned = df.dropna(subset=['customer_id', 'product_id', 'rating'])

# 2. Convert 'user_id' and 'product_id' to string to avoid issues in indexing
df_cleaned['customer_id'] = df_cleaned['customer_id'].astype(str)
df_cleaned['product_id'] = df_cleaned['product_id'].astype(str)

# 3. Remove any duplicate entries
df_cleaned = df_cleaned.drop_duplicates(subset=['customer_id', 'product_id'], keep='last')

# 4. Create a user-item matrix (pivot table)
user_item_matrix = df_cleaned.pivot(index='customer_id', columns='product_id', values='rating')

# 5. Fill NaN values with 0 (or alternatively, with a neutral rating, if you prefer)
user_item_matrix = user_item_matrix.fillna(0)

# 6. Normalize the data (optional, depends on the algorithm used)
# Here, we'll normalize ratings to center them around the user's mean rating
user_item_matrix_normalized = user_item_matrix.sub(user_item_matrix.mean(axis=1), axis=0)

print("Cleaned and Processed Data:")
print(df_cleaned.head())
print("\nUser-Item Matrix:")
print(user_item_matrix.head())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['customer_id'] = df_cleaned['customer_id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['product_id'] = df_cleaned['product_id'].astype(str)


Cleaned and Processed Data:
                                  product_name                product_id  \
1  Propods P9 Wireless Gaming Headphones Ipx47  632adeebe387a0ccbd419f38   
2  Propods P9 Wireless Gaming Headphones Ipx47  632adeebe387a0ccbd419f38   
3  Propods P9 Wireless Gaming Headphones Ipx47  632adeebe387a0ccbd419f38   
4  Propods P9 Wireless Gaming Headphones Ipx47  632adeebe387a0ccbd419f38   
5  Propods P9 Wireless Gaming Headphones Ipx47  632adeebe387a0ccbd419f38   

                category_id       category                       _id   status  \
1  638590305a556431bf362c0a  1000 Ma Bazar  636b55797e71f1fa8bd736b6  removed   
2  638590305a556431bf362c0a  1000 Ma Bazar  636ccaa7804ab22b48e0af07  removed   
3  638590305a556431bf362c0a  1000 Ma Bazar  636cdfeea5c31a2b645f6cc3  removed   
4  638590305a556431bf362c0a  1000 Ma Bazar  636f07e31a01845aedc57ebb  removed   
5  638590305a556431bf362c0a  1000 Ma Bazar  63710cef43574ca986227018  removed   

   is_order  is_payed       

In [2]:
df_cleaned

Unnamed: 0,product_name,product_id,category_id,category,_id,status,is_order,is_payed,url_key,sku_from_system,...,store_name,cart_id,added_by,customer_id,user_id,brand_id,brand,product_url_key,product_description,rating
1,Propods P9 Wireless Gaming Headphones Ipx47,632adeebe387a0ccbd419f38,638590305a556431bf362c0a,1000 Ma Bazar,636b55797e71f1fa8bd736b6,removed,False,False,propods-p9-wireless-gaming-headphones-ipx47,1378_105926346_NP-1027832760,...,Rk Jha Traders,636b55797e71f1fa8bd736b5,636b4ffd7e71f1fa8bd7356f,636b4ffd7e71f1fa8bd7356f,636b4ffd7e71f1fa8bd7356f,5e2aa5e3bc8d203bec624d50,Not Applicable,propods-p9-wireless-gaming-headphones-ipx47,"<ul>\n\t<li data-spm-anchor-id=""a2a0e.pdp.prod...",1.5
2,Propods P9 Wireless Gaming Headphones Ipx47,632adeebe387a0ccbd419f38,638590305a556431bf362c0a,1000 Ma Bazar,636ccaa7804ab22b48e0af07,removed,False,False,propods-p9-wireless-gaming-headphones-ipx47,1378_105926346_NP-1027832760,...,Rk Jha Traders,636ccaa7804ab22b48e0af06,6360e6483435d7cd920cdf24,6360e6483435d7cd920cdf24,6360e6483435d7cd920cdf24,5e2aa5e3bc8d203bec624d50,Not Applicable,propods-p9-wireless-gaming-headphones-ipx47,"<ul>\n\t<li data-spm-anchor-id=""a2a0e.pdp.prod...",1.5
3,Propods P9 Wireless Gaming Headphones Ipx47,632adeebe387a0ccbd419f38,638590305a556431bf362c0a,1000 Ma Bazar,636cdfeea5c31a2b645f6cc3,removed,False,False,propods-p9-wireless-gaming-headphones-ipx47,1378_105926346_NP-1027832760,...,Rk Jha Traders,636cdfeea5c31a2b645f6cc2,636cdf34804ab22b48e0b24c,636cdf34804ab22b48e0b24c,636cdf34804ab22b48e0b24c,5e2aa5e3bc8d203bec624d50,Not Applicable,propods-p9-wireless-gaming-headphones-ipx47,"<ul>\n\t<li data-spm-anchor-id=""a2a0e.pdp.prod...",2.5
4,Propods P9 Wireless Gaming Headphones Ipx47,632adeebe387a0ccbd419f38,638590305a556431bf362c0a,1000 Ma Bazar,636f07e31a01845aedc57ebb,removed,False,False,propods-p9-wireless-gaming-headphones-ipx47,1378_105926346_NP-1027832760,...,Rk Jha Traders,636f07d85458785af3fd9a9c,636f05e45458785af3fd9a78,636f05e45458785af3fd9a78,636f05e45458785af3fd9a78,5e2aa5e3bc8d203bec624d50,Not Applicable,propods-p9-wireless-gaming-headphones-ipx47,"<ul>\n\t<li data-spm-anchor-id=""a2a0e.pdp.prod...",2.5
5,Propods P9 Wireless Gaming Headphones Ipx47,632adeebe387a0ccbd419f38,638590305a556431bf362c0a,1000 Ma Bazar,63710cef43574ca986227018,removed,False,False,propods-p9-wireless-gaming-headphones-ipx47,1378_105926346_NP-1027832760,...,Rk Jha Traders,63710cef43574ca986227017,6370f52fd73294a9a96d7a05,6370f52fd73294a9a96d7a05,6370f52fd73294a9a96d7a05,5e2aa5e3bc8d203bec624d50,Not Applicable,propods-p9-wireless-gaming-headphones-ipx47,"<ul>\n\t<li data-spm-anchor-id=""a2a0e.pdp.prod...",1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663127,KTM DUKE Cake,5efaea39ffc2a23ef854cb13,5ef9ba85ffc2a23ef854a473,Cakes,6170dedd12b472b9138058a8,delivered,True,True,ktm-duke-cake,304_HC-B-51,...,Oho Cakes,60958c122d00b678274a534b,60958bb52d00b678274a5347,60958bb52d00b678274a5347,5f8fcc156a586f1d51ebf038,5ef9db6effc2a23ef854b121,Oho Cake,ktm-duke-cake,<ul>\n\t<li>Weight 2 Pound</li>\n\t<li><strong...,1.0
663128,Barbie Bash Cake,5efad502ffc2a23ef854c288,5ef9ba85ffc2a23ef854a473,Cakes,608e794f27181703b5846eed,delivered,True,True,barbie-bash-cake,304_HC-B-31,...,Oho Cakes,607bfbcd28e4fd1f54fe4ee2,6076891500dfc2127dd65e12,6076891500dfc2127dd65e12,6076891500dfc2127dd65e12,5ef9db6effc2a23ef854b121,Oho Cake,barbie-bash-cake,<ul>\n\t<li>Pound 3 Pound</li>\n\t<li><strong>...,2.0
663129,Jeevan Jiune Kaida,5ef98fabffc2a23ef854950a,5ea9573765382e2f086c4cdb,Books,6097aa9c609139ec388b7a8c,delivered,True,True,jeevan-jiune-kaida,305_978-9937-8924-5-2,...,NEPALAYA,60979da2cf6664dd0319687e,60910407d462bf741b5c1569,60910407d462bf741b5c1569,60910407d462bf741b5c1569,5e2aa5e3bc8d203bec624d50,Not Applicable,jeevan-jiune-kaida,<ul>\n\t<li>Author: Jeevan Kumar Prasain</li>\...,1.5
663131,Rookmangud Katawal (Nepali),5ef999adffc2a23ef854968b,5ea953fe65382e2f086c4ca1,Books,608bc0090fb8a98c32413fea,delivered,True,True,rookmangud-katawal-nepali,305_978-9937-8740-4-5,...,NEPALAYA,608ba3bc9c6677876ac95a6a,608b9fa383478982ad4f1856,608b9fa383478982ad4f1856,5ece09f899cd7f11130d7ca8,5e2aa5e3bc8d203bec624d50,Not Applicable,rookmangud-katawal-nepali,<ul>\n\t<li>Author : Rookmangud Katawal</li>\n...,2.0


In [3]:
from surprise import Reader, Dataset, KNNBasic
from surprise.model_selection import train_test_split as surprise_train_test_split
from surprise import accuracy

# Convert the cleaned DataFrame to Surprise dataset
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_cleaned[['user_id', 'product_id', 'rating']], reader)

# Train-test split
trainset, testset = surprise_train_test_split(data, test_size=0.2, random_state=42)

# User-based collaborative filtering
algo_user = KNNBasic(sim_options={'user_based': True})

# Train the algorithm on the trainset, and predict ratings for the testset
algo_user.fit(trainset)
predictions_user = algo_user.test(testset)

# Compute RMSE
rmse_user = accuracy.rmse(predictions_user)


Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.0813


In [4]:
# Example: Get top-N recommendations for a specific user (e.g., user_id='101')
user_id = '5e817efd5d1501034c7e7647'
user_inner_id = algo_user.trainset.to_inner_uid(user_id)
user_neighbors = algo_user.get_neighbors(user_inner_id, k=10)

# Convert inner IDs to raw IDs for the recommendations
user_recommendations = [algo_user.trainset.to_raw_iid(inner_id) for inner_id in user_neighbors]
print(f"Top recommendations for user {user_id}: {user_recommendations}")

Top recommendations for user 5e817efd5d1501034c7e7647: ['5ffec57c6d380c0862375b42', '60d330513d2c4e154e5c7dd3', '5ffe98ff6d380c08623758fd', '60a1089583ea5bb6d42f75d1', '6214998e9f391954ea5a0e91', '5e92d5b7206e4c243b0f5c23', '627753d1d28bde77ec6857cd', '60f51545c03365e03673ad26', '62bea74ab04be713f3efbc25', '60e2d6a0b23abf5d0863e8dd']


In [5]:
# Item-based collaborative filtering
algo_item = KNNBasic(sim_options={'user_based': False})

# Train the algorithm on the trainset, and predict ratings for the testset
algo_item.fit(trainset)
predictions_item = algo_item.test(testset)

# Compute RMSE
rmse_item = accuracy.rmse(predictions_item)

# Example: Get top-N recommendations for a specific user (e.g., user_id='101')
user_inner_id_item = algo_item.trainset.to_inner_uid(user_id)
item_neighbors = algo_item.get_neighbors(user_inner_id_item, k=10)

# Convert inner IDs to raw IDs for the recommendations
item_recommendations = [algo_item.trainset.to_raw_iid(inner_id) for inner_id in item_neighbors]
print(f"Top item-based recommendations for user {user_id}: {item_recommendations}")


Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.1247
Top item-based recommendations for user 5e817efd5d1501034c7e7647: ['62b2c85ae0d735573aab0ab9', '605d8c9e05fab268763998e4', '5facecd47721ea307a62502f', '61ea7b896f45c07e3b001ddd', '62a06c92a473abe75617bd4b', '5f4d35fb6a07b964402bba7f', '601fba44ed2f557ec86a88b5', '5eff07c9effdd92d6d1539bd', '6214892bcf8fc854c7362e61', '606ea9f43d6250467444e4f6']


### Content Based Filtering

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder
from scipy.sparse import csr_matrix

# Step 1: Data Preparation
# Assuming your dataframe is named df

# Selecting relevant columns
df_content = df[['product_id', 'product_name', 'category', 'brand']]

In [None]:
# # Step 2: Text Preprocessing
# # Clean and preprocess the description column
# def preprocess_text(text):
#     # Here you can add more preprocessing steps like stopwords removal, lemmatization, etc.
#     return text.lower()

# df_content['cleaned_description'] = df_content['product_description'].apply(preprocess_text)

In [7]:
df_content

Unnamed: 0,product_id,product_name,category,brand
0,632adeebe387a0ccbd419f38,Propods P9 Wireless Gaming Headphones Ipx47,1000 Ma Bazar,Not Applicable
1,632adeebe387a0ccbd419f38,Propods P9 Wireless Gaming Headphones Ipx47,1000 Ma Bazar,Not Applicable
2,632adeebe387a0ccbd419f38,Propods P9 Wireless Gaming Headphones Ipx47,1000 Ma Bazar,Not Applicable
3,632adeebe387a0ccbd419f38,Propods P9 Wireless Gaming Headphones Ipx47,1000 Ma Bazar,Not Applicable
4,632adeebe387a0ccbd419f38,Propods P9 Wireless Gaming Headphones Ipx47,1000 Ma Bazar,Not Applicable
...,...,...,...,...
663128,5efad502ffc2a23ef854c288,Barbie Bash Cake,Cakes,Oho Cake
663129,5ef98fabffc2a23ef854950a,Jeevan Jiune Kaida,Books,Not Applicable
663130,5ef98ef3ffc2a23ef85494fb,Palpasa Cafe Nepali,Books,Not Applicable
663131,5ef999adffc2a23ef854968b,Rookmangud Katawal (Nepali),Books,Not Applicable


In [8]:
# Step 2: Feature Engineering
# One-hot encode categories and brands
onehot_encoder = OneHotEncoder()
category_onehot = onehot_encoder.fit_transform(df_content[['category']])
brand_onehot = onehot_encoder.fit_transform(df_content[['brand']])

# Combine all features into a single matrix
combined_features = np.hstack((category_onehot.toarray(), brand_onehot.toarray()))


In [None]:
# Step 3: Cosine Similarity
# Compute cosine similarity matrix using sparse matrix
cosine_sim = cosine_similarity(combined_features, dense_output=False)

In [None]:
# Step 4: Recommendation Functions

# General recommendation based on category and brand
def get_recommendations(product_id, cosine_sim=cosine_sim, df=df_content):
    # Find the index of the product that matches the product_id
    idx = df[df['product_id'] == product_id].index[0]

    # Get the pairwise similarity scores of all products with that product
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the products based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar products
    sim_scores = sim_scores[1:11]

    # Get the product indices
    product_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar products
    return df['product_name'].iloc[product_indices]

In [None]:
# Category-based recommendation
def get_category_recommendations(product_id, df=df_content):
    # Find the category of the given product_id
    product_category = df[df['product_id'] == product_id]['category'].values[0]
    
    # Filter products that belong to the same category
    category_products = df[df['category'] == product_category]
    
    # Exclude the input product itself
    category_products = category_products[category_products['product_id'] != product_id]
    
    # Return the names of the products in the same category
    return category_products['product_name'].tolist()

In [None]:
# Brand-based recommendation
def get_brand_recommendations(product_id, df=df_content):
    # Find the brand of the given product_id
    product_brand = df[df['product_id'] == product_id]['brand'].values[0]
    
    # Filter products that belong to the same brand
    brand_products = df[df['brand'] == product_brand]
    
    # Exclude the input product itself
    brand_products = brand_products[brand_products['product_id'] != product_id]
    
    # Return the names of the products in the same brand
    return brand_products['product_name'].tolist()

In [None]:
# General recommendations
recommended_products = get_recommendations(product_id='5e86177ce463693ae6692294')
print("General Recommendations:", recommended_products)

# Category-based recommendations
category_recommendations = get_category_recommendations(product_id='5e86177ce463693ae6692294')
print("Category-Based Recommendations:", category_recommendations)

# Brand-based recommendations
brand_recommendations = get_brand_recommendations(product_id='5e86177ce463693ae6692294')
print("Brand-Based Recommendations:", brand_recommendations)