In [9]:
#we load the dataset to the notebook in this cell
import pandas as pd

data = pd.read_csv('/content/shopping_behavior_updated.csv')

print(data.head())


   Customer ID  Age Gender Item Purchased  Category  Purchase Amount (USD)  \
0            1   55   Male         Blouse  Clothing                     53   
1            2   19   Male        Sweater  Clothing                     64   
2            3   50   Male          Jeans  Clothing                     73   
3            4   21   Male        Sandals  Footwear                     90   
4            5   45   Male         Blouse  Clothing                     49   

        Location Size      Color  Season  Review Rating Subscription Status  \
0       Kentucky    L       Gray  Winter            3.1                 Yes   
1          Maine    L     Maroon  Winter            3.1                 Yes   
2  Massachusetts    S     Maroon  Spring            3.1                 Yes   
3   Rhode Island    M     Maroon  Spring            3.5                 Yes   
4         Oregon    M  Turquoise  Spring            2.7                 Yes   

   Shipping Type Discount Applied Promo Code Used  Previ

In [10]:
#we display the dataset to view about the contents and details of the dataset
print(data.info())
print(data.describe())



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3900 entries, 0 to 3899
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Customer ID             3900 non-null   int64  
 1   Age                     3900 non-null   int64  
 2   Gender                  3900 non-null   object 
 3   Item Purchased          3900 non-null   object 
 4   Category                3900 non-null   object 
 5   Purchase Amount (USD)   3900 non-null   int64  
 6   Location                3900 non-null   object 
 7   Size                    3900 non-null   object 
 8   Color                   3900 non-null   object 
 9   Season                  3900 non-null   object 
 10  Review Rating           3900 non-null   float64
 11  Subscription Status     3900 non-null   object 
 12  Shipping Type           3900 non-null   object 
 13  Discount Applied        3900 non-null   object 
 14  Promo Code Used         3900 non-null   

In [13]:
#we create the item id which is missing in the dataset
data['Item_ID'] = data['Item Purchased'].astype('category').cat.codes

print(data.head())

   Customer ID  Age Gender Item Purchased  Category  Purchase Amount (USD)  \
0            1   55   Male         Blouse  Clothing                     53   
1            2   19   Male        Sweater  Clothing                     64   
2            3   50   Male          Jeans  Clothing                     73   
3            4   21   Male        Sandals  Footwear                     90   
4            5   45   Male         Blouse  Clothing                     49   

        Location Size      Color  Season  Review Rating Subscription Status  \
0       Kentucky    L       Gray  Winter            3.1                 Yes   
1          Maine    L     Maroon  Winter            3.1                 Yes   
2  Massachusetts    S     Maroon  Spring            3.1                 Yes   
3   Rhode Island    M     Maroon  Spring            3.5                 Yes   
4         Oregon    M  Turquoise  Spring            2.7                 Yes   

   Shipping Type Discount Applied Promo Code Used  Previ

In [16]:
#we use the train test split and train the model
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split

user_item_data = data.groupby(['Customer ID', 'Item_ID']).size().reset_index(name='rating')

reader = Reader(rating_scale=(0, user_item_data['rating'].max()))
surprise_data = Dataset.load_from_df(user_item_data[['Customer ID', 'Item_ID', 'rating']], reader)


trainset, testset = train_test_split(surprise_data, test_size=0.2, random_state=42)


model = SVD()
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7e148e262e50>

In [23]:
#we design the recommendation system
def recommend_top_items(customer_id, model, data, num_recommendations=4):
    """
    Recommend top N items for a given customer.
    """

    all_items = data['Item_ID'].unique()


    purchased_items = data[data['Customer ID'] == customer_id]['Item_ID'].tolist()


    items_to_predict = [item for item in all_items if item not in purchased_items]


    predictions = [model.predict(customer_id, item_id) for item_id in items_to_predict]


    predictions.sort(key=lambda x: x.est, reverse=True)


    top_recommendations = [(pred.iid, pred.est) for pred in predictions[:num_recommendations]]
    return top_recommendations


customer_id = '2'
recommendations = recommend_top_items(customer_id, model, user_item_data, num_recommendations=4)
print(f"Top 4 recommended items for Customer {customer_id}: {recommendations}")


Top 4 recommended items for Customer 2: [(23, 1), (14, 1), (16, 1), (18, 1)]


In [24]:
#we turn the item id into item purchased
item_map = data[['Item_ID', 'Item Purchased']].drop_duplicates()

recommended_items = [(item_map[item_map['Item_ID'] == item_id]['Item Purchased'].values[0], rating)
                     for item_id, rating in recommendations]

print(f"Top 4 recommended items for Customer {customer_id}: {recommended_items}")


Top 4 recommended items for Customer 2: [('Sweater', 1), ('Sandals', 1), ('Shirt', 1), ('Shorts', 1)]
