In [16]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

In [10]:
# Load only the first 10,000 rows of the merged order/product table.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run elsewhere until it is pointed at a local copy.
csv_path = "C:\\Users\\mdabd\\Downloads\\merged_data.csv"
data = pd.read_csv(csv_path, nrows=10000)

In [24]:
# Preview the first rows of the loaded frame.
# NOTE(review): this cell's execution count (24) is later than the cells that
# follow it, and the saved output already shows the `user_dow` column that is
# only created further down -- a fresh top-to-bottom run will display a
# different preview.
data.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_name,aisle_id,department_id,aisle,department,user_dow
0,2,33120,1,1,202279,prior,3,5,9,8.0,Organic Egg Whites,86,16,eggs,dairy eggs,202279-5
1,2,28985,2,1,202279,prior,3,5,9,8.0,Michigan Organic Kale,83,4,fresh vegetables,produce,202279-5
2,2,9327,3,0,202279,prior,3,5,9,8.0,Garlic Powder,104,13,spices seasonings,pantry,202279-5
3,2,45918,4,1,202279,prior,3,5,9,8.0,Coconut Butter,19,13,oils vinegars,pantry,202279-5
4,2,30035,5,0,202279,prior,3,5,9,8.0,Natural Sweetener,17,13,baking ingredients,pantry,202279-5


In [12]:
# List the column labels of the loaded frame.
data.columns

Index(['order_id', 'product_id', 'add_to_cart_order', 'reordered', 'user_id',
       'eval_set', 'order_number', 'order_dow', 'order_hour_of_day',
       'days_since_prior_order', 'product_name', 'aisle_id', 'department_id',
       'aisle', 'department'],
      dtype='object')

In [13]:
# Build a composite "<user_id>-<order_dow>" key so each user is treated as a
# separate shopper on each day of the week.
user_part = data['user_id'].astype(str)
dow_part = data['order_dow'].astype(str)
data['user_dow'] = user_part + '-' + dow_part

In [14]:
# One row per user-day key, one column per product; each cell counts the
# order lines for that pair, with 0 where the pair never occurs.
interaction_matrix = data.pivot_table(
    values='order_id',
    index='user_dow',
    columns='product_id',
    aggfunc='count',
    fill_value=0,
)

In [48]:
# Preview the first rows of the user-day x product matrix.
# NOTE(review): the saved output (execution count 48) already contains the
# `cluster` column, which is only added by the KMeans cell further down; on a
# fresh top-to-bottom run this preview will not show it.
interaction_matrix.head()

product_id,23,27,28,34,45,54,79,86,95,129,...,49533,49588,49605,49610,49618,49621,49628,49667,49683,cluster
user_dow,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100052-1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
100101-2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
100142-1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
100208-5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
10031-5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6


In [17]:
# Z-score every user-day row across its product counts.
# The KMeans cell further down writes a 'cluster' label column back into
# `interaction_matrix`; exclude it here so re-running this cell never feeds
# the previous labels in as a feature (errors='ignore' keeps the first run
# working, when the column does not exist yet).
feature_counts = interaction_matrix.drop(columns='cluster', errors='ignore')
row_mean = feature_counts.mean(axis=1)
row_std = feature_counts.std(axis=1, ddof=0)  # population std, same as np.std
# The 1e-9 guards against division by zero for rows with constant counts.
interaction_matrix_normalized = (
    feature_counts.sub(row_mean, axis=0).div(row_std + 1e-9, axis=0)
)

In [21]:
# Group the normalised user-day rows into clusters; the seed is fixed so the
# assignment is repeatable.
num_clusters = 10  # You can adjust this number
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(interaction_matrix_normalized)
# Store each row's label next to its raw counts (adds a 'cluster' column).
interaction_matrix['cluster'] = cluster_labels




In [80]:
def get_recommendations(user_id, dow, n_recommendations=5):
    """Recommend the products most widely bought by a user-day's cluster peers.

    Looks up the ``"<user_id>-<dow>"`` row in the module-level
    ``interaction_matrix``, takes every *other* row carrying the same
    ``'cluster'`` label, and counts, per product, how many of those rows have
    a positive purchase count.

    Parameters
    ----------
    user_id, dow
        Formatted as ``f"{user_id}-{dow}"`` to form the row key.
    n_recommendations : int, default 5
        Maximum number of products to return.

    Returns
    -------
    pandas.Series or list
        Series named ``'count'`` indexed by product id, sorted by number of
        peer rows that bought the product (descending; ties keep the matrix's
        column order). It is empty when the cluster has no other members.
        If the key is not in the matrix, a message is printed and an empty
        list is returned.
    """
    user_dow = f"{user_id}-{dow}"

    if user_dow not in interaction_matrix.index:
        print("User  -Dow combination not found.")
        return []

    user_cluster = interaction_matrix.loc[user_dow, 'cluster']

    # Every other row in the same cluster (the query row itself is excluded).
    in_same_cluster = interaction_matrix['cluster'] == user_cluster
    peers = interaction_matrix.loc[in_same_cluster].drop(index=user_dow)

    # Remove the label column BEFORE thresholding: a label of 0 does not
    # survive a "> 0" filter, so dropping it afterwards raised KeyError for
    # every peer in cluster 0.
    purchased = peers.drop(columns='cluster') > 0

    # Number of peer rows that bought each product; keep only products that
    # were bought at least once.
    buyer_counts = purchased.sum(axis=0)
    buyer_counts = buyer_counts[buyer_counts > 0]

    recommendations = pd.Series(
        buyer_counts.to_numpy(),
        index=pd.Index(buyer_counts.index.tolist()),
        name='count',
    ).sort_values(ascending=False, kind='stable')
    return recommendations.head(n_recommendations)

In [81]:
# Example query: top products for one user on one day of the week.
user_id = 202279  # Replace with a valid user_id
dow = 5  # Replace with a valid day of the week (0-6, where 0 is Sunday)
recommended_products = get_recommendations(user_id, dow)
# A single print with a newline separator emits the heading and the result
# on separate lines.
print("Recommended Products:", recommended_products, sep="\n")

Recommended Products:
27104    15
24964    14
30489    10
24489     7
47766     6
Name: count, dtype: int64
