## Import Libraries

In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from scipy.sparse import csr_matrix

## EDA

In [4]:
# Location of the synthetic order history read by this notebook.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# will not run elsewhere until it is pointed at a local copy of the CSV.
orders_csv_path = r'C:\Users\user\desktop\projects\blockbite\model\synthetic_food_orders.csv'
df = pd.read_csv(orders_csv_path)

In [5]:
# Preview the first rows of the loaded orders frame.
df.head()

Unnamed: 0,user_id,item,frequency
0,1,Jollof Rice,2
1,1,Pasta,1
2,1,Pizza,5
3,1,Goat Meat,1
4,2,Fried Rice,2


In [6]:
# (rows, columns) of the raw orders frame.
data_shape = df.shape
print(data_shape)
# Missing-value count for every column (isna is the same check as isnull).
df.isna().sum()

(328, 3)


user_id      0
item         0
frequency    0
dtype: int64

In [7]:
# Distinct users and distinct food items present in the orders frame.
unique_users, unique_items = (df[column].nunique() for column in ('user_id', 'item'))
print(f'Number of unique users: {unique_users}')
print(f'Number of unique items: {unique_items}')

Number of unique users: 50
Number of unique items: 20


In [8]:
# Popularity of an item = number of distinct users who ordered it.
users_by_item = df.groupby('item')['user_id']
food_popularity = users_by_item.agg('nunique')

print(food_popularity.sort_values(ascending=True))


item
Amala             9
Chicken Wings    13
Efo Riro         13
Ice Cream        13
Shawarma         14
Pounded Yam      15
Goat Meat        15
Plantain         15
Salad            15
Suya             16
Fried Rice       17
Beans            17
Sandwich         18
Pizza            18
Fish Stew        18
Egusi Soup       19
Fufu             20
Jollof Rice      20
Burger           21
Pasta            22
Name: user_id, dtype: int64


In [9]:
# Items ordered by at most two distinct users are considered rare.
rare_foods = food_popularity.loc[food_popularity.le(2)]
print(rare_foods)

Series([], Name: user_id, dtype: int64)


In [10]:
# Total quantity ordered by each user, summed over all items.
user_activity = df.groupby('user_id')['frequency'].agg('sum')
# Users whose total is 5 or lower are treated as inactive.
inactive_users = user_activity.loc[user_activity.le(5)]

print(user_activity.sort_values())
print(user_activity.describe())
print(inactive_users)

user_id
11     5
4      7
14     9
1      9
15     9
43     9
30    10
26    11
48    11
5     12
29    12
21    13
20    14
10    15
23    16
16    16
44    16
39    16
22    17
33    17
45    18
46    18
25    18
24    19
40    19
49    19
50    20
6     20
12    21
13    21
17    21
31    21
36    23
19    23
18    24
34    24
27    24
28    26
35    27
37    27
2     28
8     28
9     28
3     29
42    29
32    29
47    30
7     31
41    34
38    36
Name: frequency, dtype: int64
count    50.000000
mean     19.580000
std       7.529778
min       5.000000
25%      14.250000
50%      19.000000
75%      25.500000
max      36.000000
Name: frequency, dtype: float64
user_id
11    5
Name: frequency, dtype: int64


In [11]:
# Variety = number of distinct items each user has ordered.
items_by_user = df.groupby('user_id')['item']
user_food_variety = items_by_user.agg('nunique')
print(user_food_variety.sort_values(ascending=True))
print(user_food_variety.describe())

user_id
15     3
11     3
30     3
48     3
43     3
14     4
10     4
1      4
5      4
4      4
21     5
23     5
49     5
46     5
20     5
29     5
26     6
31     6
40     6
45     6
19     6
22     6
24     6
50     6
33     6
39     6
16     6
25     7
12     7
35     7
34     7
13     7
17     8
32     8
6      8
44     8
27     8
8      9
3      9
7      9
47     9
28     9
2      9
18     9
41     9
9     10
37    10
38    10
42    10
36    10
Name: item, dtype: int64
count    50.000000
mean      6.560000
std       2.177435
min       3.000000
25%       5.000000
50%       6.000000
75%       8.750000
max      10.000000
Name: item, dtype: float64


In [12]:
def filter_data(df, rare_foods, inactive_users):
    """Return the rows of ``df`` that involve neither a rare item nor an inactive user.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``'item'`` and ``'user_id'``.
    rare_foods : pandas object
        Its ``index`` holds the item labels to exclude.
    inactive_users : pandas object
        Its ``index`` holds the user ids to exclude.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, with their original index labels; ``df`` itself is
        not modified.
    """
    is_rare_item = df['item'].isin(rare_foods.index)
    is_inactive_user = df['user_id'].isin(inactive_users.index)
    # A row is dropped as soon as either exclusion applies.
    return df[~(is_rare_item | is_inactive_user)]

In [13]:
# Remove rare items and inactive users identified during the EDA above.
df1_filtered = filter_data(df=df, rare_foods=rare_foods, inactive_users=inactive_users)
print(df1_filtered.shape)
df1_filtered.head()

(325, 3)


Unnamed: 0,user_id,item,frequency
0,1,Jollof Rice,2
1,1,Pasta,1
2,1,Pizza,5
3,1,Goat Meat,1
4,2,Fried Rice,2


In [14]:
def create_user_item_matrix(df):
    """Pivot long-format orders into a user x item frequency matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with the columns ``'user_id'``, ``'item'`` and
        ``'frequency'``.

    Returns
    -------
    pandas.DataFrame
        One row per ``user_id`` and one column per ``item``; each cell holds the
        total frequency for that pair, and pairs that never occur are 0. Values
        are floats.
    """
    user_item_matrix = df.pivot_table(
        index='user_id',
        columns='item',
        values='frequency',
        # pivot_table averages by default; order frequencies are additive, so a
        # (user, item) pair that appears on several rows must be summed.
        aggfunc='sum',
        fill_value=0,
    )
    # Keep a float matrix so normalised (fractional) values can later be
    # written into a copy of it without a dtype change.
    return user_item_matrix.astype(float)

In [20]:
# Wide user x item matrix built from the filtered orders.
df_user_item = create_user_item_matrix(df=df1_filtered)
print(df_user_item.shape)
df_user_item.head()

(49, 20)


item,Amala,Beans,Burger,Chicken Wings,Efo Riro,Egusi Soup,Fish Stew,Fried Rice,Fufu,Goat Meat,Ice Cream,Jollof Rice,Pasta,Pizza,Plantain,Pounded Yam,Salad,Sandwich,Shawarma,Suya
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,2.0,0.0,0.0,4.0,0.0,2.0,0.0,0.0,0.0,5.0,5.0,4.0,2.0,0.0,1.0,3.0,0.0,0.0
3,0.0,4.0,3.0,0.0,0.0,0.0,1.0,4.0,3.0,0.0,1.0,0.0,3.0,0.0,5.0,0.0,0.0,0.0,0.0,5.0
4,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
5,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,0.0,2.0,0.0,0.0,0.0,0.0


In [27]:
# Cells below this order count are zeroed out before similarity is computed.
min_frequency = 2
meets_threshold = df_user_item.ge(min_frequency)
user_item_matrix = df_user_item.where(meets_threshold, other=0)
user_item_matrix.head()

item,Amala,Beans,Burger,Chicken Wings,Efo Riro,Egusi Soup,Fish Stew,Fried Rice,Fufu,Goat Meat,Ice Cream,Jollof Rice,Pasta,Pizza,Plantain,Pounded Yam,Salad,Sandwich,Shawarma,Suya
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,2.0,0.0,0.0,4.0,0.0,2.0,0.0,0.0,0.0,5.0,5.0,4.0,2.0,0.0,0.0,3.0,0.0,0.0
3,0.0,4.0,3.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,3.0,0.0,5.0,0.0,0.0,0.0,0.0,5.0
4,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,0.0,2.0,0.0,0.0,0.0,0.0


In [28]:
# Scale every user row to unit L2 length, keeping the original index and columns.
row_unit_vectors = normalize(user_item_matrix.values, norm='l2', axis=1)
normalized_df = user_item_matrix.copy()
normalized_df.loc[:, :] = row_unit_vectors

print(normalized_df.index)

Index([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 12, 13, 14, 15, 16, 17, 18, 19,
       20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
       38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50],
      dtype='int64', name='user_id')


In [30]:
# Pairwise cosine similarity between user rows; position i of `user_ids`
# labels row/column i of `similarity_matrix`.
similarity_matrix = cosine_similarity(normalized_df.values)
user_ids = normalized_df.index.tolist()

In [39]:
# Show the user ids in the order used for the similarity matrix rows/columns.
print(user_ids)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]


In [31]:
# Inspect the user-user cosine similarity matrix.
print(similarity_matrix)

[[1.         0.54891316 0.         ... 0.         0.         0.        ]
 [0.54891316 1.         0.36807197 ... 0.         0.17066404 0.62576814]
 [0.         0.36807197 1.         ... 0.23933973 0.27650063 0.29862068]
 ...
 [0.         0.         0.23933973 ... 1.         0.45083482 0.        ]
 [0.         0.17066404 0.27650063 ... 0.45083482 1.         0.16      ]
 [0.         0.62576814 0.29862068 ... 0.         0.16       1.        ]]


In [32]:
# Persist the similarity matrix, labelled by user id on both axes.
similarity_df = pd.DataFrame(data=similarity_matrix, index=user_ids, columns=user_ids)
similarity_df.to_csv('similarity_matrix.csv')

In [33]:
def check_overlap(user1_id, user2_id, user_item_df, min_overlap=3):
    """Tell whether two users share at least ``min_overlap`` items.

    An item counts as shared when its value is greater than zero in both
    users' rows of ``user_item_df``.

    Parameters
    ----------
    user1_id, user2_id : index labels of the two rows to compare.
    user_item_df : pandas.DataFrame indexed by user id, one column per item.
    min_overlap : minimum number of shared items required.

    Returns
    -------
    Truthy when the number of shared items is ``>= min_overlap``.
    """
    first_has_item = user_item_df.loc[user1_id].gt(0)
    second_has_item = user_item_df.loc[user2_id].gt(0)
    shared_count = (first_has_item & second_has_item).sum()
    return shared_count >= min_overlap

In [34]:

def get_similar_users(target_user_id, similarity_matrix, user_ids, normalized_df, min_overlap=3):
    """Rank the other users by similarity to the target, keeping only those with enough overlap.

    Parameters
    ----------
    target_user_id : id of the user to find neighbours for; must be in ``user_ids``.
    similarity_matrix : 2-D indexable whose row ``i`` holds the similarities of
        ``user_ids[i]`` to every user.
    user_ids : list of user ids, positionally aligned with ``similarity_matrix``.
    normalized_df : user x item frame handed to ``check_overlap``.
    min_overlap : minimum number of shared items, forwarded to ``check_overlap``.

    Returns
    -------
    list of ``(user_id, similarity)`` tuples, highest similarity first. The
    target user is never included.
    """
    target_pos = user_ids.index(target_user_id)
    target_row = similarity_matrix[target_pos]

    ranked = [
        (user_ids[pos], score)
        for pos, score in enumerate(target_row)
        if pos != target_pos
        and check_overlap(target_user_id, user_ids[pos], normalized_df, min_overlap)
    ]
    # Stable descending sort on the similarity score.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked


In [35]:
def generate_recommendations(target_user_id, similar_users, user_item_df, top_n=5, max_neighbors=5):
    """Generate food recommendations based on similar users.

    Each food that a neighbour has (value > 0) and the target user does not
    (value == 0) accumulates ``value * similarity`` over the neighbours
    considered; the foods with the highest accumulated score are returned.

    Parameters
    ----------
    target_user_id : index label of the target user's row in ``user_item_df``.
    similar_users : sequence of ``(user_id, similarity)`` pairs, expected to be
        ordered best-first because only the leading entries are used.
    user_item_df : pandas.DataFrame indexed by user id, one column per food.
    top_n : maximum number of foods to return.
    max_neighbors : how many leading entries of ``similar_users`` contribute to
        the scores. Defaults to 5 (the previously hard-coded value); ``None``
        uses every entry.

    Returns
    -------
    list of food labels, highest score first; empty when no neighbour offers a
    food that is new to the target user.
    """
    target_user_prefs = user_item_df.loc[target_user_id]

    recommendations = {}
    for similar_user_id, similarity_score in similar_users[:max_neighbors]:
        similar_user_prefs = user_item_df.loc[similar_user_id]

        for food, frequency in similar_user_prefs.items():
            # Only foods the neighbour has and the target user has not.
            if frequency > 0 and target_user_prefs[food] == 0:
                recommendations[food] = recommendations.get(food, 0) + frequency * similarity_score

    # sorted() is stable, so equally scored foods keep first-seen order.
    sorted_recs = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    return [food for food, _score in sorted_recs[:top_n]]


In [None]:
def recommend_for_user(target_user_id, similarity_matrix, user_ids, normalized_df, top_n=5):
    """Entry point: recommend up to ``top_n`` foods for one user.

    Parameters
    ----------
    target_user_id : id of the user to recommend for.
    similarity_matrix : user-user similarity matrix aligned with ``user_ids``.
    user_ids : list of user ids known to the model.
    normalized_df : user x item frame used for both the overlap check and scoring.
    top_n : maximum number of foods to return.

    Returns
    -------
    A list of food labels on success. A message string is returned instead when
    the user is unknown or when no other user passes the overlap filter.
    """
    known_user = target_user_id in user_ids
    if not known_user:
        return f"User {target_user_id} not found!"

    neighbours = get_similar_users(target_user_id, similarity_matrix, user_ids, normalized_df)
    if neighbours:
        return generate_recommendations(target_user_id, neighbours, normalized_df, top_n)

    return "No similar users found with sufficient overlap"

In [41]:
# Example run: top-5 food suggestions for user 25.
recommendation_request = dict(
    target_user_id=25,
    similarity_matrix=similarity_matrix,
    user_ids=user_ids,
    normalized_df=normalized_df,
    top_n=5,
)
recommendations = recommend_for_user(**recommendation_request)

print(f"Recommended foods: {recommendations}")

Recommended foods: ['Pasta', 'Goat Meat', 'Shawarma', 'Jollof Rice', 'Amala']
