In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [30]:
df = pd.read_csv('food.csv')

In [31]:

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Food_ID   400 non-null    int64 
 1   Name      400 non-null    object
 2   C_Type    400 non-null    object
 3   Veg_Non   400 non-null    object
 4   Describe  400 non-null    object
dtypes: int64(1), object(4)
memory usage: 15.8+ KB


In [32]:
df.head()

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe
0,1,summer squash salad,Healthy Food,veg,"white balsamic vinegar, lemon juice, lemon rin..."
1,2,chicken minced salad,Healthy Food,non-veg,"olive oil, chicken mince, garlic (minced), oni..."
2,3,sweet chilli almonds,Snack,veg,"almonds whole, egg white, curry leaves, salt, ..."
3,4,tricolour salad,Healthy Food,veg,"vinegar, honey/sugar, soy sauce, salt, garlic ..."
4,5,christmas cake,Dessert,veg,"christmas dry fruits (pre-soaked), orange zest..."


In [33]:
df.isnull().sum()

Food_ID     0
Name        0
C_Type      0
Veg_Non     0
Describe    0
dtype: int64

In [34]:
df['C_Type'].value_counts()

Indian          88
Healthy Food    58
Dessert         53
Chinese         27
Italian         25
Snack           23
Thai            23
French          22
Mexican         21
Japanese        18
Beverage        11
Nepalese        11
Korean           9
Vietnames        9
 Korean          1
Spanish          1
Name: C_Type, dtype: int64

In [35]:
df['Veg_Non'].value_counts()

veg        238
non-veg    162
Name: Veg_Non, dtype: int64

In [36]:
df.duplicated().sum()

0

In [37]:
import nltk
import re
from nltk.corpus import stopwords
import string
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [100]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
def cleaning_text(text):
    text = text.lower()
    text = ''.join([' ' + char if char in string.punctuation else char for char in text])
    text = "".join([char for char in text if char not in string.punctuation])
    text = " ".join([char for char in text.split() if char not in stop_words])
    tokens = word_tokenize(text)
    processed_text = "".join([lemmatizer.lemmatize(token) for token in tokens])
    return text

In [101]:
df['Describe']

0      white balsamic vinegar, lemon juice, lemon rin...
1      olive oil, chicken mince, garlic (minced), oni...
2      almonds whole, egg white, curry leaves, salt, ...
3      vinegar, honey/sugar, soy sauce, salt, garlic ...
4      christmas dry fruits (pre-soaked), orange zest...
                             ...                        
395     cream cheese, chopped kimchi, scallions,count...
396    poblano chiles, bacon, shrips, red salsa, garl...
397    broccoli,Bread Crumbs,  anchovy fillets, garli...
398    egg yolks,lemon juice, unsalted butter, all pu...
399    kosher salt, rosemary, garlic, potato, olive o...
Name: Describe, Length: 400, dtype: object

In [103]:
df['Describe'] = df['Describe'].apply(cleaning_text)

In [104]:
df['Describe']

0      white balsamic vinegar lemon juice lemon rind ...
1      olive oil chicken mince garlic minced onion sa...
2      almonds whole egg white curry leaves salt suga...
3      vinegar honey sugar soy sauce salt garlic clov...
4      christmas dry fruits pre soaked orange zest le...
                             ...                        
395    cream cheese chopped kimchi scallions country ...
396    poblano chiles bacon shrips red salsa garlic c...
397    broccoli bread crumbs anchovy fillets garlic c...
398    egg yolks lemon juice unsalted butter purpose ...
399    kosher salt rosemary garlic potato olive oil c...
Name: Describe, Length: 400, dtype: object

# Content Based Filtering

In [105]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [106]:
tfidf = TfidfVectorizer()

In [107]:
vectors = tfidf.fit_transform(df['Describe'])

In [109]:
vectors.shape

(400, 1187)

In [110]:
from sklearn.metrics.pairwise import cosine_similarity

In [111]:
similarity = cosine_similarity(vectors)

In [112]:
similarity.shape

(400, 400)

In [119]:
df[df['Name'] =='chicken minced salad'].index[0] # to find the index

1

In [123]:
def food_recommend(food):
    food_index = df[df['Name']== food].index[0]
    distances = similarity[food_index]
    food_list = sorted(list(enumerate(distances)),reverse = True, key = lambda x:x[1])[1:6]
                       
    for i in food_list:
        
        print(df.iloc[i[0]]['Name'])
        

In [124]:
food_recommend('summer squash salad')

baked namakpara with roasted almond dip
shrimp & cilantro ceviche
spanish fish fry
green cucumber shots
amaranthus granola with lemon yogurt, berries and marigold


In [127]:
food_recommend('veg fried rice')

veg hakka noodles
chilli chicken
prawn fried rice
egg and garlic fried rice
Crispy Pakora


# Advanced content Based Filtering

In [131]:
df['Description'] =  df['C_Type'] + " " + df['Veg_Non'] + " " + df['Describe']

In [132]:
df

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe,Description
0,1,summer squash salad,Healthy Food,veg,white balsamic vinegar lemon juice lemon rind ...,Healthy Food veg white balsamic vinegar lemon ...
1,2,chicken minced salad,Healthy Food,non-veg,olive oil chicken mince garlic minced onion sa...,Healthy Food non-veg olive oil chicken mince g...
2,3,sweet chilli almonds,Snack,veg,almonds whole egg white curry leaves salt suga...,Snack veg almonds whole egg white curry leaves...
3,4,tricolour salad,Healthy Food,veg,vinegar honey sugar soy sauce salt garlic clov...,Healthy Food veg vinegar honey sugar soy sauce...
4,5,christmas cake,Dessert,veg,christmas dry fruits pre soaked orange zest le...,Dessert veg christmas dry fruits pre soaked or...
...,...,...,...,...,...,...
395,396,Kimchi Toast,Korean,veg,cream cheese chopped kimchi scallions country ...,Korean veg cream cheese chopped kimchi scallio...
396,397,"Tacos de Gobernador (Shrimp, Poblano, and Chee...",Mexican,non-veg,poblano chiles bacon shrips red salsa garlic c...,Mexican non-veg poblano chiles bacon shrips re...
397,398,Melted Broccoli Pasta With Capers and Anchovies,French,non-veg,broccoli bread crumbs anchovy fillets garlic c...,French non-veg broccoli bread crumbs anchovy f...
398,399,Lemon-Ginger Cake with Pistachios,Dessert,non-veg,egg yolks lemon juice unsalted butter purpose ...,Dessert non-veg egg yolks lemon juice unsalted...


In [133]:
df['Description']

0      Healthy Food veg white balsamic vinegar lemon ...
1      Healthy Food non-veg olive oil chicken mince g...
2      Snack veg almonds whole egg white curry leaves...
3      Healthy Food veg vinegar honey sugar soy sauce...
4      Dessert veg christmas dry fruits pre soaked or...
                             ...                        
395    Korean veg cream cheese chopped kimchi scallio...
396    Mexican non-veg poblano chiles bacon shrips re...
397    French non-veg broccoli bread crumbs anchovy f...
398    Dessert non-veg egg yolks lemon juice unsalted...
399    Healthy Food veg kosher salt rosemary garlic p...
Name: Description, Length: 400, dtype: object

In [136]:
df['Description'].str.lower()

0      healthy food veg white balsamic vinegar lemon ...
1      healthy food non-veg olive oil chicken mince g...
2      snack veg almonds whole egg white curry leaves...
3      healthy food veg vinegar honey sugar soy sauce...
4      dessert veg christmas dry fruits pre soaked or...
                             ...                        
395    korean veg cream cheese chopped kimchi scallio...
396    mexican non-veg poblano chiles bacon shrips re...
397    french non-veg broccoli bread crumbs anchovy f...
398    dessert non-veg egg yolks lemon juice unsalted...
399    healthy food veg kosher salt rosemary garlic p...
Name: Description, Length: 400, dtype: object

In [137]:
from sklearn.feature_extraction.text import CountVectorizer

In [138]:
vector = CountVectorizer()

In [139]:
vec_desc =  vector.fit_transform(df['Description'])

In [140]:
vec_desc.shape

(400, 1196)

In [141]:
similar_vec = cosine_similarity(vec_desc)

In [142]:
similar_vec.shape

(400, 400)

In [150]:
similar_vec

array([[1.        , 0.37282186, 0.24397502, ..., 0.32084447, 0.20203051,
        0.39036003],
       [0.37282186, 1.        , 0.16979054, ..., 0.3588535 , 0.17574991,
        0.42447636],
       [0.24397502, 0.16979054, 1.        , ..., 0.12524486, 0.20701967,
        0.13333333],
       ...,
       [0.32084447, 0.3588535 , 0.12524486, ..., 1.        , 0.12964074,
        0.31311215],
       [0.20203051, 0.17574991, 0.20701967, ..., 0.12964074, 1.        ,
        0.06900656],
       [0.39036003, 0.42447636, 0.13333333, ..., 0.31311215, 0.06900656,
        1.        ]])

In [204]:
def food_recommend_vec(food):
    food_index = df[df['Name']== food].index[0]
    distances = similar_vec[food_index]
    food_list = sorted(list(enumerate(distances)),reverse = True, key = lambda x:x[1])[1:6]
                       
    for i in food_list:
        
        print(df['Name'].iloc[i[0]])
        

In [163]:
food_recommend_vec('summer squash salad',similar_vec)

shepherds salad (tamatar-kheera salaad)
green cucumber shots
Grilled Chicken with Almond and Garlic Sauce
mustard-parmesan whole roasted cauliflower
baked namakpara with roasted almond dip


In [164]:
food_recommend_vec('veg fried rice')

veg hakka noodles
egg and garlic fried rice
prawn fried rice
lotus leaf wrapped fried rice
chicken potli


In [165]:
food_recommend_vec('christmas cake')

lemon poppy seed cake 
Fig and Sesame Tart with Cardamom Orange Cream
lemon poppy seed cake
chocolate lava cake
Lemon-Ginger Cake with Pistachios


In [166]:
food_recommend('christmas cake')

Grilled Chicken with Almond and Garlic Sauce
Fig and Sesame Tart with Cardamom Orange Cream
baba budan no. 7
whole wheat cake
chocolate chip cheesecake


In [167]:
food_recommend_vec('tricolour salad')

chicken minced salad
chilli chicken
roast turkey with cranberry sauce
Sesame Noodles with Chili Oil and Scallions
vegetable som tam salad


In [168]:
food_recommend('tricolour salad')

chilli chicken
Vietnamese Chicken Salad
chicken minced salad
veg fried rice
vegetable som tam salad


# Collaborative Filtering

In [171]:
ratings = pd.read_csv("ratings_food[1].csv")

In [173]:
ratings.head()

Unnamed: 0,User_ID,Food_ID,Rating
0,1.0,88.0,4.0
1,1.0,46.0,3.0
2,1.0,24.0,5.0
3,1.0,25.0,4.0
4,2.0,49.0,1.0


In [174]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 512 entries, 0 to 511
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   User_ID  511 non-null    float64
 1   Food_ID  511 non-null    float64
 2   Rating   511 non-null    float64
dtypes: float64(3)
memory usage: 12.1 KB


In [175]:
ratings.isnull().sum()

User_ID    1
Food_ID    1
Rating     1
dtype: int64

In [176]:
ratings.tail()

Unnamed: 0,User_ID,Food_ID,Rating
507,99.0,22.0,1.0
508,100.0,24.0,10.0
509,100.0,233.0,10.0
510,100.0,29.0,7.0
511,,,


In [177]:
ratings = ratings[:511]

In [178]:
ratings.isnull().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 511 entries, 0 to 510
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   User_ID  511 non-null    bool 
 1   Food_ID  511 non-null    bool 
 2   Rating   511 non-null    bool 
dtypes: bool(3)
memory usage: 1.6 KB


In [180]:
ratings.head()

Unnamed: 0,User_ID,Food_ID,Rating
0,1.0,88.0,4.0
1,1.0,46.0,3.0
2,1.0,24.0,5.0
3,1.0,25.0,4.0
4,2.0,49.0,1.0


In [182]:
food_rating = ratings.groupby(by='Food_ID').count()

In [186]:
food_rating

Unnamed: 0_level_0,User_ID,Rating
Food_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,2
2.0,3,3
3.0,2,2
4.0,2,2
5.0,6,6
...,...,...
305.0,1,1
306.0,1,1
307.0,1,1
308.0,1,1


In [188]:
food_rating = food_rating['Rating'].reset_index().rename(columns={'Rating':'Rating_count'})

In [189]:
food_rating

Unnamed: 0,Food_ID,Rating_count
0,1.0,2
1,2.0,3
2,3.0,2
3,4.0,2
4,5.0,6
...,...,...
304,305.0,1
305,306.0,1
306,307.0,1
307,308.0,1


In [190]:
food_rating['Rating_count'].describe()

count    309.000000
mean       1.653722
std        1.107748
min        1.000000
25%        1.000000
50%        1.000000
75%        2.000000
max        7.000000
Name: Rating_count, dtype: float64

In [192]:
user_rating = ratings.groupby(by= 'User_ID').count()

In [193]:
user_rating

Unnamed: 0_level_0,Food_ID,Rating
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,4,4
2.0,4,4
3.0,9,9
4.0,6,6
5.0,6,6
...,...,...
96.0,6,6
97.0,7,7
98.0,7,7
99.0,6,6


In [195]:
user_rating =  user_rating['Rating'].reset_index().rename(columns={'Rating':'Rating_count'})

In [197]:
user_rating['Rating_count'].describe()

count    100.000000
mean       5.110000
std        2.352282
min        1.000000
25%        3.000000
50%        5.000000
75%        7.000000
max       11.000000
Name: Rating_count, dtype: float64

In [198]:
rating_matrix = ratings.pivot_table(index='Food_ID',columns='User_ID',values='Rating').fillna(0)
rating_matrix.head()

User_ID,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0
Food_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,7.0,0.0,0.0


In [199]:
rating_matrix.shape

(309, 100)

In [200]:
from scipy.sparse import csr_matrix
csr_rating_matrix =  csr_matrix(rating_matrix.values)
print(csr_rating_matrix)

  (0, 48)	5.0
  (0, 70)	10.0
  (1, 8)	3.0
  (1, 21)	5.0
  (1, 38)	10.0
  (2, 76)	1.0
  (2, 88)	7.0
  (3, 40)	6.0
  (3, 89)	6.0
  (4, 27)	10.0
  (4, 38)	10.0
  (4, 72)	7.0
  (4, 87)	3.0
  (4, 93)	2.0
  (4, 97)	7.0
  (5, 42)	10.0
  (5, 48)	6.0
  (5, 76)	6.0
  (5, 91)	1.0
  (6, 16)	4.0
  (6, 47)	5.0
  (6, 62)	9.0
  (6, 69)	8.0
  (6, 70)	8.0
  (7, 4)	6.0
  :	:
  (284, 30)	9.0
  (285, 80)	6.0
  (286, 24)	3.0
  (287, 54)	3.0
  (288, 55)	9.0
  (289, 31)	7.0
  (290, 15)	1.0
  (291, 2)	8.0
  (292, 95)	5.0
  (293, 41)	4.0
  (294, 43)	10.0
  (295, 41)	10.0
  (296, 94)	5.0
  (297, 55)	4.0
  (298, 2)	1.0
  (299, 28)	9.0
  (300, 53)	1.0
  (301, 77)	5.0
  (302, 63)	6.0
  (303, 29)	1.0
  (304, 55)	9.0
  (305, 79)	8.0
  (306, 70)	1.0
  (307, 96)	3.0
  (308, 31)	5.0


In [201]:
from sklearn.neighbors import NearestNeighbors

In [202]:
recommender = NearestNeighbors(metric='cosine')

In [203]:
recommender.fit(csr_rating_matrix)

NearestNeighbors(metric='cosine')

In [212]:
def Get_Recommendations(title):
    user= df[df['Name']==title]
    user_index = np.where(rating_matrix.index==int(user['Food_ID']))[0][0]
    user_ratings = rating_matrix.iloc[user_index]

    reshaped = user_ratings.values.reshape(1,-1)
    distances, indices = recommender.kneighbors(reshaped,n_neighbors=16)
    
    nearest_neighbors_indices = rating_matrix.iloc[indices[0]].index[1:]
    nearest_neighbors = pd.DataFrame({'Food_ID': nearest_neighbors_indices})
    
    
    result = pd.merge(nearest_neighbors,df,on='Food_ID',how='left')
    
    
    return result.head()

In [214]:
Get_Recommendations('tricolour salad')

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe,Description
0,126.0,andhra crab meat masala,Indian,non-veg,processed crab meat refined oil curry leaves g...,Indian non-veg processed crab meat refined oil...
1,75.0,detox haldi tea,Beverage,veg,haldi ginger black pepper honey water,Beverage veg haldi ginger black pepper honey w...
2,100.0,spicy chicken curry,Indian,non-veg,oil ghee onion paste garlic paste ginger paste...,Indian non-veg oil ghee onion paste garlic pas...
3,259.0,ragi coconut ladoo (laddu),Dessert,veg,finger millet flour ragi jaggery peanuts cocon...,Dessert veg finger millet flour ragi jaggery p...
4,51.0,christmas chocolate fudge cookies,Dessert,veg,unsalted butter brown sugar chocolate chocolat...,Dessert veg unsalted butter brown sugar chocol...


In [215]:
food_recommend('tricolour salad')

chilli chicken
Vietnamese Chicken Salad
chicken minced salad
veg fried rice
vegetable som tam salad


In [216]:
food_recommend_vec('tricolour salad')

chicken minced salad
chilli chicken
roast turkey with cranberry sauce
Sesame Noodles with Chili Oil and Scallions
vegetable som tam salad


In [218]:
food_rating_df = pd.merge(ratings,df,on='Food_ID',how='left')

In [220]:
food_rating_df[food_rating_df['Name']== 'tricolour salad']['Rating']

210    6.0
457    6.0
Name: Rating, dtype: float64

In [221]:
food_rating_df[food_rating_df['Rating']==6]

Unnamed: 0,User_ID,Food_ID,Rating,Name,C_Type,Veg_Non,Describe,Description
12,3.0,209.0,6.0,camel milk cake tart,Dessert,veg,camel milk sugar vinegar butter brown sugar ma...,Dessert veg camel milk sugar vinegar butter br...
21,4.0,128.0,6.0,thai lamb balls,Thai,non-veg,lamb minced couscous scallion garlic egg parsl...,Thai non-veg lamb minced couscous scallion gar...
27,5.0,8.0,6.0,lamb and chargrilled bell pepper soup,Healthy Food,non-veg,lamb bones preferably shank shoulder onions ce...,Healthy Food non-veg lamb bones preferably sha...
28,5.0,27.0,6.0,hawaiin papaya salad,Healthy Food,veg,papaya fresh lime juiced watermelon balls smal...,Healthy Food veg papaya fresh lime juiced wate...
32,6.0,213.0,6.0,berry parfait hazelnut white chocolate sable,Dessert,veg,berry parfait egg yolk caster sugar berry pure...,Dessert veg berry parfait egg yolk caster suga...
53,9.0,184.0,6.0,vegetable bruschetta,Italian,veg,baguette grilled slices black olive tapenade a...,Italian veg baguette grilled slices black oliv...
77,15.0,116.0,6.0,pan seared thigh of chicken,Mexican,non-veg,chicken thai salt pepper lemon fresh thyme bar...,Mexican non-veg chicken thai salt pepper lemon...
82,15.0,238.0,6.0,holi special malai kofta,Indian,veg,potatoes paneer cottage cheese maida coriander...,Indian veg potatoes paneer cottage cheese maid...
101,18.0,17.0,6.0,baked namakpara with roasted almond dip,Snack,veg,almonds crushed tomato garlic cloves basil spr...,Snack veg almonds crushed tomato garlic cloves...
107,20.0,46.0,6.0,steam bunny chicken bao,Japanese,non-veg,buns purpose white flour dry yeast sugar salt ...,Japanese non-veg buns purpose white flour dry ...
