In this project, a kNN (k-nearest neighbor) recommender engine is built to make predictions regarding food.

**Given a food data set, what are the k most similar foods based on the query?**

### Experiment 1

In [41]:
# Importing packages
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors

The implementation assumes that all columns contain numerical data. 

In [44]:
#loading dataset
df=pd.read_csv("Dataset/Food Dataset - Sheet1.csv",index_col='Food ID')

In [45]:
df

Unnamed: 0_level_0,Food Name,Rating,User Region
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Poha,3,North
2,Kadhai Paneer,4,North
3,Mix Veg,4,North
4,Aloo Paratha,2,North
5,Gobhi Paratha,4,North
...,...,...,...
81,Zunka Bhakri,3,West
82,Bombay Duck,3,West
83,Methi ka Thepla,4,West
84,Gatte Ki Sabzi,3,West


In [46]:
one_hot = pd.get_dummies(df['User Region'])

In [47]:
one_hot

Unnamed: 0_level_0,East,North,South,West
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0,1,0,0
2,0,1,0,0
3,0,1,0,0
4,0,1,0,0
5,0,1,0,0
...,...,...,...,...
81,0,0,0,1
82,0,0,0,1
83,0,0,0,1
84,0,0,0,1


In [48]:
df = df.join(one_hot)

In [49]:
df

Unnamed: 0_level_0,Food Name,Rating,User Region,East,North,South,West
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Poha,3,North,0,1,0,0
2,Kadhai Paneer,4,North,0,1,0,0
3,Mix Veg,4,North,0,1,0,0
4,Aloo Paratha,2,North,0,1,0,0
5,Gobhi Paratha,4,North,0,1,0,0
...,...,...,...,...,...,...,...
81,Zunka Bhakri,3,West,0,0,0,1
82,Bombay Duck,3,West,0,0,0,1
83,Methi ka Thepla,4,West,0,0,0,1
84,Gatte Ki Sabzi,3,West,0,0,0,1


In [50]:
df = df.drop('User Region',axis = 1)

In [51]:
df

Unnamed: 0_level_0,Food Name,Rating,East,North,South,West
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Poha,3,0,1,0,0
2,Kadhai Paneer,4,0,1,0,0
3,Mix Veg,4,0,1,0,0
4,Aloo Paratha,2,0,1,0,0
5,Gobhi Paratha,4,0,1,0,0
...,...,...,...,...,...,...
81,Zunka Bhakri,3,0,0,0,1
82,Bombay Duck,3,0,0,0,1
83,Methi ka Thepla,4,0,0,0,1
84,Gatte Ki Sabzi,3,0,0,0,1


In [52]:
# Adding The Post data
post_data = {'Rating':[4], 'East':1, 'West':0, 'South':0, 'North':0}
the_post = pd.DataFrame(data=post_data, index=None)

In [53]:
# Selecting feature variables 
feature_cols = df.drop(['Food Name'], axis=1)
X = feature_cols

In [54]:
# Using NearestNeighbors model and kneighbors() method to find k neighbors.
# Setting n_neighbors = 5 to find 5 similar movies 
# Using brute force due to small sample size (30) and few dimensions (11)

neigh = NearestNeighbors(n_neighbors=10, algorithm='brute')
neigh.fit(X)
distances, indices = neigh.kneighbors(the_post)

Feature names must be in the same order as they were in fit.



In [56]:
# Printing the top 5 food items.

print('Recommendations according "Query":\n')
for i in range(len(distances.flatten())):
  print('{0}: {1}, with a distance of {2}.'.format(i+1, df['Food Name'].iloc[indices.flatten()[i]],distances.flatten()[i]))

Recommendations according "Query":

1: Veg Manchurian, with a distance of 0.0.
2: Dalma, with a distance of 0.0.
3: Puri Sabzi, with a distance of 0.0.
4: Raw banana gravy sabji, with a distance of 1.0.
5: Litti Chokha, with a distance of 1.0.
6: Thukpa, with a distance of 1.0.
7: Veg chowmein, with a distance of 1.0.
8: Machcher Jhol, with a distance of 1.0.
9: Jalfrezi, with a distance of 1.0.
10: Chicken Manchurian, with a distance of 1.0.


### Experiment 2

In [26]:
# Importing packages
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors

In [2]:
#loading dataset
food2=pd.read_csv("Dataset/FoodDataset2.csv",index_col='Food ID')

In [3]:
food2

Unnamed: 0_level_0,Food Name,Rating,Food Region,Food Type
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Poha,3,North,Sautéing
2,Kadhai Paneer,2,North,Sautéing
3,Mix Veg,3,North,Sautéing
4,Aloo Paratha,4,North,Searing
5,Gobhi Paratha,1,North,Searing
...,...,...,...,...
81,Zunka Bhakri,1,West,Searing
82,Bombay Duck,4,West,Searing
83,Methi ka Thepla,5,West,Searing
84,Gatte Ki Sabzi,3,West,Sautéing


In [4]:
one_hot_region = pd.get_dummies(food2['Food Region'])
one_hot_region

Unnamed: 0_level_0,East,North,South,West
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0,1,0,0
2,0,1,0,0
3,0,1,0,0
4,0,1,0,0
5,0,1,0,0
...,...,...,...,...
81,0,0,0,1
82,0,0,0,1
83,0,0,0,1
84,0,0,0,1


In [5]:
one_hot_foodtype = pd.get_dummies(food2['Food Type'])
one_hot_foodtype

Unnamed: 0_level_0,Baking,Broiling,Frying,Grilling,Roasting,Sautéing,Searing
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,0,0,0,0,1,0
2,0,0,0,0,0,1,0
3,0,0,0,0,0,1,0
4,0,0,0,0,0,0,1
5,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...
81,0,0,0,0,0,0,1
82,0,0,0,0,0,0,1
83,0,0,0,0,0,0,1
84,0,0,0,0,0,1,0


In [6]:
food2 = food2.join(one_hot_foodtype)

In [7]:
food2

Unnamed: 0_level_0,Food Name,Rating,Food Region,Food Type,Baking,Broiling,Frying,Grilling,Roasting,Sautéing,Searing
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Poha,3,North,Sautéing,0,0,0,0,0,1,0
2,Kadhai Paneer,2,North,Sautéing,0,0,0,0,0,1,0
3,Mix Veg,3,North,Sautéing,0,0,0,0,0,1,0
4,Aloo Paratha,4,North,Searing,0,0,0,0,0,0,1
5,Gobhi Paratha,1,North,Searing,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
81,Zunka Bhakri,1,West,Searing,0,0,0,0,0,0,1
82,Bombay Duck,4,West,Searing,0,0,0,0,0,0,1
83,Methi ka Thepla,5,West,Searing,0,0,0,0,0,0,1
84,Gatte Ki Sabzi,3,West,Sautéing,0,0,0,0,0,1,0


In [8]:
food2 = food2.join(one_hot_region)

In [10]:
food2

Unnamed: 0_level_0,Food Name,Rating,Food Region,Food Type,Baking,Broiling,Frying,Grilling,Roasting,Sautéing,Searing,East,North,South,West
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,Poha,3,North,Sautéing,0,0,0,0,0,1,0,0,1,0,0
2,Kadhai Paneer,2,North,Sautéing,0,0,0,0,0,1,0,0,1,0,0
3,Mix Veg,3,North,Sautéing,0,0,0,0,0,1,0,0,1,0,0
4,Aloo Paratha,4,North,Searing,0,0,0,0,0,0,1,0,1,0,0
5,Gobhi Paratha,1,North,Searing,0,0,0,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,Zunka Bhakri,1,West,Searing,0,0,0,0,0,0,1,0,0,0,1
82,Bombay Duck,4,West,Searing,0,0,0,0,0,0,1,0,0,0,1
83,Methi ka Thepla,5,West,Searing,0,0,0,0,0,0,1,0,0,0,1
84,Gatte Ki Sabzi,3,West,Sautéing,0,0,0,0,0,1,0,0,0,0,1


In [13]:
food2 = food2.drop(['Food Region','Food Type'],axis = 1)

In [14]:
food2

Unnamed: 0_level_0,Food Name,Rating,Baking,Broiling,Frying,Grilling,Roasting,Sautéing,Searing,East,North,South,West
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,Poha,3,0,0,0,0,0,1,0,0,1,0,0
2,Kadhai Paneer,2,0,0,0,0,0,1,0,0,1,0,0
3,Mix Veg,3,0,0,0,0,0,1,0,0,1,0,0
4,Aloo Paratha,4,0,0,0,0,0,0,1,0,1,0,0
5,Gobhi Paratha,1,0,0,0,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,Zunka Bhakri,1,0,0,0,0,0,0,1,0,0,0,1
82,Bombay Duck,4,0,0,0,0,0,0,1,0,0,0,1
83,Methi ka Thepla,5,0,0,0,0,0,0,1,0,0,0,1
84,Gatte Ki Sabzi,3,0,0,0,0,0,1,0,0,0,0,1


In [35]:
# Adding The Post data: Query
post_data = {'Rating':[7],'East':1, 'West':0, 'South':0, 'North':0, 'Baking':0,'Broiling':1,'Frying':0,'Grilling':1,
             'Roasting':1,'Sautéing':1,'Searing':0}
the_post = pd.DataFrame(data=post_data, index=None)

In [36]:
# Selecting feature variables 
feature_cols = food2.drop(['Food Name'], axis=1)
X = feature_cols

In [37]:
X

Unnamed: 0_level_0,Rating,Baking,Broiling,Frying,Grilling,Roasting,Sautéing,Searing,East,North,South,West
Food ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,3,0,0,0,0,0,1,0,0,1,0,0
2,2,0,0,0,0,0,1,0,0,1,0,0
3,3,0,0,0,0,0,1,0,0,1,0,0
4,4,0,0,0,0,0,0,1,0,1,0,0
5,1,0,0,0,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
81,1,0,0,0,0,0,0,1,0,0,0,1
82,4,0,0,0,0,0,0,1,0,0,0,1
83,5,0,0,0,0,0,0,1,0,0,0,1
84,3,0,0,0,0,0,1,0,0,0,0,1


In [38]:
# Using NearestNeighbors model and kneighbors() method to find k neighbors.
# Setting n_neighbors = 5 to find 5 similar movies 
# Using brute force due to small sample size (30) and few dimensions (11)

neigh = NearestNeighbors(n_neighbors=10)#, algorithm='brute')
neigh.fit(X)
distances, indices = neigh.kneighbors(the_post)

Feature names must be in the same order as they were in fit.



In [40]:
# Printing the top 5 movie recommendations:

print('Recommendations for "Query":\n')
for i in range(len(distances.flatten())):
    print('{0}: {1}, with a distance of {2}.'.format(i+1, food2['Food Name'].iloc[indices.flatten()[i]],distances.flatten()[i]))

Recommendations for "Query":

1: Dal Makhani, with a distance of 2.6457513110645907.
2: Jalfrezi, with a distance of 2.6457513110645907.
3: Manipuri Kelli Chana, with a distance of 3.0.
4: Pav Bhaji, with a distance of 3.0.
5: Aloo Gobhi, with a distance of 3.0.
6: Aloo Gazar Matar sabji, with a distance of 3.0.
7: Litti Chokha, with a distance of 3.0.
8: Veg Biryani, with a distance of 3.0.
9: Chicken Tikka Masala, with a distance of 3.0.
10: Tandoori pomfret, with a distance of 3.0.
