## Create a restaurant recommendation system based on user preferences.

shrikrishna 

In [1]:
# Import Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import pdist, squareform
pd.reset_option('display.max_rows')

Matplotlib is building the font cache; this may take a moment.


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Creating a Dataframe
df = pd.read_csv("Dataset.csv")
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [4]:
df.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [5]:
dfRS = df[['Restaurant ID','Restaurant Name','Cuisines','Aggregate rating','Votes']]
dfRS

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Aggregate rating,Votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


## Data Cleaning 

In [6]:
# Gathering information of every columns

# Columns Description
def dataDesc():
    listItem = []
    for col in dfRS.columns :
        listItem.append(
            [col,
            dfRS[col].dtype,
            dfRS[col].isna().sum(),
            round(dfRS[col].isna().sum()/len(dfRS)*100,2),
            dfRS[col].nunique(),
            list(dfRS[col].drop_duplicates().sample(2).values)]
        )
    descData = pd.DataFrame(data = listItem,
                            columns = ['Column','Data Type', 'Missing Value',
                                        'Pct Missing Value', 'Num Unique', 'Unique Sample'])
    return descData

dataDesc()

Unnamed: 0,Column,Data Type,Missing Value,Pct Missing Value,Num Unique,Unique Sample
0,Restaurant ID,int64,0,0.0,9551,"[2727, 1372]"
1,Restaurant Name,object,0,0.0,7446,"[K Raga's, Bakerz Lodge]"
2,Cuisines,object,9,0.09,1825,"[Cuisine Varies, Italian, Mediterranean]"
3,Aggregate rating,float64,0,0.0,33,"[3.9, 2.0]"
4,Votes,int64,0,0.0,1012,"[163, 501]"


In [7]:
dfRS = dfRS.dropna()

In [8]:
dfRS

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Aggregate rating,Votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


In [9]:
# Renaming the Columns
dfRS = dfRS.rename(columns={'Restaurant ID': 'restaurant_id'})
dfRS = dfRS.rename(columns={'Restaurant Name': 'restaurant_name'})
dfRS = dfRS.rename(columns={'Cuisines': 'cuisines'})
dfRS = dfRS.rename(columns={'Aggregate rating': 'aggregate_rating'})
dfRS = dfRS.rename(columns={'Votes': 'votes'})

In [10]:
dfRS

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


In [11]:
# Check for Duplicates
dfRS.duplicated().sum()

0

In [12]:
dfRS['restaurant_name'].duplicated().sum()

2105

In [13]:
dfRS['restaurant_name'].value_counts()

Cafe Coffee Day                       83
Domino's Pizza                        79
Subway                                63
Green Chick Chop                      51
McDonald's                            48
                                      ..
Da Bawarchi                            1
Malik Vegetarian Rasoi                 1
Anchorage Cafe Restaurant Wine Bar     1
Nukkad                                 1
Cafe Bite                              1
Name: restaurant_name, Length: 7437, dtype: int64

In [14]:
dfRS = dfRS.sort_values(by=['restaurant_name','aggregate_rating'],ascending=False)

In [15]:
dfRS[dfRS['restaurant_name']=="Domino's Pizza"].head()

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
3031,143,Domino's Pizza,"Pizza, Fast Food",3.7,336
1844,5065,Domino's Pizza,"Pizza, Fast Food",3.6,146
2448,15078,Domino's Pizza,"Pizza, Fast Food",3.6,86
7618,18263236,Domino's Pizza,"Pizza, Fast Food",3.6,24
8437,384,Domino's Pizza,"Pizza, Fast Food",3.6,547


In [16]:
# Dropping duplicaes only keeping first Value.
dfRS = dfRS.drop_duplicates('restaurant_name',keep='first')
dfRS

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
9523,6000871,�ukura��a Sofras۱,"Kebab, Izgara",4.4,296
3120,18222559,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",4.1,492
9334,7100938,wagamama,"Japanese, Asian",3.7,131
9454,6401789,tashas,"Cafe, Mediterranean",4.1,374
4659,18361747,t Lounge by Dilmah,"Cafe, Tea, Desserts",3.6,34
...,...,...,...,...,...
8692,18317511,#Urban Caf��,"North Indian, Chinese, Italian",3.3,49
6998,18336489,#OFF Campus,"Cafe, Continental, Italian, Fast Food",3.7,216
2613,18311951,#InstaFreeze,Ice Cream,0.0,2
9148,18378803,#Dilliwaala6,North Indian,3.7,124


In [17]:
dfRS['restaurant_name'].value_counts()

Knights Chaska                   1
Monkeypod Kitchen by Merriman    1
HuHot Mongolian Grill            1
Punjabi Dhani                    1
The Flying Pan                   1
                                ..
Punjabi Chaska                   1
Dada Ka Dhaba                    1
Yummy Cake                       1
Bern's Steak House               1
Cafe Bite                        1
Name: restaurant_name, Length: 7437, dtype: int64

In [18]:
dfRS = dfRS[dfRS['aggregate_rating']>=4.0]
dfRS

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
9523,6000871,�ukura��a Sofras۱,"Kebab, Izgara",4.4,296
3120,18222559,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",4.1,492
9454,6401789,tashas,"Cafe, Mediterranean",4.1,374
9385,6113857,sketch Gallery,"British, Contemporary",4.5,148
1837,18418247,feel ALIVE,"North Indian, American, Asian, Biryani",4.7,69
...,...,...,...,...,...
1468,18408054,19 Flavours Biryani,"Mughlai, Hyderabadi",4.1,84
2484,18233317,145 Kala Ghoda,"Fast Food, Beverages, Desserts",4.2,1606
2292,2100784,11th Avenue Cafe Bistro,"Cafe, American, Italian, Continental",4.1,377
751,2600031,10 Downing Street,"North Indian, Chinese",4.0,257


In [19]:
# Split Cuisines into list
dfRS['cuisines'] = dfRS['cuisines'].str.split(', ')
dfRS

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
9523,6000871,�ukura��a Sofras۱,"[Kebab, Izgara]",4.4,296
3120,18222559,{Niche} - Cafe & Bar,"[North Indian, Chinese, Italian, Continental]",4.1,492
9454,6401789,tashas,"[Cafe, Mediterranean]",4.1,374
9385,6113857,sketch Gallery,"[British, Contemporary]",4.5,148
1837,18418247,feel ALIVE,"[North Indian, American, Asian, Biryani]",4.7,69
...,...,...,...,...,...
1468,18408054,19 Flavours Biryani,"[Mughlai, Hyderabadi]",4.1,84
2484,18233317,145 Kala Ghoda,"[Fast Food, Beverages, Desserts]",4.2,1606
2292,2100784,11th Avenue Cafe Bistro,"[Cafe, American, Italian, Continental]",4.1,377
751,2600031,10 Downing Street,"[North Indian, Chinese]",4.0,257


In [20]:
# Exploding 'cuisines'
dfRS = dfRS.explode('cuisines')
dfRS

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
9523,6000871,�ukura��a Sofras۱,Kebab,4.4,296
9523,6000871,�ukura��a Sofras۱,Izgara,4.4,296
3120,18222559,{Niche} - Cafe & Bar,North Indian,4.1,492
3120,18222559,{Niche} - Cafe & Bar,Chinese,4.1,492
3120,18222559,{Niche} - Cafe & Bar,Italian,4.1,492
...,...,...,...,...,...
2292,2100784,11th Avenue Cafe Bistro,Italian,4.1,377
2292,2100784,11th Avenue Cafe Bistro,Continental,4.1,377
751,2600031,10 Downing Street,North Indian,4.0,257
751,2600031,10 Downing Street,Chinese,4.0,257


In [21]:
# Cuisines Check
dfRS['cuisines'].value_counts()

North Indian    270
Italian         237
Chinese         200
Continental     199
Cafe            177
               ... 
Persian           1
Asian Fusion      1
Nepalese          1
New American      1
Peranakan         1
Name: cuisines, Length: 128, dtype: int64

In [22]:
# Cross Tabulate Restaurant Name and Cuisines
xTabRestoCuisines = pd.crosstab(dfRS['restaurant_name'],
                                dfRS['cuisines'])

In [23]:
xTabRestoCuisines

cuisines,Afghani,African,American,Andhra,Arabian,Argentine,Asian,Asian Fusion,Australian,Awadhi,...,Teriyaki,Tex-Mex,Thai,Tibetan,Turkish,Turkish Pizza,Vegetarian,Vietnamese,Western,World Cuisine
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Ohana,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10 Downing Street,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11th Avenue Cafe Bistro,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
145 Kala Ghoda,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19 Flavours Biryani,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feel ALIVE,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
sketch Gallery,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tashas,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
{Niche} - Cafe & Bar,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Checking on restaurant name value
xTabRestoCuisines.loc['feel ALIVE'].values

array([0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [25]:
# Resto Names Sample
dfRS['restaurant_name'].sample(20, random_state=101)

439                     Mrs. Wilkes' Dining Room
9545                                    Baltazar
6921                                   Rose Cafe
108                          Big City Bread Cafe
2311                                Olive Bistro
120                            Transmetropolitan
4387                          Maxims Pastry Shop
9222                                      Meraki
9359                            Mimi's Bakehouse
2438                            Cappuccino Blast
8049                               Oh So Stoned!
9544                       Karak�_y G�_ll�_o��lu
579                                    Via Delhi
376                  Tu-Do Vietnamese Restaurant
4088    Tian - Asian Cuisine Studio - ITC Maurya
153                            Boise Fry Company
172                           Ting's Red Lantern
3107                                Odeon Social
9513                                  The Sizzle
839                              Sree Annapoorna
Name: restaurant_nam

In [26]:
# Measure Similarity
print(jaccard_score(xTabRestoCuisines.loc["Olive Bistro"].values,
                    xTabRestoCuisines.loc["Rose Cafe"].values))

0.3333333333333333


In [27]:
# Create Similarity Value DF
jaccardDist = pdist(xTabRestoCuisines.values, metric='jaccard')
jaccardMatrix = squareform(jaccardDist)
jaccardSim = 1 - jaccardMatrix
dfJaccard = pd.DataFrame(
    jaccardSim,
    index=xTabRestoCuisines.index,
    columns=xTabRestoCuisines.index)

dfJaccard

restaurant_name,'Ohana,10 Downing Street,11th Avenue Cafe Bistro,145 Kala Ghoda,19 Flavours Biryani,1918 Bistro & Grill,2 Dog,22nd Parallel,3 Wise Monkeys,38 Barracks,...,Zoeys Pizzeria,Zolocrust - Hotel Clarks Amer,Zombie Burger + Drink Lab,Zuka Choco-la,Zunzi's,feel ALIVE,sketch Gallery,tashas,{Niche} - Cafe & Bar,�ukura��a Sofras۱
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Ohana,1.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.00,0.000000,0.0,0.0,0.000000,0.0
10 Downing Street,0.0,1.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.200000,...,0.0,0.0,0.0,0.000000,0.00,0.200000,0.0,0.0,0.500000,0.0
11th Avenue Cafe Bistro,0.0,0.0,1.000000,0.0,0.0,0.0,0.166667,0.0,0.0,0.333333,...,0.0,0.4,0.0,0.000000,0.00,0.142857,0.0,0.2,0.333333,0.0
145 Kala Ghoda,0.0,0.0,0.000000,1.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.2,0.333333,0.00,0.000000,0.0,0.0,0.000000,0.0
19 Flavours Biryani,0.0,0.0,0.000000,0.0,1.0,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.00,0.000000,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feel ALIVE,0.0,0.2,0.142857,0.0,0.0,0.0,0.166667,0.0,0.0,0.600000,...,0.0,0.0,0.0,0.000000,0.00,1.000000,0.0,0.0,0.142857,0.0
sketch Gallery,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.00,0.000000,1.0,0.0,0.000000,0.0
tashas,0.0,0.0,0.200000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.25,0.000000,0.0,1.0,0.000000,0.0
{Niche} - Cafe & Bar,0.0,0.5,0.333333,0.0,0.0,0.0,0.000000,0.0,0.0,0.333333,...,0.0,0.4,0.0,0.000000,0.00,0.142857,0.0,0.0,1.000000,0.0


In [28]:
# Resto Names Sample
dfRS['restaurant_name'].sample(20)

3703             Sakley's The Mountain Cafe
9169                       Agent Jack's Bar
385                             Tuscan Oven
373              Cactus Flower Cafe Navarre
8072                               Mamagoto
9275               Flying Spaghetti Monster
108                     Big City Bread Cafe
9537                    J'adore Chocolatier
2334                      Cakesmith's Alley
55                           Talho Capixaba
6461                        The Coffee Shop
7083                   Elma's at Good Earth
9443                 Origin Coffee Roasting
9307                            Chinoiserie
252                              Cool Basil
1250    Delifrance - The France Cafe Bakery
2343                                    WTF
6447                           Bakerz Lodge
2441                          Indian Summer
823                          Coal Barbecues
Name: restaurant_name, dtype: object

## Final Recommendation System

In [29]:
# Input Initial Restaurant Name
resto = 'Ooma'

sim = dfJaccard.loc[resto].sort_values(ascending=False)

sim = pd.DataFrame({'restaurant_name': sim.index, 'simScore': sim.values})
sim = sim[(sim['restaurant_name']!= resto) & (sim['simScore']>=0.7)].head(5)

# Merge The Rating
RestoRec = pd.merge(sim,dfRS[['restaurant_name','aggregate_rating']],how='inner',on='restaurant_name')
FinalRestoRec = RestoRec.sort_values('aggregate_rating',ascending=False).drop_duplicates('restaurant_name',keep='first')

In [30]:
FinalRestoRec

Unnamed: 0,restaurant_name,simScore,aggregate_rating
4,Miyabi 9,1.0,4.8
8,Roka,1.0,4.6
0,Nobu,1.0,4.4
2,Ichiban,1.0,4.3
6,Nagai,1.0,4.3


### Conclusion:
    The above Data will show up to top 5 recommended restaurants with the best rating, the rating is also curated only 4 and above, so the reccomendation system porvide good rating objectively.