# **Restaurant Recommendation System**

## Supporting Libraries

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go
import seaborn as sns
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')
import nltk
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

## Loading dataset

In [None]:
zomato_data=pd.read_csv("/content/drive/MyDrive/zomato.csv.zip")
zomato_df=zomato_data.copy()
zomato_df.head(2)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari


## Checking dataset size

In [None]:
zomato_df.shape

(51717, 17)

## Dataset Overview

In [None]:
zomato_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   url                          51717 non-null  object
 1   address                      51717 non-null  object
 2   name                         51717 non-null  object
 3   online_order                 51717 non-null  object
 4   book_table                   51717 non-null  object
 5   rate                         43942 non-null  object
 6   votes                        51717 non-null  int64 
 7   phone                        50509 non-null  object
 8   location                     51696 non-null  object
 9   rest_type                    51490 non-null  object
 10  dish_liked                   23639 non-null  object
 11  cuisines                     51672 non-null  object
 12  approx_cost(for two people)  51371 non-null  object
 13  reviews_list                 51

## Checking for null values

In [None]:
zomato_df.isnull().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

## Data Cleaning

In [None]:
#Dropping the column "dish_liked", "phone", "url"
zomato_df=zomato_df.drop(['phone','dish_liked','url'],axis=1)

#Remove the NaN values from the dataset
zomato_df.dropna(how='any',inplace=True)

#Removing the Duplicates
zomato_df.duplicated().sum()
zomato_df.drop_duplicates(inplace=True)

#Changing the column names
zomato_df = zomato_df.rename(columns={'approx_cost(for two people)':'cost','listed_in(type)':'type', 'listed_in(city)':'city'})

#Removing '/5' from Rates
zomato_df = zomato_df.loc[zomato_df.rate !='NEW']
zomato_df = zomato_df.loc[zomato_df.rate !='-'].reset_index(drop=True)
remove_slash = lambda x: x.replace('/5', '') if type(x) == np.str else x
zomato_df.rate = zomato_df.rate.apply(remove_slash).str.strip().astype('float')

#Changing the cost to string
zomato_df['cost'] = zomato_df['cost'].astype(str) 
zomato_df['cost'] = zomato_df['cost'].apply(lambda x: x.replace(',','.'))
zomato_df['cost'] = zomato_df['cost'].astype(float)

In [None]:
zomato_df.shape

(41237, 14)

In [None]:
zomato_df.isnull().sum()

address         0
name            0
online_order    0
book_table      0
rate            0
votes           0
location        0
rest_type       0
cuisines        0
cost            0
reviews_list    0
menu_item       0
type            0
city            0
dtype: int64

## Mean rating for each restaurants

In [None]:
## Computing Mean Rating
restaurants = list(zomato_df['name'].unique())
zomato_df['Mean Rating'] = 0
for i in range(len(restaurants)):
    zomato_df['Mean Rating'][zomato_df['name'] == restaurants[i]] = zomato_df['rate'][zomato_df['name'] == restaurants[i]].mean()   
#Scaling the mean rating values
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range = (1,5))
zomato_df[['Mean Rating']] = scaler.fit_transform(zomato_df[['Mean Rating']]).round(2)

## Checking the mean rating with restaurant name and rating

In [None]:
zomato_df[['name','rate','Mean Rating']].head()

Unnamed: 0,name,rate,Mean Rating
0,Jalsa,4.1,3.99
1,Spice Elephant,4.1,3.97
2,San Churro Cafe,3.8,3.58
3,Addhuri Udupi Bhojana,3.7,3.45
4,Grand Village,3.8,3.58


## Text Preprocessing and Cleaning
We will be using the ‘Review’ and ‘Cuisines’ feature in order to create a recommender system

In [None]:
## Lower Casing
zomato_df["reviews_list"] = zomato_df["reviews_list"].str.lower()

## Removal of Puctuations
import string
PUNCT_TO_REMOVE = string.punctuation
def remove_punctuation(text):
    """custom function to remove the punctuation"""
    return text.translate(str.maketrans('', '', PUNCT_TO_REMOVE))
zomato_df["reviews_list"] = zomato_df["reviews_list"].apply(lambda text: remove_punctuation(text))



In [None]:
zomato_df[['reviews_list', 'cuisines']].sample(5)

Unnamed: 0,reviews_list,cuisines
37216,rated 30 ratedn i was shocked by the taste of...,"Ice Cream, Desserts"
15265,rated 50 ratedn an absolute throw back to the...,"South Indian, Finger Food"
31196,rated 10 ratedn worst foodvery unhygienicambi...,South Indian
15200,rated 40 ratedn the food is pretty decent com...,"Chinese, Rolls"
2686,rated 40 ratedn its a nice eat out when you w...,"South Indian, Healthy Food"


In [None]:
def get_top_words(column, top_nu_of_words, nu_of_word):
    
    vec = CountVectorizer(ngram_range= nu_of_word, stop_words='english')
    
    bag_of_words = vec.fit_transform(column)
    
    sum_words = bag_of_words.sum(axis=0)
    
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    
    words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
    
    return words_freq[:top_nu_of_words]

## CONTENT-BASE RECOMMENDER SYSTEM

In [None]:
# RESTAURANT NAMES:
restaurant_names = list(zomato_df['name'].unique())
def get_top_words(column, top_nu_of_words, nu_of_word):
    vec = CountVectorizer(ngram_range= nu_of_word, stop_words='english')
    bag_of_words = vec.fit_transform(column)
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
    return words_freq[:top_nu_of_words]
    
zomato_df=zomato_df.drop(['address','rest_type', 'type', 'menu_item', 'votes'],axis=1)


# Randomly sample 60% of your dataframe
df_percent = zomato_df.sample(frac=0.5)

In [None]:
zomato_df.head()

Unnamed: 0,name,online_order,book_table,rate,location,cuisines,cost,reviews_list,city,Mean Rating
0,Jalsa,Yes,Yes,4.1,Banashankari,"North Indian, Mughlai, Chinese",800.0,rated 40 ratedn a beautiful place to dine int...,Banashankari,3.99
1,Spice Elephant,Yes,No,4.1,Banashankari,"Chinese, North Indian, Thai",800.0,rated 40 ratedn had been here for dinner with...,Banashankari,3.97
2,San Churro Cafe,Yes,No,3.8,Banashankari,"Cafe, Mexican, Italian",800.0,rated 30 ratedn ambience is not that good eno...,Banashankari,3.58
3,Addhuri Udupi Bhojana,No,No,3.7,Banashankari,"South Indian, North Indian",300.0,rated 40 ratedn great food and proper karnata...,Banashankari,3.45
4,Grand Village,No,No,3.8,Basavanagudi,"North Indian, Rajasthani",600.0,rated 40 ratedn very good restaurant in neigh...,Banashankari,3.58


In [None]:
 zomato_df.to_csv("restaurant1.csv")

In [None]:
df_percent.head()

Unnamed: 0,name,online_order,book_table,rate,location,cuisines,cost,reviews_list,city,Mean Rating
9701,Quench & Crunch,Yes,No,3.6,Shanti Nagar,"Beverages, Fast Food",150.0,rated 40 ratedn for breakfast ordered veg che...,Church Street,3.32
11761,Mangalore pearl - Seafood Restaurant,Yes,No,4.3,Frazer Town,"Mangalorean, Seafood",700.0,rated 50 ratedn mangalore pearl is the place ...,Frazer Town,4.23
15635,Darshan Paradise Restaurant,Yes,No,3.6,BTM,"North Indian, Chinese",250.0,rated 20 ratedn quality is not gud rated 50 r...,Jayanagar,3.32
25599,Savoury - Sea Shell Restaurant,Yes,No,3.9,BTM,"Arabian, North Indian, Chinese, Fast Food",700.0,rated 10 ratedn not a great reasturant taste...,Koramangala 6th Block,3.71
39656,Vapour Brewpub and Diner,No,Yes,4.5,Sarjapur Road,"North Indian, Continental, Italian",1.4,rated 50 ratedn this is a lovely rooftop brew...,Sarjapur Road,4.54


In [None]:
df_percent['reviews_list'].isnull().sum()

0

## TF-IDF Vectorization:
### TF-IDF is the statistical method of assessing the meaning of a word in a given document. Now, I will use the TF-IDF vectorization on the dataset:
## Cosine similarity:
###  It is a metric used to determine how similar the documents are irrespective of their size.
### Calculating the cosine similarity of each item with every other item in the dataset

In [None]:
df_percent.set_index('name', inplace=True)
indices = pd.Series(df_percent.index)

# Creating tf-idf matrix
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=0, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_percent['reviews_list'])

cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

## **Creating Recommendation System**

In [None]:
def recommend(name, cosine_similarities = cosine_similarities):
    
    # Create a list to put top restaurants
    recommend_restaurant = []
    
    # Find the index of the hotel entered
    idx = indices[indices == name].index[0]
    
    # Find the restaurants with a similar cosine-sim value and order them from bigges number
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
    
    # Extract top 30 restaurant indexes with a similar cosine-sim value
    top30_indexes = list(score_series.iloc[0:31].index)
    
    # Names of the top 30 restaurants
    for each in top30_indexes:
        recommend_restaurant.append(list(df_percent.index)[each])
    
    # Creating the new data set to show similar restaurants
    df_new = pd.DataFrame(columns=['cuisines', 'Mean Rating', 'cost'])
  
    # Create the top 30 similar restaurants with some of their columns
    for each in recommend_restaurant:
        df_new = df_new.append(pd.DataFrame(df_percent[['cuisines','Mean Rating', 'cost']][df_percent.index == each].sample()))
    
    # Drop the same named restaurants and sort only the top 10 by the highest rating
    df_new = df_new.drop_duplicates(subset=['cuisines','Mean Rating', 'cost'], keep=False)
    df_new = df_new.sort_values(by='Mean Rating', ascending=False).head(10)
    
    print('TOP %s RESTAURANTS LIKE %s WITH SIMILAR REVIEWS: ' % (str(len(df_new)), name))
    
    return df_new
    


In [None]:
recommend('Pai Vihar')

TOP 10 RESTAURANTS LIKE Pai Vihar WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Gokul Kuteera,"North Indian, Chinese",3.71,650.0
Food And You,"North Indian, South Indian, Gujarati",3.65,350.0
Desi Doze,"North Indian, Fast Food",3.58,400.0
Kakaji,North Indian,3.45,350.0
A2B - Adyar Ananda Bhavan,"South Indian, North Indian, Chinese, Street Fo...",3.05,400.0
A2B - Adyar Ananda Bhavan,"South Indian, Fast Food, Chinese, North Indian",3.05,600.0
Food Point,North Indian,2.94,450.0
Melange - Hotel Ekaa,"North Indian, Chinese, Continental, Mangalorean",2.81,900.0
Punjabi Tasty Khana,"North Indian, Chinese, Biryani",2.68,450.0
Punjabi Dawat,"North Indian, Chinese",2.42,400.0


In [None]:
recommend('Canopy')

TOP 4 RESTAURANTS LIKE Canopy WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Nouvelle Garden,"North Indian, Continental, Italian",3.45,900.0
Sri Sai Mango Tree Restaurant,"North Indian, Biryani, Chinese",3.32,600.0
Melange - Hotel Ekaa,"North Indian, Chinese, Continental, Mangalorean",2.81,900.0
South Parade - The Chancery Hotel,"North Indian, Continental, Chinese",2.68,1.2


In [None]:
recommend('Red Chilliez')

TOP 7 RESTAURANTS LIKE Red Chilliez WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Lion King,"Chinese, Momos",3.58,250.0
Spice Up,Chinese,3.58,600.0
Beijing Bites,"Chinese, Thai",3.36,850.0
Chef in,"Biryani, North Indian, Chinese",3.32,500.0
Red Chilliez,"North Indian, Chinese, Seafood, Mangalorean",3.26,650.0
Chinese Street,Chinese,2.68,650.0
Wangs Kitchen,Chinese,2.14,1.0


In [None]:
recommend('Cinnamon')

TOP 8 RESTAURANTS LIKE Cinnamon WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Madeena Hotel,"North Indian, Mughlai, Biryani",3.75,400.0
Pallavi Restaurant,"Biryani, Chinese, Andhra",3.58,500.0
Donne Biriyani Angadi Mane,"South Indian, Biryani",3.47,250.0
B.M.W - Bhookh Mitaane Wala,"North Indian, South Indian, Chinese",3.42,500.0
Rayalaseema Spice,"Andhra, South Indian, North Indian",3.32,650.0
Desi Dhaba,"North Indian, Chinese, Rolls",3.19,300.0
Bangalore Bytes,"Fast Food, South Indian, Biryani",3.19,300.0
Food Point,North Indian,2.94,450.0


In [None]:
import pickle
pickle.dump(tfidf, open('restaurant.pkl', 'wb'))