### Importing Libraries

In [1]:
#Importing Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')
import re
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

### Loading the dataset

In [2]:
#reading the dataset
zomato_main=pd.read_csv("zomato.csv")

In [3]:
zomato_main.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [4]:
zomato_main.shape

(51717, 17)

In [5]:
zomato_main.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   url                          51717 non-null  object
 1   address                      51717 non-null  object
 2   name                         51717 non-null  object
 3   online_order                 51717 non-null  object
 4   book_table                   51717 non-null  object
 5   rate                         43942 non-null  object
 6   votes                        51717 non-null  int64 
 7   phone                        50509 non-null  object
 8   location                     51696 non-null  object
 9   rest_type                    51490 non-null  object
 10  dish_liked                   23639 non-null  object
 11  cuisines                     51672 non-null  object
 12  approx_cost(for two people)  51371 non-null  object
 13  reviews_list                 51

### Data Cleaning and Feature Engineering

In [6]:
#Deleting Unnnecessary Columns like "url","dish_liked", "phone"
zomato=zomato_main.drop(['url','dish_liked','phone'],axis=1) 

In [7]:
#Checking for Duplicates
zomato.duplicated().sum()

43

In [8]:
#Removing duplicates
zomato.drop_duplicates(inplace=True)

In [9]:
#checking the NaN values
zomato.isnull().sum()

address                           0
name                              0
online_order                      0
book_table                        0
rate                           7767
votes                             0
location                         21
rest_type                       227
cuisines                         45
approx_cost(for two people)     345
reviews_list                      0
menu_item                         0
listed_in(type)                   0
listed_in(city)                   0
dtype: int64

In [10]:
#Removing the NaN values
zomato.dropna(how='any',inplace=True)

In [11]:
#cross-checking
zomato.isnull().sum()

address                        0
name                           0
online_order                   0
book_table                     0
rate                           0
votes                          0
location                       0
rest_type                      0
cuisines                       0
approx_cost(for two people)    0
reviews_list                   0
menu_item                      0
listed_in(type)                0
listed_in(city)                0
dtype: int64

In [12]:
#Reading Column Names
zomato.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'approx_cost(for two people)',
       'reviews_list', 'menu_item', 'listed_in(type)', 'listed_in(city)'],
      dtype='object')

In [13]:
#Renaming the column names for convenience
zomato = zomato.rename(columns={'approx_cost(for two people)':'cost','listed_in(type)':'type',
                                  'listed_in(city)':'city'})
zomato.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'cost', 'reviews_list',
       'menu_item', 'type', 'city'],
      dtype='object')

In [14]:
zomato.dtypes

address         object
name            object
online_order    object
book_table      object
rate            object
votes            int64
location        object
rest_type       object
cuisines        object
cost            object
reviews_list    object
menu_item       object
type            object
city            object
dtype: object

In [15]:
zomato.cost.unique()

array(['800', '300', '600', '700', '550', '500', '450', '650', '400',
       '900', '200', '750', '150', '850', '100', '1,200', '350', '250',
       '950', '1,000', '1,500', '1,300', '199', '80', '1,100', '160',
       '1,600', '230', '130', '1,700', '1,400', '1,350', '2,200', '2,000',
       '1,800', '1,900', '180', '330', '2,500', '2,100', '3,000', '2,800',
       '3,400', '50', '40', '1,250', '3,500', '4,000', '2,400', '2,600',
       '1,450', '70', '3,200', '560', '240', '360', '6,000', '1,050',
       '2,300', '4,100', '120', '5,000', '3,700', '1,650', '2,700',
       '4,500'], dtype=object)

In [16]:
#Changing 'cost' from object to float data type

zomato['cost'] = zomato['cost'].astype(str) #Changing the cost to string
zomato['cost'] = zomato['cost'].apply(lambda x: x.replace(',','.')) #Using lambda function to replace ',' from cost
zomato['cost'] = zomato['cost'].astype(float) # Changing the cost to Float


In [17]:
zomato['cost'].dtype

dtype('float64')

In [18]:
#Reading Rate of dataset
zomato['rate'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [19]:
#Removing '/5' from Rates
zomato = zomato.loc[zomato.rate !='NEW']
zomato = zomato.loc[zomato.rate !='-'].reset_index(drop=True)
remove_slash = lambda x: x.replace('/5', '') if type(x) == np.str else x
zomato.rate = zomato.rate.apply(remove_slash).str.strip().astype('float')
zomato['rate'].head()

0    4.1
1    4.1
2    3.8
3    3.7
4    3.8
Name: rate, dtype: float64

In [20]:
# Changing the column names
zomato.name = zomato.name.apply(lambda x:x.title())

In [21]:
#Replacing Yes,No to True,False

zomato.online_order.replace(('Yes','No'),(True, False),inplace=True)
zomato.book_table.replace(('Yes','No'),(True, False),inplace=True)

In [22]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [23]:
## Computing Mean Rating and Adding a new column
all_restaurants = list(zomato['name'].unique())
zomato['Mean Rating'] = 0

for i in range(len(all_restaurants)):
    zomato['Mean Rating'][zomato['name'] == all_restaurants[i]] = zomato['rate'][zomato['name'] == all_restaurants[i]].mean()

In [24]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,4.118182
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,4.1
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.8
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.7
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari,3.8


In [25]:
#Scaling to Mean Rating to 2 decimal figures
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range = (1,5))

zomato[['Mean Rating']] = scaler.fit_transform(zomato[['Mean Rating']]).round(2)

In [26]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,3.99
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,3.97
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.58
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.45
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari,3.58


In [27]:
zomato.shape

(41237, 15)

In [28]:
## Text Preprocessing of Reviews 

Some of the common text preprocessing / cleaning steps are:

 - Lower casing
 - Removal of Punctuations
 - Removal of Stopwords
 - Removal of URLs
 - Spelling correction

In [29]:
# Before text processing:
zomato['reviews_list']

0        [('Rated 4.0', 'RATED\n  A beautiful place to ...
1        [('Rated 4.0', 'RATED\n  Had been here for din...
2        [('Rated 3.0', "RATED\n  Ambience is not that ...
3        [('Rated 4.0', "RATED\n  Great food and proper...
4        [('Rated 4.0', 'RATED\n  Very good restaurant ...
                               ...                        
41232    [('Rated 4.0', 'RATED\n  Ambience- Big and spa...
41233    [('Rated 4.0', 'RATED\n  A fine place to chill...
41234    [('Rated 5.0', "RATED\n  Food and service are ...
41235    [('Rated 4.0', 'RATED\n  Nice and friendly pla...
41236    [('Rated 5.0', 'RATED\n  Great ambience , look...
Name: reviews_list, Length: 41237, dtype: object

In [30]:
## Lower Casing
zomato["reviews_list"] = zomato["reviews_list"].str.lower()

In [31]:
## Removal of Puctuations

import string
PUNCT_TO_REMOVE = string.punctuation

"""custom function to remove the punctuation"""
def remove_punctuation(text):
    return text.translate(str.maketrans('', '', PUNCT_TO_REMOVE))

zomato["reviews_list"] = zomato["reviews_list"].apply(lambda text: remove_punctuation(text))

In [32]:
## Removal of Stopwords

from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))
  
"""custom function to remove the stopwords"""
def remove_stopwords(text):
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

zomato["reviews_list"] = zomato["reviews_list"].apply(lambda text: remove_stopwords(text))

In [33]:
## Removal of URLS
def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    return url_pattern.sub(r'', text)

zomato["reviews_list"] = zomato["reviews_list"].apply(lambda text: remove_urls(text))

In [34]:
#After text processing
zomato['reviews_list']

0        rated 40 ratedn beautiful place dine inthe int...
1        rated 40 ratedn dinner family turned good choo...
2        rated 30 ratedn ambience good enough pocket fr...
3        rated 40 ratedn great food proper karnataka st...
4        rated 40 ratedn good restaurant neighbourhood ...
                               ...                        
41232    rated 40 ratedn ambience big spacious lawn use...
41233    rated 40 ratedn fine place chill office hours ...
41234    rated 50 ratedn food service incomparably exce...
41235    rated 40 ratedn nice friendly place staff awes...
41236    rated 50 ratedn great ambience looking nice go...
Name: reviews_list, Length: 41237, dtype: object

In [35]:
# RESTAURANT NAMES:
restaurant_names = list(zomato['name'].unique())
restaurant_names

['Jalsa',
 'Spice Elephant',
 'San Churro Cafe',
 'Addhuri Udupi Bhojana',
 'Grand Village',
 'Timepass Dinner',
 'Rosewood International Hotel - Bar & Restaurant',
 'Onesta',
 'Penthouse Cafe',
 'Smacznego',
 'Cafã\x83Â\x83Ã\x82Â\x83Ã\x83Â\x82Ã\x82Â\x83Ã\x83Â\x83Ã\x82Â\x82Ã\x83Â\x82Ã\x82Â© Down The Alley',
 'Cafe Shuffle',
 'The Coffee Shack',
 'Caf-Eleven',
 'Cafe Vivacity',
 'Catch-Up-Ino',
 "Kirthi'S Biryani",
 'T3H Cafe',
 '360 Atoms Restaurant And Cafe',
 'The Vintage Cafe',
 'Woodee Pizza',
 'Cafe Coffee Day',
 'My Tea House',
 'Hide Out Cafe',
 'Cafe Nova',
 'Coffee Tindi',
 'Sea Green Cafe',
 'Cuppa',
 "Srinathji'S Cafe",
 'Redberrys',
 'Foodiction',
 'Sweet Truth',
 'Ovenstory Pizza',
 'Faasos',
 'Behrouz Biryani',
 'Fast And Fresh',
 'Szechuan Dragon',
 'Empire Restaurant',
 'Maruthi Davangere Benne Dosa',
 'Chaatimes',
 'Havyaka Mess',
 "Mcdonald'S",
 "Domino'S Pizza",
 'Hotboxit',
 'Kitchen Garden',
 'Recipe',
 'Beijing Bites',
 'Tasty Bytes',
 'Petoo',
 'Shree Cool Point'

In [36]:
#function for 
def get_top_words(column, top_nu_of_words, nu_of_word):
    
    vec = CountVectorizer(ngram_range= nu_of_word, stop_words='english')
    
    bag_of_words = vec.fit_transform(column)
    
    sum_words = bag_of_words.sum(axis=0)
    
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    
    words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
    
    return words_freq[:top_nu_of_words]

In [37]:
zomato.sample(10)

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
11523,"The Hatworks Boulevard, 32, Cunningham Road, B...",The Culinarium,False,False,4.0,25,Vasanth Nagar,"Cafe, Dessert Parlor","Cafe, Desserts, Beverages",1.1,rated 50 ratedn best pot pies fish chips town ...,[],Cafes,Frazer Town,3.77
2847,"39/3C, Banashankari, Bangalore",New Imperial Restaurant,False,False,3.7,39,Banashankari,Casual Dining,"North Indian, Chinese",450.0,rated 50 ratedn regular coustmer imperial ways...,[],Dine-out,Basavanagudi,3.45
9477,"3rd Floor, Guarda Mall, Ashok Nagar, Near Brig...",Vinny'S,True,False,3.9,205,Brigade Road,Casual Dining,"Pizza, Italian, Continental, Desserts",800.0,rated 30 ratedn good place grab quick biteespe...,[],Delivery,Church Street,3.87
7922,"Koramangala , Bangalore",Midnight Hunger Solutions 24/7,False,False,3.2,11,Koramangala 5th Block,Delivery,"North Indian, Mughlai, Chinese",500.0,rated 50 ratedn rolls awesome rated 10 ratedn ...,[],Delivery,BTM,1.72
28983,"8th Cross, Beside Goverment Hospital, Wilson G...",Bake Land,False,False,3.2,15,Wilson Garden,Bakery,Bakery,150.0,rated 30 ratedn located opposite 3 spicebakela...,[],Desserts,Koramangala 7th Block,2.81
6387,"3rd Cross Road, Sai Baba Temple Road, Green Ga...",Aahar Vihar,True,False,4.0,68,Marathahalli,Quick Bites,North Indian,250.0,rated 40 ratedn small place well known crowded...,[],Dine-out,Brookefield,3.84
11078,"34/2, Gollahalli Village, Hebbagodi, Near TATA...",Mr. Meetharam,True,False,3.3,7,Electronic City,Quick Bites,"Mithai, Street Food",150.0,rated 10 ratedn bought kalakand un hygenic hai...,[],Desserts,Electronic City,2.84
15632,"360, 2nd Floor, Near HDFC Bank,7th Block, Kora...",Axomi - Taste Of Assam,True,False,4.1,613,Koramangala 7th Block,Casual Dining,Assamese,500.0,rated 40 ratedn itãx83ãx83ãx82ãx82ãx83ãx...,"['Chicken Thali', 'Egg Thali', 'Mini Thali', '...",Delivery,Jayanagar,3.95
31963,"Sheraton Grand, Level 4, A Block, 26/1, Dr Raj...",Art Cafe,False,False,3.5,7,Malleshwaram,Cafe,Cafe,1.0,,[],Dine-out,Malleshwaram,3.19
40831,"Near MVJ Collage, Chansandra Main Road, Whitef...",Urban Grills Restaurant,False,False,3.5,45,Whitefield,Casual Dining,"Arabian, North Indian, Biryani",700.0,rated 40 ratedn good quality shawarma shawarma...,[],Dine-out,Whitefield,3.19


In [38]:
zomato.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'cost', 'reviews_list',
       'menu_item', 'type', 'city', 'Mean Rating'],
      dtype='object')

In [39]:
#Dropping columns that are not much important
zomato=zomato.drop(['address','rest_type', 'type', 'menu_item', 'votes'],axis=1)

In [40]:
import pandas

# Randomly sample 60% of your dataframe
df = zomato.sample(frac=0.5)

In [41]:
df.shape

(20618, 10)

## Building Recommendation System

### Term Frequency-Inverse Document Frequency


In [42]:
#set column 'name' as index
df.set_index('name', inplace=True)

In [43]:
indices = pd.Series(df.index)

In [44]:
# Creating tf-idf matrix
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=0, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['reviews_list'])

In [45]:
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [46]:
def recommend(name, cosine_similarities = cosine_similarities):
    
    # Create a list to put top restaurants
    recommend_restaurant = []
    
    # Find the index of the hotel entered
    idx = indices[indices == name].index[0]
    
    # Find the restaurants with a similar cosine-sim value and order them from bigges number
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
    
    # Extract top 30 restaurant indexes with a similar cosine-sim value
    top30_indexes = list(score_series.iloc[0:31].index)
    
    # Names of the top 30 restaurants
    for each in top30_indexes:
        recommend_restaurant.append(list(df.index)[each])
    
    # Creating the new data set to show similar restaurants
    df_new = pd.DataFrame(columns=['cuisines', 'Mean Rating', 'cost'])
    
    # Create the top 30 similar restaurants with some of their columns
    for each in recommend_restaurant:
        df_new = df_new.append(pd.DataFrame(df[['cuisines','Mean Rating', 'cost']][df.index == each].sample()))
    
    # Drop the same named restaurants and sort only the top 10 by the highest rating
    df_new = df_new.drop_duplicates(subset=['cuisines','Mean Rating', 'cost'], keep=False)
    df_new = df_new.sort_values(by='Mean Rating', ascending=False).head(10)
    
    print('TOP %s RESTAURANTS LIKE " %s " WITH SIMILAR REVIEWS: ' % (str(len(df_new)), name.upper()))
    
    return df_new

In [47]:
#getting details of a random restaurant
df[df.index == 'Knight Ryders'].head()

Unnamed: 0_level_0,online_order,book_table,rate,location,cuisines,cost,reviews_list,city,Mean Rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Knight Ryders,True,False,3.4,BTM,"North Indian, Chinese",400.0,rated 50 ratedn forgot give ratings person del...,Koramangala 4th Block,3.06
Knight Ryders,True,False,3.4,BTM,"North Indian, Chinese",400.0,rated 50 ratedn forgot give ratings person del...,Koramangala 5th Block,3.06
Knight Ryders,True,False,3.4,BTM,"North Indian, Chinese",400.0,rated 50 ratedn forgot give ratings person del...,JP Nagar,3.06
Knight Ryders,True,False,3.4,BTM,"North Indian, Chinese",400.0,rated 50 ratedn forgot give ratings person del...,Jayanagar,3.06


In [48]:
recommend('Knight Ryders')

TOP 10 RESTAURANTS LIKE " KNIGHT RYDERS " WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Al Sadique,"North Indian, Mughlai, Chinese, Rolls",3.71,450.0
Donne Biriyani Angadi Mane,"South Indian, Biryani",3.47,250.0
Hotel New Karavali,"Mangalorean, South Indian, North Indian",3.34,300.0
Sri Krishna Sagar,"North Indian, Chinese",3.26,400.0
Bendakaluru Bytes,Fast Food,2.81,300.0
Sri Lakshmi Dhaba,"North Indian, Chinese",2.5,300.0
Yummy Punjabi,"North Indian, Chinese",2.5,400.0
Biryani Feast,"Biryani, North Indian, Chinese",2.42,600.0
Kabab Treat,"North Indian, Chinese",2.29,500.0
Ruchi'S Corner,Fast Food,2.16,200.0


In [49]:
recommend('Red Chilliez')

TOP 8 RESTAURANTS LIKE " RED CHILLIEZ " WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Yo! Chow,"Chinese, Momos",4.35,800.0
Eggzotic,"North Indian, Chinese, Biryani, Fast Food",3.77,500.0
Dinepost9,"North Indian, South Indian, Chinese, Continental",3.67,450.0
Cinnamon,"North Indian, Chinese, Biryani",3.62,550.0
Magix'S Parattha Roll,"Fast Food, North Indian, Chinese, Mughlai, Rolls",3.52,400.0
Chef In,"Biryani, North Indian, Chinese",3.32,500.0
Dilli Darbar,"North Indian, Chinese, Biryani",3.13,500.0
Wazir'S,"North Indian, Chinese",2.94,500.0


In [50]:
recommend('Cinnamon')

TOP 8 RESTAURANTS LIKE " CINNAMON " WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Communiti,"Continental, BBQ, Salad",4.67,1.5
Truffles,"Cafe, American, Burger, Steak",4.61,900.0
Ciclo Cafe,"Cafe, Italian, American",4.23,1.0
Hoot,"Continental, Italian, North Indian",4.1,1.4
Savji'S Roll 'N' Biryani,"North Indian, Chinese, Biryani, Rolls",3.71,250.0
Cinnamon,"North Indian, Asian, Continental",3.62,1.0
Dhaba Express,"Biryani, Fast Food, North Indian, Chinese",3.45,400.0
Kfc,"Burger, Fast Food",3.38,400.0


In [51]:
recommend('Spice Up')

TOP 9 RESTAURANTS LIKE " SPICE UP " WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Eggzotic,"North Indian, Chinese, Biryani, Fast Food",3.77,500.0
Cinnamon,"North Indian, Chinese, Biryani",3.62,550.0
Raichur Biryani House,"Biryani, North Indian, Chinese",3.58,400.0
Hotel Manu Residency,"Andhra, North Indian, Chinese, Biryani",3.58,550.0
Donne Biriyani Angadi Mane,"South Indian, Biryani",3.47,250.0
Chef In,"Biryani, North Indian, Chinese",3.32,500.0
Ruchie Rich,"Biryani, Fast Food, North Indian",3.19,200.0
Food Point,"North Indian, Chinese",2.94,300.0
Kabab Treat,"North Indian, Chinese",2.29,500.0


In [52]:
recommend('Desi Doze')

TOP 10 RESTAURANTS LIKE " DESI DOZE " WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Eggzotic,"North Indian, Chinese, Biryani, Fast Food",3.77,500.0
Cinnamon,"North Indian, Chinese, Biryani",3.62,550.0
Donne Biriyani Angadi Mane,"Biryani, Chinese",3.47,250.0
Sri Krishna Sagar,South Indian,3.26,300.0
Bangalore Bytes,"Fast Food, South Indian, Biryani",3.19,300.0
Ruchie Rich,"Biryani, Fast Food, North Indian",3.19,200.0
Food Point,North Indian,2.94,450.0
Punjabi Tasty Khana,"North Indian, Chinese, Biryani",2.68,450.0
Taza Khaana,"Chinese, North Indian",2.63,450.0
Sri Lakshmi Dhaba,"North Indian, Chinese",2.5,300.0


In [53]:
import pickle
pickle.dump(recommend,open('model.pkl','wb'))