# Zomato Selection

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn import set_config
set_config(display = 'diagram')

## Data collection and inspection

In [3]:
data = pd.read_csv('zomato.csv',  dtype = {'rate':'string', 'reviews_list':'string'}, nrows = 10000)
data.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [4]:
data.drop(['address', 'phone', 'menu_item'], axis = 1, inplace = True)

In [5]:
data.shape

(10000, 14)

In [6]:
data.isnull().sum()

url                               0
name                              0
online_order                      0
book_table                        0
rate                           1372
votes                             0
location                          2
rest_type                        51
dish_liked                     5550
cuisines                         10
approx_cost(for two people)      20
reviews_list                      0
listed_in(type)                   0
listed_in(city)                   0
dtype: int64

In [7]:
data.describe()

Unnamed: 0,votes
count,10000.0
mean,245.5692
std,711.891229
min,0.0
25%,7.0
50%,41.0
75%,178.0
max,16345.0


In [8]:
data.sample(5)

Unnamed: 0,url,name,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city)
6899,https://www.zomato.com/bangalore/dock-frostd-m...,Dock Frost'd,Yes,No,4.6/5,110,Marathahalli,"Beverage Shop, Dessert Parlor","Kala Jamun, Fruit Shake, Waffles, Chocolate De...","Beverages, Desserts",400,"[('Rated 4.0', 'RATED\n We had an ice cream j...",Delivery,Brookefield
2558,https://www.zomato.com/bangalore/kolkata-kathi...,Kolkata Kathi Rolls,Yes,No,3.8/5,23,JP Nagar,Quick Bites,,Rolls,150,"[('Rated 5.0', 'RATED\n If someone loves Beng...",Delivery,Basavanagudi
943,https://www.zomato.com/bangalore/midnight-mani...,Midnight Mania,Yes,No,4.1/5,601,BTM,Quick Bites,"Peri Peri Chicken, Chicken Pizza, White Pasta,...","Pizza, Italian, Beverages",700,"[('Rated 5.0', 'RATED\n Order from zomato . O...",Delivery,Bannerghatta Road
1494,https://www.zomato.com/bangalore/cafe-arabica-...,Cafe Arabica,No,No,3.8/5,69,Bannerghatta Road,"Cafe, Bakery","Coffee, French Fries, Lemon Tea, Veg Sandwich","Cafe, Bakery",700,"[('Rated 3.0', 'RATED\n Went to have dessert....",Delivery,Bannerghatta Road
1556,https://www.zomato.com/bangalore/kanchan-dhaba...,Kanchan Dhaba,No,No,3.2/5,4,Bannerghatta Road,Quick Bites,,"North Indian, Chinese",350,"[('Rated 2.0', 'RATED\n ordered chicken do pe...",Delivery,Bannerghatta Road


In [9]:
data.duplicated().sum()

0

## Handling missing values

In [10]:
data.isnull().mean()*100

url                             0.00
name                            0.00
online_order                    0.00
book_table                      0.00
rate                           13.72
votes                           0.00
location                        0.02
rest_type                       0.51
dish_liked                     55.50
cuisines                        0.10
approx_cost(for two people)     0.20
reviews_list                    0.00
listed_in(type)                 0.00
listed_in(city)                 0.00
dtype: float64

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

In [12]:
trf = ColumnTransformer(transformers = [
    ('imp', SimpleImputer(strategy = 'most_frequent'), ['location', 'rest_type', 'cuisines'])
])

In [13]:
transformed_data = trf.fit_transform(data)

In [14]:
transformed_df = pd.DataFrame(transformed_data, columns=['updated_location', 'updated_rest_type', 'updated_cuisines'])


data = pd.concat([data, transformed_df], axis=1)

In [15]:
data['updated_cuisines'].isna().sum()

0

In [16]:
data.head()

Unnamed: 0,url,name,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city),updated_location,updated_rest_type,updated_cuisines
0,https://www.zomato.com/bangalore/jalsa-banasha...,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",Buffet,Banashankari,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese"
1,https://www.zomato.com/bangalore/spice-elephan...,Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",Buffet,Banashankari,Banashankari,Casual Dining,"Chinese, North Indian, Thai"
2,https://www.zomato.com/SanchurroBangalore?cont...,San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",Buffet,Banashankari,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian"
3,https://www.zomato.com/bangalore/addhuri-udupi...,Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",Buffet,Banashankari,Banashankari,Quick Bites,"South Indian, North Indian"
4,https://www.zomato.com/bangalore/grand-village...,Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",Buffet,Banashankari,Basavanagudi,Casual Dining,"North Indian, Rajasthani"


In [17]:
data['dish_liked'] = np.where(data['dish_liked'].isnull(), data['updated_location'], data['dish_liked'])


In [18]:
data.drop(['location', 'rest_type', 'cuisines', 'url'], axis = 1, inplace = True)

In [19]:
data.isnull().sum()

name                              0
online_order                      0
book_table                        0
rate                           1372
votes                             0
dish_liked                        0
approx_cost(for two people)      20
reviews_list                      0
listed_in(type)                   0
listed_in(city)                   0
updated_location                  0
updated_rest_type                 0
updated_cuisines                  0
dtype: int64

In [20]:
data['updated_rest_type'].value_counts()

updated_rest_type
Quick Bites                   3733
Casual Dining                 2103
Cafe                           705
Delivery                       527
Dessert Parlor                 460
                              ... 
Bhojanalya                       1
Casual Dining, Quick Bites       1
Pop Up                           1
Sweet Shop, Dessert Parlor       1
Casual Dining, Lounge            1
Name: count, Length: 80, dtype: int64

In [21]:
rows = data.shape[0]

## Data Wrangling

In [22]:
data.head(6)


Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city),updated_location,updated_rest_type,updated_cuisines
0,Jalsa,Yes,Yes,4.1/5,775,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",Buffet,Banashankari,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese"
1,Spice Elephant,Yes,No,4.1/5,787,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...",800,"[('Rated 4.0', 'RATED\n Had been here for din...",Buffet,Banashankari,Banashankari,Casual Dining,"Chinese, North Indian, Thai"
2,San Churro Cafe,Yes,No,3.8/5,918,"Churros, Cannelloni, Minestrone Soup, Hot Choc...",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",Buffet,Banashankari,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian"
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,Masala Dosa,300,"[('Rated 4.0', ""RATED\n Great food and proper...",Buffet,Banashankari,Banashankari,Quick Bites,"South Indian, North Indian"
4,Grand Village,No,No,3.8/5,166,"Panipuri, Gol Gappe",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",Buffet,Banashankari,Basavanagudi,Casual Dining,"North Indian, Rajasthani"
5,Timepass Dinner,Yes,No,3.8/5,286,"Onion Rings, Pasta, Kadhai Paneer, Salads, Sal...",600,"[('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/...",Buffet,Banashankari,Basavanagudi,Casual Dining,North Indian


In [23]:
data.isnull().sum()

name                              0
online_order                      0
book_table                        0
rate                           1372
votes                             0
dish_liked                        0
approx_cost(for two people)      20
reviews_list                      0
listed_in(type)                   0
listed_in(city)                   0
updated_location                  0
updated_rest_type                 0
updated_cuisines                  0
dtype: int64

In [24]:
data['rate'] = np.where(data['rate'].isna() == True, '0/5', data['rate'])  

In [25]:
data['rate'] = np.where(data['rate'] == 'NEW', '0/5', data['rate'])

In [26]:
data['rate'] = data['rate'].apply(lambda x: x.split('/')[0])

In [27]:
data.head(10)

Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city),updated_location,updated_rest_type,updated_cuisines
0,Jalsa,Yes,Yes,4.1,775,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",Buffet,Banashankari,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese"
1,Spice Elephant,Yes,No,4.1,787,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...",800,"[('Rated 4.0', 'RATED\n Had been here for din...",Buffet,Banashankari,Banashankari,Casual Dining,"Chinese, North Indian, Thai"
2,San Churro Cafe,Yes,No,3.8,918,"Churros, Cannelloni, Minestrone Soup, Hot Choc...",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",Buffet,Banashankari,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian"
3,Addhuri Udupi Bhojana,No,No,3.7,88,Masala Dosa,300,"[('Rated 4.0', ""RATED\n Great food and proper...",Buffet,Banashankari,Banashankari,Quick Bites,"South Indian, North Indian"
4,Grand Village,No,No,3.8,166,"Panipuri, Gol Gappe",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",Buffet,Banashankari,Basavanagudi,Casual Dining,"North Indian, Rajasthani"
5,Timepass Dinner,Yes,No,3.8,286,"Onion Rings, Pasta, Kadhai Paneer, Salads, Sal...",600,"[('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/...",Buffet,Banashankari,Basavanagudi,Casual Dining,North Indian
6,Rosewood International Hotel - Bar & Restaurant,No,No,3.6,8,Mysore Road,800,"[('Rated 5.0', 'RATED\n Awesome food ??Great ...",Buffet,Banashankari,Mysore Road,Casual Dining,"North Indian, South Indian, Andhra, Chinese"
7,Onesta,Yes,Yes,4.6,2556,"Farmhouse Pizza, Chocolate Banana, Virgin Moji...",600,"[('Rated 5.0', 'RATED\n I personally really l...",Cafes,Banashankari,Banashankari,"Casual Dining, Cafe","Pizza, Cafe, Italian"
8,Penthouse Cafe,Yes,No,4.0,324,"Pizza, Mocktails, Coffee, Nachos, Salad, Pasta...",700,"[('Rated 3.0', ""RATED\n I had been to this pl...",Cafes,Banashankari,Banashankari,Cafe,"Cafe, Italian, Continental"
9,Smacznego,Yes,No,4.2,504,"Waffles, Pasta, Coleslaw Sandwich, Choco Waffl...",550,"[('Rated 4.0', ""RATED\n Easy to locate\nVFM 3...",Cafes,Banashankari,Banashankari,Cafe,"Cafe, Mexican, Italian, Momos, Beverages"


In [28]:
stripoff = '\"[(\'\"\"'

In [29]:
one_rest = data['reviews_list'][5]
len(one_rest.split('),'))

1

In [30]:
one_review = one_rest.split('),')[0].strip(stripoff)
one_review

"Rated 3.0', 'RATED\\n  Food 3/5\\nAmbience 3/5\\nService 3/5\\n\\nHad been here for family lunch. This place serves buffets too. We ordered soups, babycorn starters, butter naan, kadai panner, veg kohlapuri, pease pulav. Food was good. But the service is too slow. They will make you wait for 20 mins to get the order. This place is apt for family hangout.\\n\\nCheers.')]"

In [31]:
one_rating = one_review.split(',')[0][-4]

In [32]:
one_description = one_review.split(',')[1][9:]

In [33]:
one_rating

'3'

In [34]:
one_description

'  Food 3/5\\nAmbience 3/5\\nService 3/5\\n\\nHad been here for family lunch. This place serves buffets too. We ordered soups'

In [35]:
data.head(6)

Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city),updated_location,updated_rest_type,updated_cuisines
0,Jalsa,Yes,Yes,4.1,775,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",Buffet,Banashankari,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese"
1,Spice Elephant,Yes,No,4.1,787,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...",800,"[('Rated 4.0', 'RATED\n Had been here for din...",Buffet,Banashankari,Banashankari,Casual Dining,"Chinese, North Indian, Thai"
2,San Churro Cafe,Yes,No,3.8,918,"Churros, Cannelloni, Minestrone Soup, Hot Choc...",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",Buffet,Banashankari,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian"
3,Addhuri Udupi Bhojana,No,No,3.7,88,Masala Dosa,300,"[('Rated 4.0', ""RATED\n Great food and proper...",Buffet,Banashankari,Banashankari,Quick Bites,"South Indian, North Indian"
4,Grand Village,No,No,3.8,166,"Panipuri, Gol Gappe",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",Buffet,Banashankari,Basavanagudi,Casual Dining,"North Indian, Rajasthani"
5,Timepass Dinner,Yes,No,3.8,286,"Onion Rings, Pasta, Kadhai Paneer, Salads, Sal...",600,"[('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/...",Buffet,Banashankari,Basavanagudi,Casual Dining,North Indian


In [36]:
data['dish_liked'][5]

'Onion Rings, Pasta, Kadhai Paneer, Salads, Salad, Roti, Jeera Rice'

In [37]:
def convert_fn(i):
        one_rest = i
        total_reviews = len(one_rest.split('), ('))
        if total_reviews>2:
            for j in range(1):
                try:
                    one_review = one_rest.split('), (')[j].strip(stripoff)
                    one_rating = one_review.split('\',')[0].split()[1]
                    one_description = one_review.split('\',')[1].split('RATED\\n')[1].split('.')
                    return (one_description)
                except IndexError:
                    print()
      
            

In [38]:
data['reviews_list'] = data['reviews_list'].apply(convert_fn)









In [39]:
data.head(6)

Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city),updated_location,updated_rest_type,updated_cuisines
0,Jalsa,Yes,Yes,4.1,775,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...",800,"[ A beautiful place to dine in, The interiors...",Buffet,Banashankari,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese"
1,Spice Elephant,Yes,No,4.1,787,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...",800,"[ Had been here for dinner with family, Turn...",Buffet,Banashankari,Banashankari,Casual Dining,"Chinese, North Indian, Thai"
2,San Churro Cafe,Yes,No,3.8,918,"Churros, Cannelloni, Minestrone Soup, Hot Choc...",800,[ Ambience is not that good enough and it's n...,Buffet,Banashankari,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian"
3,Addhuri Udupi Bhojana,No,No,3.7,88,Masala Dosa,300,[ Great food and proper Karnataka style full ...,Buffet,Banashankari,Banashankari,Quick Bites,"South Indian, North Indian"
4,Grand Village,No,No,3.8,166,"Panipuri, Gol Gappe",600,,Buffet,Banashankari,Basavanagudi,Casual Dining,"North Indian, Rajasthani"
5,Timepass Dinner,Yes,No,3.8,286,"Onion Rings, Pasta, Kadhai Paneer, Salads, Sal...",600,,Buffet,Banashankari,Basavanagudi,Casual Dining,North Indian


In [40]:
data.isnull().sum()

name                              0
online_order                      0
book_table                        0
rate                              0
votes                             0
dish_liked                        0
approx_cost(for two people)      20
reviews_list                   4299
listed_in(type)                   0
listed_in(city)                   0
updated_location                  0
updated_rest_type                 0
updated_cuisines                  0
dtype: int64

In [41]:
data['updated_location'].value_counts()

updated_location
Bannerghatta Road     886
BTM                   864
Banashankari          765
JP Nagar              749
Jayanagar             658
                     ... 
Kammanahalli            1
Seshadripuram           1
Malleshwaram            1
Jeevan Bhima Nagar      1
Rajajinagar             1
Name: count, Length: 72, dtype: int64

In [42]:
data['dish_liked'] = data['dish_liked'].apply(lambda x: x.replace(" ", ""))
data['dish_liked'] = data['dish_liked'].str.split(',')

In [43]:
data['listed_in(type)'] = data['listed_in(type)'].apply(lambda x: x.replace(" ", ""))
data['listed_in(type)'] = data['listed_in(type)'].str.split(',')

In [44]:
data['listed_in(city)'] = data['listed_in(city)'].apply(lambda x: x.replace(" ", ""))
data['listed_in(city)'] = data['listed_in(city)'].str.split(',')

In [45]:
data['updated_location'] = data['updated_location'].apply(lambda x: x.replace(" ", ""))
data['updated_location'] = data['updated_location'].str.split(',')

In [46]:
data['updated_rest_type'] = data['updated_rest_type'].apply(lambda x: x.replace(" ", ""))
data['updated_rest_type'] = data['updated_rest_type'].str.split(',')

In [47]:
data['updated_cuisines'] = data['updated_cuisines'].apply(lambda x: x.replace(" ", ""))
data['updated_cuisines'] = data['updated_cuisines'].str.split(',')

In [48]:
data.head(6)

Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city),updated_location,updated_rest_type,updated_cuisines
0,Jalsa,Yes,Yes,4.1,775,"[Pasta, LunchBuffet, MasalaPapad, PaneerLajawa...",800,"[ A beautiful place to dine in, The interiors...",[Buffet],[Banashankari],[Banashankari],[CasualDining],"[NorthIndian, Mughlai, Chinese]"
1,Spice Elephant,Yes,No,4.1,787,"[Momos, LunchBuffet, ChocolateNirvana, ThaiGre...",800,"[ Had been here for dinner with family, Turn...",[Buffet],[Banashankari],[Banashankari],[CasualDining],"[Chinese, NorthIndian, Thai]"
2,San Churro Cafe,Yes,No,3.8,918,"[Churros, Cannelloni, MinestroneSoup, HotChoco...",800,[ Ambience is not that good enough and it's n...,[Buffet],[Banashankari],[Banashankari],"[Cafe, CasualDining]","[Cafe, Mexican, Italian]"
3,Addhuri Udupi Bhojana,No,No,3.7,88,[MasalaDosa],300,[ Great food and proper Karnataka style full ...,[Buffet],[Banashankari],[Banashankari],[QuickBites],"[SouthIndian, NorthIndian]"
4,Grand Village,No,No,3.8,166,"[Panipuri, GolGappe]",600,,[Buffet],[Banashankari],[Basavanagudi],[CasualDining],"[NorthIndian, Rajasthani]"
5,Timepass Dinner,Yes,No,3.8,286,"[OnionRings, Pasta, KadhaiPaneer, Salads, Sala...",600,,[Buffet],[Banashankari],[Basavanagudi],[CasualDining],[NorthIndian]


In [49]:
data.isnull().sum()

name                              0
online_order                      0
book_table                        0
rate                              0
votes                             0
dish_liked                        0
approx_cost(for two people)      20
reviews_list                   4299
listed_in(type)                   0
listed_in(city)                   0
updated_location                  0
updated_rest_type                 0
updated_cuisines                  0
dtype: int64

In [50]:
data['dish_liked']

0       [Pasta, LunchBuffet, MasalaPapad, PaneerLajawa...
1       [Momos, LunchBuffet, ChocolateNirvana, ThaiGre...
2       [Churros, Cannelloni, MinestroneSoup, HotChoco...
3                                            [MasalaDosa]
4                                    [Panipuri, GolGappe]
                              ...                        
9995                                        [Rajajinagar]
9996                                   [BannerghattaRoad]
9997         [Coffee, FrenchFries, LemonTea, VegSandwich]
9998                                [Koramangala1stBlock]
9999                                [Koramangala5thBlock]
Name: dish_liked, Length: 10000, dtype: object

In [51]:
data.head(7)

Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),reviews_list,listed_in(type),listed_in(city),updated_location,updated_rest_type,updated_cuisines
0,Jalsa,Yes,Yes,4.1,775,"[Pasta, LunchBuffet, MasalaPapad, PaneerLajawa...",800,"[ A beautiful place to dine in, The interiors...",[Buffet],[Banashankari],[Banashankari],[CasualDining],"[NorthIndian, Mughlai, Chinese]"
1,Spice Elephant,Yes,No,4.1,787,"[Momos, LunchBuffet, ChocolateNirvana, ThaiGre...",800,"[ Had been here for dinner with family, Turn...",[Buffet],[Banashankari],[Banashankari],[CasualDining],"[Chinese, NorthIndian, Thai]"
2,San Churro Cafe,Yes,No,3.8,918,"[Churros, Cannelloni, MinestroneSoup, HotChoco...",800,[ Ambience is not that good enough and it's n...,[Buffet],[Banashankari],[Banashankari],"[Cafe, CasualDining]","[Cafe, Mexican, Italian]"
3,Addhuri Udupi Bhojana,No,No,3.7,88,[MasalaDosa],300,[ Great food and proper Karnataka style full ...,[Buffet],[Banashankari],[Banashankari],[QuickBites],"[SouthIndian, NorthIndian]"
4,Grand Village,No,No,3.8,166,"[Panipuri, GolGappe]",600,,[Buffet],[Banashankari],[Basavanagudi],[CasualDining],"[NorthIndian, Rajasthani]"
5,Timepass Dinner,Yes,No,3.8,286,"[OnionRings, Pasta, KadhaiPaneer, Salads, Sala...",600,,[Buffet],[Banashankari],[Basavanagudi],[CasualDining],[NorthIndian]
6,Rosewood International Hotel - Bar & Restaurant,No,No,3.6,8,[MysoreRoad],800,,[Buffet],[Banashankari],[MysoreRoad],[CasualDining],"[NorthIndian, SouthIndian, Andhra, Chinese]"


In [52]:
s1 = data['listed_in(type)'] + data['listed_in(city)'] + data['updated_location'] + data['updated_rest_type'] + data['updated_cuisines']
s2 = data['dish_liked'] + data['reviews_list'] + data['listed_in(type)'] + data['listed_in(city)'] + data['updated_location'] + data['updated_rest_type'] + data['updated_cuisines']

data['tags'] = np.where(data['reviews_list'].isnull(), s1, s2)



In [53]:
new_df = data.drop(['reviews_list' ,'listed_in(type)', 'listed_in(city)', 'updated_location', 'updated_rest_type', 'updated_cuisines'], axis = 1)

### Tag Formation

In [54]:
new_df.head()

Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),tags
0,Jalsa,Yes,Yes,4.1,775,"[Pasta, LunchBuffet, MasalaPapad, PaneerLajawa...",800,"[Pasta, LunchBuffet, MasalaPapad, PaneerLajawa..."
1,Spice Elephant,Yes,No,4.1,787,"[Momos, LunchBuffet, ChocolateNirvana, ThaiGre...",800,"[Momos, LunchBuffet, ChocolateNirvana, ThaiGre..."
2,San Churro Cafe,Yes,No,3.8,918,"[Churros, Cannelloni, MinestroneSoup, HotChoco...",800,"[Churros, Cannelloni, MinestroneSoup, HotChoco..."
3,Addhuri Udupi Bhojana,No,No,3.7,88,[MasalaDosa],300,"[MasalaDosa, Great food and proper Karnataka..."
4,Grand Village,No,No,3.8,166,"[Panipuri, GolGappe]",600,"[Buffet, Banashankari, Basavanagudi, CasualDin..."


In [55]:
new_df['tags'] = new_df['tags'].apply(lambda x:" ".join(x))

In [56]:
new_df['tags']

0       Pasta LunchBuffet MasalaPapad PaneerLajawab To...
1       Momos LunchBuffet ChocolateNirvana ThaiGreenCu...
2       Churros Cannelloni MinestroneSoup HotChocolate...
3       MasalaDosa   Great food and proper Karnataka s...
4       Buffet Banashankari Basavanagudi CasualDining ...
                              ...                        
9995    Delivery BTM Rajajinagar QuickBites NorthIndia...
9996    Delivery BTM BannerghattaRoad QuickBites Chine...
9997    Delivery BTM BannerghattaRoad Cafe Bakery Cafe...
9998    Delivery BTM Koramangala1stBlock QuickBites No...
9999    Delivery BTM Koramangala5thBlock Mess Andhra B...
Name: tags, Length: 10000, dtype: object

## Stemming words

In [57]:
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [58]:
def stem_fn(text):
  y = []

  for i in text.split():
    y.append(ps.stem(i))
  return " ".join(y)

In [59]:
new_df['tags'] = new_df['tags'].apply(stem_fn)

In [60]:
new_df['tags'][1]


'momo lunchbuffet chocolatenirvana thaigreencurri paneertikka dumbiryani chickenbiryani had been here for dinner with famili turn out to be a good choos suitabl for all age of peopl can tri thi place we like the most wa their starter servic is good price are afford will recommend thi restaur for earli dinner the place is littl noisi buffet banashankari banashankari casualdin chines northindian thai'

In [61]:
new_df['tags'][8888]

"mocktail coffe salad barbecuechickenw pizza meatbal hotchocol food-3 0\\nambience-4 0\\nservice-2 0\\nwell let' start with the posit i had order the grill chicken burger, veg nacho and raspberri ice tea \\nburger wa alright well cook not too dri patti with tomato and lettuc and chees overal a decent burger noth to complain about the salsa which came with the nacho wa ketchup with onion in it and i wouldn't have complain if i dint get salsa raspberri ice tea did not have hint of raspberri \\nthe servic wa especi bad \\nfirstli i wa deni regular water say they do not offer regular water and that i had to purchas over price miner water upon insist over and over they gave me regular water my raspberri ice tea had an ant in it even though i want to order more there wa no one want to take the order even though they pass by the onli nice thing about thi experi is the place it self deliveri btm koramangala5thblock cafe cafe continent beverag"

## Text Vecorization

 Removed stop words like and, or before applying vectorization

In [62]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 5000, stop_words = 'english')

In [63]:
vectors = cv.fit_transform(new_df['tags']).toarray()
vectors

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [64]:
import nltk

In [65]:
ps.stem('dance')

'danc'

In [66]:
new_df.head()

Unnamed: 0,name,online_order,book_table,rate,votes,dish_liked,approx_cost(for two people),tags
0,Jalsa,Yes,Yes,4.1,775,"[Pasta, LunchBuffet, MasalaPapad, PaneerLajawa...",800,pasta lunchbuffet masalapapad paneerlajawab to...
1,Spice Elephant,Yes,No,4.1,787,"[Momos, LunchBuffet, ChocolateNirvana, ThaiGre...",800,momo lunchbuffet chocolatenirvana thaigreencur...
2,San Churro Cafe,Yes,No,3.8,918,"[Churros, Cannelloni, MinestroneSoup, HotChoco...",800,churro cannelloni minestronesoup hotchocol pin...
3,Addhuri Udupi Bhojana,No,No,3.7,88,[MasalaDosa],300,masaladosa great food and proper karnataka sty...
4,Grand Village,No,No,3.8,166,"[Panipuri, GolGappe]",600,buffet banashankari basavanagudi casualdin nor...


In [67]:
new_df.to_csv('zomato_clean.csv', index = False)

In [68]:
s1 = data['listed_in(type)'] + data['listed_in(city)'] + data['updated_location'] + data['updated_rest_type'] + data['updated_cuisines']
s2 = data['dish_liked'] + data['reviews_list'] + data['listed_in(type)'] + data['listed_in(city)'] + data['updated_location'] + data['updated_rest_type'] + data['updated_cuisines']

data['tags'] = np.where(data['reviews_list'].isnull(), s1, s2) 
         


In [69]:
new_df = data.drop(['reviews_list' ,'listed_in(type)', 'listed_in(city)', 'updated_location', 'updated_rest_type', 'updated_cuisines'], axis = 1)

In [70]:
new_df['tags'] = new_df['tags'].apply(lambda x:" ".join(x))

In [71]:
new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())

In [72]:
new_df['tags'][0]

'pasta lunchbuffet masalapapad paneerlajawab tomatoshorba dumbiryani sweetcornsoup   a beautiful place to dine in the interiors take you back to the mughal era  the lightings are just perfect we went there on the occasion of christmas and so they had only limited items available  but the taste and service was not compromised at all the only complaint is that the breads could have been better would surely like to come here again  buffet banashankari banashankari casualdining northindian mughlai chinese'

## Performing cosine similarity

In [73]:
from sklearn.metrics.pairwise import cosine_similarity

In [74]:
similarity = cosine_similarity(vectors)

In [75]:
similarity

array([[1.        , 0.31622777, 0.14509525, ..., 0.        , 0.06666667,
        0.        ],
       [0.31622777, 1.        , 0.27529888, ..., 0.        , 0.06324555,
        0.        ],
       [0.14509525, 0.27529888, 1.        , ..., 0.29346959, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.29346959, ..., 1.        , 0.26967994,
        0.24618298],
       [0.06666667, 0.06324555, 0.        , ..., 0.26967994, 1.        ,
        0.36514837],
       [0.        , 0.        , 0.        , ..., 0.24618298, 0.36514837,
        1.        ]])

## Recommendation main function

In [76]:
def recomm(res):
    res_ind = new_df[new_df['name'] == res].index[0]
    res_list = sorted(list(enumerate(similarity[res_ind])), reverse = True, key = lambda x: x[1])[1:6]
#     print(sorted(list(enumerate(similarity[res_ind])), reverse = True, key = lambda x: x[1]))
    
    for i in res_list:
        print(new_df.iloc[i[0]]['name'])

In [80]:
recomm('Pizza Hut')

Pizza Hut
Pizza Hut
Melting Melodies
The Pizzeria
Nandhini Deluxe
