## 1. Let's read the data for analysis

In [6]:
### import necessary packages 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
data = pd.read_csv("../data/zomato.csv")

In [8]:
type(data) ## ie above "data" belongs to dataframe data-structure

pandas.core.frame.DataFrame

In [9]:
data.head(3) ## getting first 3 rows of data

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari


In [10]:
data.columns ## getting all the columns of dataframe

Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)'],
      dtype='object')

In [11]:
data.shape ## getting dimensions of dataframe ..

(51717, 17)

## 2. Performing data preprocessing

In [12]:
data.duplicated().sum()

np.int64(0)

In [13]:
data.duplicated().sum() ## 0 duplicated entries

np.int64(0)

In [14]:
data.isnull().sum() ## getting total missing values in every feature of dataframe

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [15]:
### First and foremost, we have to clean the missing values of the 'location' feature in order to come up with meaningful results.

In [16]:
### As we have few missing values in location feature ,then we can drop that..

data.dropna(subset=['location'] , inplace = True)

In [17]:
data.isnull().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7754
votes                              0
phone                           1187
location                           0
rest_type                        206
dish_liked                     28057
cuisines                          24
approx_cost(for two people)      325
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [18]:
data.head(3)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari


In [19]:
df = data.copy() ## creating copy of "data" into "df" ,so that whatever manipulations we will do , we will do on top of df

In [20]:
### Let's make every place name more specific so that we get more accurate geographical coordinates.

In [21]:
## Build the geocoding query from the untouched `data` frame rather than from df itself,
## so that re-running this cell does not append the suffix a second time.
df['location'] = data['location'] + ' , Bangalore , Karnataka , India'

In [22]:
df['location']

0             Banashankari , Bangalore , Karnataka , India
1             Banashankari , Bangalore , Karnataka , India
2             Banashankari , Bangalore , Karnataka , India
3             Banashankari , Bangalore , Karnataka , India
4             Basavanagudi , Bangalore , Karnataka , India
                               ...                        
51712           Whitefield , Bangalore , Karnataka , India
51713           Whitefield , Bangalore , Karnataka , India
51714           Whitefield , Bangalore , Karnataka , India
51715    ITPL Main Road, Whitefield , Bangalore , Karna...
51716    ITPL Main Road, Whitefield , Bangalore , Karna...
Name: location, Length: 51696, dtype: object

In [23]:
df.dtypes ## various data-types

url                            object
address                        object
name                           object
online_order                   object
book_table                     object
rate                           object
votes                           int64
phone                          object
location                       object
rest_type                      object
dish_liked                     object
cuisines                       object
approx_cost(for two people)    object
reviews_list                   object
menu_item                      object
listed_in(type)                object
listed_in(city)                object
dtype: object

In [24]:
'''
Categorical data has : Object & bool data-types 
Numerical data have : Integer & Float data-type


Categorical data refers to a data type that can be stored into groups/categories/labels 
Examples of categorical variables are  age group, blood type etc.. 


Numerical data refers to the data that is in the form of numbers, 
Examples of numerical data are height, weight, age etc.. 

Numerical data has two categories: discrete data and continuous data


Discrete data : It basically takes countable numbers like 1, 2, 3, 4, 5, and so on. 
                age of a fly : 8 , 9 day etc..
                
Continuous data : which is continuous in nature 
                  amount of sugar , 11.2 kg  , temp of a city  , your bank balance !
                  


'''

'\nCategorical data has : Object & bool data-types \nNumerical data have : Integer & Float data-type\n\n\nCategorical data refers to a data type that can be stored into groups/categories/labels \nExamples of categorical variables are  age group, blood type etc.. \n\n\nNumerical data refers to the data that is in the form of numbers, \nExamples of numerical data are height, weight, age etc.. \n\nNumerical data has two categories: discrete data and continuous data\n\n\nDiscrete data : It basically takes countable numbers like 1, 2, 3, 4, 5, and so on. \n                age of a fly : 8 , 9 day etc..\n                \nContinuous data : which is continuous in nature \n                  amount of sugar , 11.2 kg  , temp of a city  , your bank balance !\n                  \n\n\n'

In [25]:
'''

Variations of int are : ('int64','int32','int16') in numpy library..


Int16 is a 16 bit signed integer , it means it can store both positive & negative values
int16 has has a range of  (2^15 − 1) to -2^15 
int16 has a length of 16 bits (2 bytes).. ie Int16 uses 16 bits 


Int32 is a 32 bit signed integer , it means it storesboth positive & negative values
int32 has has a range of (2³¹ − 1) to  -2^31
int32 has a length of 32 bits (4 bytes),, ie Int32 uses 32 bits


Int64 is a 64 bit signed integer , it means it can store both positive & negative values
int64 has has a range of  (2^63 − 1) to -2^63 
int64 has a length of 64 bits (8 bytes) , ie Int64 uses 64 bits.
             

The only difference between them is the range of numbers they can store: int64 has the largest range, then int32, then int16, then int8.

That also means an int64 takes up twice as much memory as an int32, and
operations on int64 values may be a lot slower on some machine architectures.

In exchange, an int64 can represent large whole numbers exactly (a 32 bit float starts
rounding whole numbers above 2^24), and it allows much larger numbers to be stored than int32.

'''


"\n\nVariations of int are : ('int64','int32','int16') in numpy library..\n\n\nInt16 is a 16 bit signed integer , it means it can store both positive & negative values\nint16 has has a range of  (2^15 − 1) to -2^15 \nint16 has a length of 16 bits (2 bytes).. ie Int16 uses 16 bits \n\n\nInt32 is a 32 bit signed integer , it means it storesboth positive & negative values\nint32 has has a range of (2³¹ − 1) to  -2^31\nint32 has a length of 32 bits (4 bytes),, ie Int32 uses 32 bits\n\n\nInt64 is a 64 bit signed integer , it means it can store both positive & negative values\nint64 has has a range of  (2^63 − 1) to -2^63 \nint64 has a length of 64 bits (8 bytes) , ie Int64 uses 64 bits.\n             \n\nThe only difference is that int64 has max range of storing numbers , then comes int32 , then 16 , then int8\n\nThat means that Int64’s take up twice as much memory-and doing \noperations on them may be a lot slower in some machine architectures.\n\nHowever, Int64’s can represent numbers muc

## 3. Extracting latitudes & longitudes from the data

#### Definition of latitude & longitude:
    Lines of longitude (meridians) run from top to bottom of the globe (north to south); they measure how far
    east or west a point is and span 360 degrees in total (-180 to +180). Lines of latitude (parallels) run from
    side to side (west to east); they measure how far north or south a point is and span 180 degrees in total (-90 to +90).
    

In [26]:
## first we will learn how to extract Latitudes & longitudes using 'location' feature

In [27]:
df.columns

Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)'],
      dtype='object')

In [28]:
df.head(2)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari


In [29]:
rest_loc = pd.DataFrame() ## creating dataframe of all the unique locations of Bengalore

In [30]:
rest_loc['Name'] = df['location'].unique()

In [31]:
rest_loc

Unnamed: 0,Name
0,"Banashankari , Bangalore , Karnataka , India"
1,"Basavanagudi , Bangalore , Karnataka , India"
2,"Mysore Road , Bangalore , Karnataka , India"
3,"Jayanagar , Bangalore , Karnataka , India"
4,"Kumaraswamy Layout , Bangalore , Karnataka , I..."
...,...
88,"West Bangalore , Bangalore , Karnataka , India"
89,"Magadi Road , Bangalore , Karnataka , India"
90,"Yelahanka , Bangalore , Karnataka , India"
91,"Sahakara Nagar , Bangalore , Karnataka , India"


In [32]:
# !pip install geopy

In [33]:
from geopy.geocoders import Nominatim

### Nominatim is a tool to search OpenStreetMap data by address or location

In [34]:
## Nominatim's usage policy asks every application to identify itself with its own
## User-Agent; generic values such as "app" are discouraged and may be refused.
## timeout=None disables the client-side timeout, to get rid of timeout errors.
geolocator = Nominatim(user_agent="zomato-bangalore-spatial-analysis" , timeout=None )

In [35]:
from geopy.extra.rate_limiter import RateLimiter

## The public Nominatim server expects at most one request per second, so wrap
## geocode() in a rate limiter instead of firing the requests back to back.
## RateLimiter also retries on service errors and, with its default settings,
## returns None when a lookup ultimately fails -- the loop records that as NaN.
geocode = RateLimiter(geolocator.geocode , min_delay_seconds=1)

lat = [] ## latitudes, in the same order as rest_loc['Name']
lon = [] ## longitudes, in the same order as rest_loc['Name']


for name in rest_loc['Name']:
    location = geocode(name)

    if location is None:
        ## nothing was found for this place: keep the row, mark it as missing
        lat.append(np.nan)
        lon.append(np.nan)

    else:
        lat.append(location.latitude)
        lon.append(location.longitude)

In [36]:
print(lat)

[12.9393328, 12.9417261, 12.9597674, 12.9399039, 12.9067683, 12.9274413, 12.9660722, 12.9055682, 12.9096941, 12.864107149999999, 12.965717999999999, 12.95212575, 12.9527211, 12.9159707, 12.9368751, 12.9089453, 12.9855509, 12.848759900000001, 12.90056335, 12.9552572, 12.923772, 12.9489339, 12.9575547, 12.9348429, 12.9408685, 12.9666654, 12.9364846, 13.0394104, 12.9327778, 12.93103185, 12.9696365, 12.9739905, 12.9604498, 12.9962979, 12.9277245, 12.9986827, 13.0227204, 12.9755264, 12.9721256, 12.9749487, 12.9749032, 12.9778793, 12.9737987, 12.986391, 12.9810146, 12.9757994, 12.985592, 12.9813526, 12.9822324, 12.9934283, 13.0358698, 12.9624669, 12.9427087, 12.9678074, 12.9824848, 13.0027353, 12.9931876, 13.0093455, 12.9390255, 12.9783547, 12.957998, 12.97339325, 12.9578658, 12.96381425, 12.9876393, 12.9621761, 13.007516, 12.9243692, 12.9282918, 12.9489289, 12.9501192, 13.0005359, 13.0058357, 12.9817233, 13.0422794, 13.0258087, 13.0221416, 13.0170347, 13.0784743, nan, 12.973936, 12.9846713,

In [37]:
rest_loc['lat'] = lat
rest_loc['lon'] = lon

In [38]:
rest_loc

Unnamed: 0,Name,lat,lon
0,"Banashankari , Bangalore , Karnataka , India",12.939333,77.553982
1,"Basavanagudi , Bangalore , Karnataka , India",12.941726,77.575502
2,"Mysore Road , Bangalore , Karnataka , India",12.959767,77.556145
3,"Jayanagar , Bangalore , Karnataka , India",12.939904,77.582638
4,"Kumaraswamy Layout , Bangalore , Karnataka , I...",12.906768,77.559502
...,...,...,...
88,"West Bangalore , Bangalore , Karnataka , India",13.009652,77.553054
89,"Magadi Road , Bangalore , Karnataka , India",12.975608,77.555356
90,"Yelahanka , Bangalore , Karnataka , India",13.107915,77.585524
91,"Sahakara Nagar , Bangalore , Karnataka , India",13.062147,77.580061


We have found out latitude and longitude of each location listed in the dataset using geopy
This is used to plot maps.

In [39]:
rest_loc.isnull().sum() ## checking missing values in a rest_loc dataframe..

Name    0
lat     2
lon     2
dtype: int64

In [40]:
rest_loc['lat'].isnull()

0     False
1     False
2     False
3     False
4     False
      ...  
88    False
89    False
90    False
91    False
92    False
Name: lat, Length: 93, dtype: bool

In [41]:
rest_loc[rest_loc['lat'].isnull()]

Unnamed: 0,Name,lat,lon
79,"Rammurthy Nagar , Bangalore , Karnataka , India",,
85,"Sadashiv Nagar , Bangalore , Karnataka , India",,


In [42]:
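### Let's take the help of Google in such circumstances.
### Search for "co-ordinates of St. Marks Road , Bangalore" and use the result below.
### NOTE(review): the rows with missing coordinates found above are 79 and 85; row 45 is assumed to be St. Marks Road -- confirm before overwriting it.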

In [43]:
rest_loc.loc[45, 'lat'] = 12.9764122

In [44]:
rest_loc['lat'][45]

np.float64(12.9764122)

In [45]:
rest_loc.loc[45, 'lon'] = 77.6017437

In [47]:
import warnings
from warnings import filterwarnings
filterwarnings('ignore')

In [48]:
### for Rammurthy Nagar , Bangalore
### 13.0163° N, 77.6785° E

In [49]:
### Fill in the manually looked-up coordinates for Rammurthy Nagar.
### The row is selected by its name instead of by position 79, so the fix stays
### attached to the right place even if the order of rest_loc changes.
is_rammurthy_nagar = rest_loc['Name'] == 'Rammurthy Nagar , Bangalore , Karnataka , India'
rest_loc.loc[is_rammurthy_nagar, 'lat'] = 13.0163
rest_loc.loc[is_rammurthy_nagar, 'lon'] = 77.6785

In [50]:
### for Sadashiv Nagar ,
### 13.0068 (Lat) & 77.5813(Lon)

In [51]:
### Fill in the manually looked-up coordinates for Sadashiv Nagar.
### The row is selected by its name instead of by position 85, so the fix stays
### attached to the right place even if the order of rest_loc changes.
is_sadashiv_nagar = rest_loc['Name'] == 'Sadashiv Nagar , Bangalore , Karnataka , India'
rest_loc.loc[is_sadashiv_nagar, 'lon'] = 77.5813
rest_loc.loc[is_sadashiv_nagar, 'lat'] = 13.0068

In [52]:
rest_loc.isnull().sum()

Name    0
lat     0
lon     0
dtype: int64

## 4. Writing structured queries to extract latitudes & longitudes

In [53]:
df.head(2)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari


In [54]:
## Nominatim's usage policy asks every application to identify itself with its own
## User-Agent; generic values such as "app" are discouraged and may be refused.
## timeout=None disables the client-side timeout.
geolocator = Nominatim(user_agent="zomato-bangalore-spatial-analysis" , timeout=None )

In [55]:
df['address'][0]

'942, 21st Main Road, 2nd Stage, Banashankari, Bangalore'

In [56]:
loc = geolocator.geocode(df['address'][0])

In [57]:
hasattr(loc , 'latitude')


## hasattr(loc, 'latitude') returns whether `loc` has a `latitude` attribute.

### True means the geocoder returned a result carrying coordinates for this address;
### False would mean no usable result came back (NOTE(review): presumably geocode() returned None -- confirm).

True

#### Now, how do we find the geographical coordinates for the address feature?
    For this you can use a structured query.

For a structured query, provide a dictionary whose keys
    are like: `street`, `city`, `county`, `state`, `country`, or
    `postalcode`

In [58]:
address = {'street':'21st Main Road' , 'city':'Bangalore' , 'country':'India' , 'state':'Karnataka'}

In [59]:
address_geocode = geolocator.geocode(address)

In [60]:
hasattr(address_geocode , 'latitude')

True

In [61]:
hasattr(address_geocode , 'longitude')

True

In [62]:
address_geocode.latitude

12.940228

In [63]:
address_geocode.longitude

77.5537778

## 5. Where are the most restaurants located in Bangalore?

In [64]:
df['location'].value_counts()

location
BTM , Bangalore , Karnataka , India                      5124
HSR , Bangalore , Karnataka , India                      2523
Koramangala 5th Block , Bangalore , Karnataka , India    2504
JP Nagar , Bangalore , Karnataka , India                 2235
Whitefield , Bangalore , Karnataka , India               2144
                                                         ... 
Yelahanka , Bangalore , Karnataka , India                   6
West Bangalore , Bangalore , Karnataka , India              6
Jakkur , Bangalore , Karnataka , India                      3
Rajarajeshwari Nagar , Bangalore , Karnataka , India        2
Peenya , Bangalore , Karnataka , India                      1
Name: count, Length: 93, dtype: int64

In [65]:
type(df['location'].value_counts())

pandas.core.series.Series

In [66]:
Rest_locations = df['location'].value_counts().reset_index()

In [67]:
Rest_locations.columns = ['Name' , 'count']

In [68]:
Rest_locations

Unnamed: 0,Name,count
0,"BTM , Bangalore , Karnataka , India",5124
1,"HSR , Bangalore , Karnataka , India",2523
2,"Koramangala 5th Block , Bangalore , Karnataka ...",2504
3,"JP Nagar , Bangalore , Karnataka , India",2235
4,"Whitefield , Bangalore , Karnataka , India",2144
...,...,...
88,"Yelahanka , Bangalore , Karnataka , India",6
89,"West Bangalore , Bangalore , Karnataka , India",6
90,"Jakkur , Bangalore , Karnataka , India",3
91,"Rajarajeshwari Nagar , Bangalore , Karnataka ,...",2


In [69]:
### Now we can say that These are my locations where most of my restaurants are located..

In [70]:
'''

lets create Heatmap of this results so that it becomes more user-friendly..
now In order to perform Spatial Anlysis(Geographical Analysis) , we need latitudes & longitudes of every location..

'''

'\n\nlets create Heatmap of this results so that it becomes more user-friendly..\nnow In order to perform Spatial Anlysis(Geographical Analysis) , we need latitudes & longitudes of every location..\n\n'

In [71]:
rest_loc

Unnamed: 0,Name,lat,lon
0,"Banashankari , Bangalore , Karnataka , India",12.939333,77.553982
1,"Basavanagudi , Bangalore , Karnataka , India",12.941726,77.575502
2,"Mysore Road , Bangalore , Karnataka , India",12.959767,77.556145
3,"Jayanagar , Bangalore , Karnataka , India",12.939904,77.582638
4,"Kumaraswamy Layout , Bangalore , Karnataka , I...",12.906768,77.559502
...,...,...,...
88,"West Bangalore , Bangalore , Karnataka , India",13.009652,77.553054
89,"Magadi Road , Bangalore , Karnataka , India",12.975608,77.555356
90,"Yelahanka , Bangalore , Karnataka , India",13.107915,77.585524
91,"Sahakara Nagar , Bangalore , Karnataka , India",13.062147,77.580061


### So let's merge both dataframes on `Name` in order to attach the geographical coordinates to the restaurant counts.

In [73]:
Beng_rest_locations = Rest_locations.merge(rest_loc , on="Name")

In [74]:
type(Beng_rest_locations)

pandas.core.frame.DataFrame

In [75]:
Beng_rest_locations.head(5)

Unnamed: 0,Name,count,lat,lon
0,"BTM , Bangalore , Karnataka , India",5124,12.915971,77.616227
1,"HSR , Bangalore , Karnataka , India",2523,12.900563,77.649475
2,"Koramangala 5th Block , Bangalore , Karnataka ...",2504,12.934843,77.618977
3,"JP Nagar , Bangalore , Karnataka , India",2235,12.909694,77.586607
4,"Whitefield , Bangalore , Karnataka , India",2144,12.969637,77.749745


In [77]:
import folium

In [78]:
def Generate_basemap(location=(12.97 , 77.59), **map_kwargs):
    """Create a fresh folium base map to draw layers (heatmap, markers, ...) on.

    Parameters
    ----------
    location : sequence of two floats, optional
        [latitude, longitude] the map is centred on. The default,
        (12.97, 77.59), is the centre used throughout this notebook.
    **map_kwargs
        Any further keyword arguments are passed straight to ``folium.Map``.

    Returns
    -------
    folium.Map
        A new map object; every call returns an independent map, so layers
        added to one map do not show up on another.
    """
    basemap = folium.Map(location=list(location), **map_kwargs)
    return basemap

In [79]:
basemap = Generate_basemap()

#### To showcase this on a map (heatmap), we first need to create a base map so that the heatmap can be drawn on top of it.

In [80]:
### Geographic heat maps are used to identify where something occurs, and demonstrate areas of high and low density...

from folium.plugins import HeatMap

In [81]:
Beng_rest_locations.columns

Index(['Name', 'count', 'lat', 'lon'], dtype='object')

In [82]:
Beng_rest_locations[['lat', 'lon' , 'count']]

Unnamed: 0,lat,lon,count
0,12.915971,77.616227,5124
1,12.900563,77.649475,2523
2,12.934843,77.618977,2504
3,12.909694,77.586607,2235
4,12.969637,77.749745,2144
...,...,...,...
88,13.107915,77.585524,6
89,13.009652,77.553054,6
90,13.078474,77.606894,3
91,12.927441,77.515522,2


In [83]:
HeatMap(Beng_rest_locations[['lat', 'lon' , 'count']]).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x7f8a9ff455e0>

In [66]:
basemap

### Conclusion: the majority of the restaurants are available in the city-centre area.

## 6. Performing marker cluster analysis
        Similar to the previous section, but using a marker cluster instead of a heatmap.

In [84]:
from folium.plugins import FastMarkerCluster

In [85]:
basemap = Generate_basemap()

In [86]:
Beng_rest_locations[['lat', 'lon' , 'count']]

Unnamed: 0,lat,lon,count
0,12.915971,77.616227,5124
1,12.900563,77.649475,2523
2,12.934843,77.618977,2504
3,12.909694,77.586607,2235
4,12.969637,77.749745,2144
...,...,...,...
88,13.107915,77.585524,6
89,13.009652,77.553054,6
90,13.078474,77.606894,3
91,12.927441,77.515522,2


In [87]:
FastMarkerCluster(Beng_rest_locations[['lat', 'lon' , 'count']]).add_to(basemap)

<folium.plugins.fast_marker_cluster.FastMarkerCluster at 0x7f8a9e2d98b0>

In [73]:
basemap

## 7.. Plotting all the markers of places of Bangalore !

Plotting Markers on the Map :

    Folium gives a folium.Marker() class for plotting markers on a map
    Just pass the latitude and longitude of the location, mention the popup and tooltip and add it to the map.

#### Plotting markers is a two-step process.
    1) you need to create a base map on which your markers will be placed
    2) and then add your markers to it:

In [88]:
Beng_rest_locations

Unnamed: 0,Name,count,lat,lon
0,"BTM , Bangalore , Karnataka , India",5124,12.915971,77.616227
1,"HSR , Bangalore , Karnataka , India",2523,12.900563,77.649475
2,"Koramangala 5th Block , Bangalore , Karnataka ...",2504,12.934843,77.618977
3,"JP Nagar , Bangalore , Karnataka , India",2235,12.909694,77.586607
4,"Whitefield , Bangalore , Karnataka , India",2144,12.969637,77.749745
...,...,...,...,...
88,"Yelahanka , Bangalore , Karnataka , India",6,13.107915,77.585524
89,"West Bangalore , Bangalore , Karnataka , India",6,13.009652,77.553054
90,"Jakkur , Bangalore , Karnataka , India",3,13.078474,77.606894
91,"Rajarajeshwari Nagar , Bangalore , Karnataka ,...",2,12.927441,77.515522


In [89]:
m = Generate_basemap() ## it will generate basemap

In [90]:
m

In [91]:
Beng_rest_locations

Unnamed: 0,Name,count,lat,lon
0,"BTM , Bangalore , Karnataka , India",5124,12.915971,77.616227
1,"HSR , Bangalore , Karnataka , India",2523,12.900563,77.649475
2,"Koramangala 5th Block , Bangalore , Karnataka ...",2504,12.934843,77.618977
3,"JP Nagar , Bangalore , Karnataka , India",2235,12.909694,77.586607
4,"Whitefield , Bangalore , Karnataka , India",2144,12.969637,77.749745
...,...,...,...,...
88,"Yelahanka , Bangalore , Karnataka , India",6,13.107915,77.585524
89,"West Bangalore , Bangalore , Karnataka , India",6,13.009652,77.553054
90,"Jakkur , Bangalore , Karnataka , India",3,13.078474,77.606894
91,"Rajarajeshwari Nagar , Bangalore , Karnataka ,...",2,12.927441,77.515522


In [92]:
# Put one marker per location on the map; each popup shows that row's `count` value.

marker_fields = zip(Beng_rest_locations['lat'] , Beng_rest_locations['lon'] , Beng_rest_locations['count'])
for latitude , longitude , n_listed in marker_fields:
    marker = folium.Marker(location = [latitude , longitude] , popup=n_listed)
    marker.add_to(m)

In [93]:
m

## 8. Data cleaning of the 'rate' feature
        In order to analyse where the restaurants with a high average rating are situated,
        we first need to clean the 'rate' feature.

In [94]:
df.head(3)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,"Banashankari , Bangalore , Karnataka , India","Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari


In [95]:
df['rate']

0         4.1/5
1         4.1/5
2         3.8/5
3         3.7/5
4         3.8/5
          ...  
51712    3.6 /5
51713       NaN
51714       NaN
51715    4.3 /5
51716    3.4 /5
Name: rate, Length: 51696, dtype: object

In [96]:
df['rate'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', nan, '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [97]:
### now it needs cleaning..

In [98]:
df['rate'].isnull().sum()

np.int64(7754)

In [99]:
df['rate'].isnull().sum()/len(df)*100 ## ie approximately 15% of your rating belongs to missing values..

np.float64(14.999226245744351)

In [100]:
df.dropna(subset = ['rate'] , inplace = True)

In [101]:
df['rate'].isnull().sum()

np.int64(0)

In [102]:
df['rate'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [103]:
## Replace the non-numeric placeholders in the 'rate' column only.
## Calling df.replace(...) on the whole dataframe would also overwrite any cell in
## any other column (name, dish_liked, ...) whose value is exactly 'NEW' or '-'.
df['rate'] = df['rate'].replace({'NEW': '0' , '-': '0'})

## Note: the decision of replacing {"NEW" -> "0"} & {"-" -> "0"} can be changed, as it depends upon domain expertise & your business analyst.
## Keep in mind that these 0 values take part in any average of the rating computed later on.

In [104]:
df['rate'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', '0', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [105]:
## Remove the literal "/5" suffix (regex=False: plain text, independent of the pandas
## version default) and strip the blank left behind by values such as "3.7 /5".
df['rating'] = df['rate'].str.replace('/5' , '' , regex=False).str.strip()

In [106]:
df['rating']

0         4.1
1         4.1
2         3.8
3         3.7
4         3.8
         ... 
51709    3.7 
51711    2.5 
51712    3.6 
51715    4.3 
51716    3.4 
Name: rating, Length: 43942, dtype: object

In [107]:
df['rating'] = df['rating'].astype(float) ## converting its data-type into float

In [108]:
df['rating'].dtype

dtype('float64')

In [109]:
df['rating'].unique()

array([4.1, 3.8, 3.7, 3.6, 4.6, 4. , 4.2, 3.9, 3.1, 3. , 3.2, 3.3, 2.8,
       4.4, 4.3, 0. , 2.9, 3.5, 2.6, 3.4, 4.5, 2.5, 2.7, 4.7, 2.4, 2.2,
       2.3, 4.8, 4.9, 2.1, 2. , 1.8])

## 9. Highest-rated restaurants

In [110]:
df.head(4)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city),rating
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,4.1
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,"Banashankari , Bangalore , Karnataka , India",Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,4.1
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,"Banashankari , Bangalore , Karnataka , India","Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.8
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,"Banashankari , Bangalore , Karnataka , India",Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.7


In [111]:
df.columns ## list the column labels ('rating' is now included)

Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)', 'rating'],
      dtype='object')

In [112]:
grp_df = df.groupby(['location'] , as_index=False).agg({'rating':'mean' , 'name':'size'})

## per location: 'rating' -> mean rating , 'name' -> size , i.e. the number of rows in that location
## each row of df appears to be one restaurant listing , so this is a count of listings per area , not a count of orders
## NOTE(review): rows whose rating is 0.0 are included in the mean -- if 0 is a placeholder for unrated restaurants it pulls the averages down ; confirm

In [113]:
## rename through an explicit old -> new mapping instead of positional assignment,
## so a label can never be attached to the wrong column if the aggregation order changes;
## keys that are not present are ignored, so re-running this cell is harmless
grp_df = grp_df.rename(columns={'location': 'Name', 'rating': 'avg_rating', 'name': 'count'})

In [114]:
grp_df ## one row per location: Name , avg_rating , count

Unnamed: 0,Name,avg_rating,count
0,"BTM , Bangalore , Karnataka , India",3.296128,4261
1,"Banashankari , Bangalore , Karnataka , India",3.373292,805
2,"Banaswadi , Bangalore , Karnataka , India",3.362926,499
3,"Bannerghatta Road , Bangalore , Karnataka , India",3.271677,1324
4,"Basavanagudi , Bangalore , Karnataka , India",3.478185,628
...,...,...,...
87,"West Bangalore , Bangalore , Karnataka , India",2.020000,5
88,"Whitefield , Bangalore , Karnataka , India",3.384170,1693
89,"Wilson Garden , Bangalore , Karnataka , India",3.257635,203
90,"Yelahanka , Bangalore , Karnataka , India",3.640000,5


In [115]:
## let's consider only those locations that have more than 400 rows (restaurant listings) !

In [116]:
grp_df['count'].gt(400) ## boolean mask: True where a location's row count is above 400

0      True
1      True
2      True
3      True
4      True
      ...  
87    False
88     True
89    False
90    False
91    False
Name: count, Length: 92, dtype: bool

In [117]:
## keep only the locations whose row count is above 400
busy_mask = grp_df['count'].gt(400)
temp_df = grp_df.loc[busy_mask]

In [118]:
temp_df.shape ## (rows , columns) left after the count filter

(35, 3)

In [119]:
temp_df ## the locations that passed the count filter

Unnamed: 0,Name,avg_rating,count
0,"BTM , Bangalore , Karnataka , India",3.296128,4261
1,"Banashankari , Bangalore , Karnataka , India",3.373292,805
2,"Banaswadi , Bangalore , Karnataka , India",3.362926,499
3,"Bannerghatta Road , Bangalore , Karnataka , India",3.271677,1324
4,"Basavanagudi , Bangalore , Karnataka , India",3.478185,628
6,"Bellandur , Bangalore , Karnataka , India",3.309833,1078
8,"Brigade Road , Bangalore , Karnataka , India",3.595849,1084
9,"Brookefield , Bangalore , Karnataka , India",3.374699,581
12,"Church Street , Bangalore , Karnataka , India",3.963091,550
15,"Cunningham Road , Bangalore , Karnataka , India",3.901053,475


In [120]:
rest_loc ## per-location coordinates (Name , lat , lon) ; built outside this section of the notebook

Unnamed: 0,Name,lat,lon
0,"Banashankari , Bangalore , Karnataka , India",12.939333,77.553982
1,"Basavanagudi , Bangalore , Karnataka , India",12.941726,77.575502
2,"Mysore Road , Bangalore , Karnataka , India",12.959767,77.556145
3,"Jayanagar , Bangalore , Karnataka , India",12.939904,77.582638
4,"Kumaraswamy Layout , Bangalore , Karnataka , I...",12.906768,77.559502
...,...,...,...
88,"West Bangalore , Bangalore , Karnataka , India",13.009652,77.553054
89,"Magadi Road , Bangalore , Karnataka , India",12.975608,77.555356
90,"Yelahanka , Bangalore , Karnataka , India",13.107915,77.585524
91,"Sahakara Nagar , Bangalore , Karnataka , India",13.062147,77.580061


In [121]:
### let's merge both dataframes so that we get the "latitudes" & "longitudes" as well !

In [122]:
## attach lat / lon to every filtered location by matching on 'Name'
## inner join: a location without a matching row in rest_loc is dropped from the result
Ratings_locations = pd.merge(temp_df, rest_loc, how='inner', on='Name')

In [123]:
Ratings_locations ## merged result: rating stats plus lat / lon per location

Unnamed: 0,Name,avg_rating,count,lat,lon
0,"BTM , Bangalore , Karnataka , India",3.296128,4261,12.915971,77.616227
1,"Banashankari , Bangalore , Karnataka , India",3.373292,805,12.939333,77.553982
2,"Banaswadi , Bangalore , Karnataka , India",3.362926,499,13.005836,77.628132
3,"Bannerghatta Road , Bangalore , Karnataka , India",3.271677,1324,12.952721,77.605161
4,"Basavanagudi , Bangalore , Karnataka , India",3.478185,628,12.941726,77.575502
5,"Bellandur , Bangalore , Karnataka , India",3.309833,1078,12.931032,77.678247
6,"Brigade Road , Bangalore , Karnataka , India",3.595849,1084,12.972126,77.607015
7,"Brookefield , Bangalore , Karnataka , India",3.374699,581,12.963814,77.722437
8,"Church Street , Bangalore , Karnataka , India",3.963091,550,12.974903,77.60529
9,"Cunningham Road , Bangalore , Karnataka , India",3.901053,475,12.985592,77.596157


In [124]:
## Generate_basemap is not defined in this section -- presumably an earlier helper that returns a new folium map ; TODO confirm
basemap = Generate_basemap()

In [125]:
## one heat-map point per location: (lat , lon) position with avg_rating as the third column
heat_points = Ratings_locations[['lat', 'lon', 'avg_rating']]
HeatMap(heat_points).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x7f8a9d787410>

In [126]:
basemap ## render the map together with the heat layer added to it

# 