1 - Reading Data

In [1]:
#Import necessary libraries
import sqlite3
import pandas as pd
import numpy as np

#Establish a connection to the SQLite database
con = sqlite3.connect(r"C:\Users\omerf\OneDrive\Masaüstü\Data Analysis Project\Zomato\Resources/zomato_rawdata.sqlite")

df = pd.read_sql_query("SELECT * FROM USERS", con)

In [2]:
df = pd.read_sql_query("SELECT * FROM USERS", con)

2 - Handle some missing values

In [3]:
df["rate"].replace(("NEW", "-"), np.nan, inplace= True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["rate"].replace(("NEW", "-"), np.nan, inplace= True)


In [4]:
df["rate"] = df["rate"].apply(lambda x: float(x.split("/")[0]) if type(x) == str else x)

3 - Extract Geographical Coordinates

In [5]:
df["location"].head()

0    Banashankari
1    Banashankari
2    Banashankari
3    Banashankari
4    Basavanagudi
Name: location, dtype: object

In [6]:
df["location"].unique()

array(['Banashankari', 'Basavanagudi', 'Mysore Road', 'Jayanagar',
       'Kumaraswamy Layout', 'Rajarajeshwari Nagar', 'Vijay Nagar',
       'Uttarahalli', 'JP Nagar', 'South Bangalore', 'City Market',
       'Nagarbhavi', 'Bannerghatta Road', 'BTM', 'Kanakapura Road',
       'Bommanahalli', None, 'CV Raman Nagar', 'Electronic City', 'HSR',
       'Marathahalli', 'Sarjapur Road', 'Wilson Garden', 'Shanti Nagar',
       'Koramangala 5th Block', 'Koramangala 8th Block', 'Richmond Road',
       'Koramangala 7th Block', 'Jalahalli', 'Koramangala 4th Block',
       'Bellandur', 'Whitefield', 'East Bangalore', 'Old Airport Road',
       'Indiranagar', 'Koramangala 1st Block', 'Frazer Town', 'RT Nagar',
       'MG Road', 'Brigade Road', 'Lavelle Road', 'Church Street',
       'Ulsoor', 'Residency Road', 'Shivajinagar', 'Infantry Road',
       'St. Marks Road', 'Cunningham Road', 'Race Course Road',
       'Commercial Street', 'Vasanth Nagar', 'HBR Layout', 'Domlur',
       'Ejipura', 'Jeevan

In [7]:
df["location"] = df["location"] + " , Bangalore, Karnataka, India "

In [8]:
df["location"].unique()

array(['Banashankari , Bangalore, Karnataka, India ',
       'Basavanagudi , Bangalore, Karnataka, India ',
       'Mysore Road , Bangalore, Karnataka, India ',
       'Jayanagar , Bangalore, Karnataka, India ',
       'Kumaraswamy Layout , Bangalore, Karnataka, India ',
       'Rajarajeshwari Nagar , Bangalore, Karnataka, India ',
       'Vijay Nagar , Bangalore, Karnataka, India ',
       'Uttarahalli , Bangalore, Karnataka, India ',
       'JP Nagar , Bangalore, Karnataka, India ',
       'South Bangalore , Bangalore, Karnataka, India ',
       'City Market , Bangalore, Karnataka, India ',
       'Nagarbhavi , Bangalore, Karnataka, India ',
       'Bannerghatta Road , Bangalore, Karnataka, India ',
       'BTM , Bangalore, Karnataka, India ',
       'Kanakapura Road , Bangalore, Karnataka, India ',
       'Bommanahalli , Bangalore, Karnataka, India ', nan,
       'CV Raman Nagar , Bangalore, Karnataka, India ',
       'Electronic City , Bangalore, Karnataka, India ',
       'HSR , B

In [9]:
#Drop the missing values
df_copy = df.copy()

df_copy["location"].isnull().sum()

np.int64(21)

In [10]:
df_copy = df_copy.dropna(subset=["location"])

df_copy["location"].isnull().sum()

np.int64(0)

In [11]:
#Turn into a dataframe
locations = pd.DataFrame(df_copy["location"].unique())

In [12]:
#Name the column
locations.columns = ["Name"]

In [13]:
locations.head()

Unnamed: 0,Name
0,"Banashankari , Bangalore, Karnataka, India"
1,"Basavanagudi , Bangalore, Karnataka, India"
2,"Mysore Road , Bangalore, Karnataka, India"
3,"Jayanagar , Bangalore, Karnataka, India"
4,"Kumaraswamy Layout , Bangalore, Karnataka, India"


In [14]:
#Import necessary libraries
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="app", timeout=None)

In [15]:
#Sometimes it gets HTTP500 error so it catches errors and retries if the attempts still fails, nan is added

from geopy.exc import GeocoderServiceError
import time

lat = []
lon = []

for location_name in locations["Name"]:
    retries = 3
    while retries > 0:
        try:
            location = geolocator.geocode(location_name)
            if location is None:
                lat.append(np.nan)
                lon.append(np.nan)
            else:
                lat.append(location.latitude)
                lon.append(location.longitude)
            break
        except GeocoderServiceError as e:
            print(f"Geocoding error for {location_name}: {e}")
            retries -= 1
            time.sleep(1)

    if retries == 0:
        lat.append(np.nan)
        lon.append(np.nan)

In [16]:
locations["latitude"] = lat
locations["longitude"] = lon

In [17]:
locations

Unnamed: 0,Name,latitude,longitude
0,"Banashankari , Bangalore, Karnataka, India",12.939333,77.553982
1,"Basavanagudi , Bangalore, Karnataka, India",12.941726,77.575502
2,"Mysore Road , Bangalore, Karnataka, India",12.952813,77.541528
3,"Jayanagar , Bangalore, Karnataka, India",12.939904,77.582638
4,"Kumaraswamy Layout , Bangalore, Karnataka, India",12.906768,77.559502
...,...,...,...
88,"West Bangalore , Bangalore, Karnataka, India",13.009476,77.553089
89,"Magadi Road , Bangalore, Karnataka, India",12.975608,77.555356
90,"Yelahanka , Bangalore, Karnataka, India",13.100698,77.596345
91,"Sahakara Nagar , Bangalore, Karnataka, India",13.062147,77.580061


4 - Build Geographical Heatmaps

In [18]:
locations.isnull().sum()

Name         0
latitude     2
longitude    2
dtype: int64

In [19]:
#Search it from Google
locations[locations["latitude"].isna()]

Unnamed: 0,Name,latitude,longitude
79,"Rammurthy Nagar , Bangalore, Karnataka, India",,
85,"Sadashiv Nagar , Bangalore, Karnataka, India",,


In [20]:
#To ignore warnings
import warnings
from warnings import filterwarnings
filterwarnings("ignore")

In [21]:
locations["latitude"][79] = 13.0163
locations["longitude"][79] = 77.6785

In [22]:
locations["latitude"][85] = 13.0068
locations["longitude"][85] = 77.5813

In [23]:
#Look for North Indian Restaurants
df["cuisines"].isnull().sum()

np.int64(45)

In [24]:
df = df.dropna(subset=["cuisines"])

In [25]:
north_india = df[df["cuisines"].str.contains("North Indian")]

In [26]:
north_india.shape

(21085, 18)

In [27]:
north_india_new = north_india["location"].value_counts().reset_index()
north_india_new.rename(columns= {"location" : "Name"}, inplace=True)
north_india_new.head()

Unnamed: 0,Name,count
0,"BTM , Bangalore, Karnataka, India",2469
1,"HSR , Bangalore, Karnataka, India",1123
2,"Whitefield , Bangalore, Karnataka, India",1059
3,"Marathahalli , Bangalore, Karnataka, India",1038
4,"JP Nagar , Bangalore, Karnataka, India",958


In [29]:
heatmap_df = north_india_new.merge(locations, on = "Name", how="left")

In [30]:
heatmap_df.head()

Unnamed: 0,Name,count,latitude,longitude
0,"BTM , Bangalore, Karnataka, India",2469,12.91636,77.604733
1,"HSR , Bangalore, Karnataka, India",1123,12.900563,77.649475
2,"Whitefield , Bangalore, Karnataka, India",1059,12.969637,77.749745
3,"Marathahalli , Bangalore, Karnataka, India",1038,12.955257,77.698416
4,"JP Nagar , Bangalore, Karnataka, India",958,12.909694,77.586607


In [35]:
#Import necessary libraries
#This plot might cannot be seen in Github
import folium

basemap = folium.Map(location=[13.0068, 77.5813], zoom_start=10)  # India's central location

from folium.plugins import HeatMap 

HeatMap(heatmap_df[["latitude", "longitude", "count"]]).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x28c0487bf10>

In [36]:
basemap

4 - How to Automate your Data Analysis

In [42]:
def get_heatmap(cuisine):
    cuisine_df = df[df["cuisines"].str.contains(cuisine)]

    cuisine_new = cuisine_df["location"].value_counts().reset_index()
    cuisine_new.rename(columns= {"location" : "Name"}, inplace=True)

    heatmap_df = cuisine_new.merge(locations, on = "Name", how = "left")

    print(heatmap_df.head())

    basemap = folium.Map(location=[13.0068, 77.5813], zoom_start=10)  # India's central location

    HeatMap(heatmap_df[["latitude", "longitude", "count"]]).add_to(basemap)

    return basemap

In [43]:
get_heatmap("South Indian")

                                        Name  count   latitude  longitude
0         BTM , Bangalore, Karnataka, India     815  12.916360  77.604733
1    JP Nagar , Bangalore, Karnataka, India     437  12.909694  77.586607
2         HSR , Bangalore, Karnataka, India     436  12.900563  77.649475
3   Jayanagar , Bangalore, Karnataka, India     416  12.939904  77.582638
4  Whitefield , Bangalore, Karnataka, India     308  12.969637  77.749745
