# Zomato API - Gathering Data

## In this Notebook:
 - Using the Zomato API, restaurant information for the city of Pune was retrieved and saved.
 - A web scraper was written to gather full review information, but it was not executed (to adhere to the ToS).

### Using the offset and limit parameters in the Zomato API

Because each API call returns at most 20 restaurants, these parameters were used in combination with other search parameters to page through the results.


### Steps taken to retrieve data

- Get the list of establishment types (for restaurants)
- For each establishment type, page through the search results to get up to 100 restaurants of that type
- Do that for every establishment type to retrieve all restaurant data
- For each restaurant in each category, create a scraper to gather review data (for example, for the top 500 restaurants)
    - The scraper was built but not executed, as running it is against the ToS.

In [11]:
import requests
import json
import csv
import time
import pandas as pd

In [33]:
## API configuration shared by every request in this notebook
API_KEY = '<--- API KEY HERE --->'
HOST = "https://developers.zomato.com/api/v2.1"
# The key travels in the `user-key` header; JSON responses are requested.
HEADERS = {
    'Accept': 'application/json',
    'user-key': str(API_KEY),
}

## Testing APIs and data

In [None]:
# Request the /categories endpoint with the shared headers.
ENDPOINT = "%s/categories" % HOST
response = requests.get(ENDPOINT, headers=HEADERS)

In [21]:
# Save the raw response body to disk.
# `with` guarantees the file is closed even if the write raises.
with open("data/categories.json", "w") as f:
    f.write(response.text)

In [11]:
# Get the cuisines available on Zomato for the city.
# city_id 5 is Pune.
PARAMETERS = {'city_id': 5}
ENDPOINT = "%s/cuisines" % HOST

cuisines = requests.get(ENDPOINT, params=PARAMETERS, headers=HEADERS)

In [20]:
# Save the raw cuisines response body to disk.
# `with` guarantees the file is closed even if the write raises.
with open("data/cusines.json", "w") as f:
    f.write(cuisines.text)

In [117]:
# Get the establishment types available on Zomato for the city.
ENDPOINT = HOST + "/establishments"

# city_id 5 is Pune.
PARAMETERS = {'city_id': 5}

establishment = requests.get(url=ENDPOINT, headers=HEADERS, params=PARAMETERS)

# Save the raw response body to disk.
# `with` guarantees the file is closed even if the write raises.
with open("data/establishment.json", "w") as f:
    f.write(establishment.text)

In [16]:
# Collect the id of every establishment entry stored in the saved payload.
establishment_data = []
with open('data/establishment.json', mode='r') as f:
    data = json.loads(f.read())

establishment_data.extend(
    entry['establishment']['id'] for entry in data['establishments']
)

In [17]:
# Display how many establishment ids were collected.
len(establishment_data)

31

## Next, let's gather the data for all restaurants

In [19]:
# Accumulator for the raw text of each search response page.
restaurants_data = list()

In [32]:
# get the restaurants
start=0
def get_restaurant(start):
    """Page through the /search endpoint for every establishment type.

    For each id in the module-level ``establishment_data`` the search is
    repeated with an increasing ``start`` offset until a response reports
    no results. The raw text of every non-empty response is appended to
    the module-level ``restaurants_data`` list.

    Args:
        start: Offset of the first result requested for the first
            establishment type. Every later establishment type starts
            again from offset 0.

    Returns:
        None. Results are accumulated in ``restaurants_data``.
    """
    ENDPOINT = HOST + "/search"
    # Number of results requested per call; also the step for `start`.
    page_size = 20

    for establishment in establishment_data:
        while True:
            # 5 is a pune city id
            PARAMETERS = {'entity_id': 5,
                          'entity_type': 'city',
                          # This key was misspelled 'coun: t', so the page
                          # size was never actually sent to the API.
                          'count': page_size,
                          'start': start,
                          'establishment_type': establishment
                         }

            restaurants = requests.get(url=ENDPOINT, headers=HEADERS, params=PARAMETERS)
            # A payload with no 'results_shown' (or 0) ends this establishment type.
            if restaurants.json().get('results_shown', 0) == 0:
                break

            restaurants_data.append(restaurants.text)
            start = start + page_size
            time.sleep(2) ## pause between calls so the API does not block us
        # Restart paging from the first result for the next establishment type.
        start = 0
get_restaurant(start)


In [21]:
# Display how many response pages were collected.
len(restaurants_data)

62

In [22]:
# Save the collected response pages to disk as a single JSON list.
# `with` guarantees the file is closed (and flushed) even if the dump raises.
with open("data/restaurants.json", "w") as f:
    json.dump(restaurants_data, f)

In [23]:
# Read the saved response pages back from disk.
with open('data/restaurants.json', mode='r') as f:
    data = json.loads(f.read())

In [24]:
# Flatten the saved search pages into zomato.csv, one row per restaurant.
# The `with` block closes (and therefore flushes) the file before anything
# reads it back; the original handle was never closed. utf-8 is stated
# explicitly so the file matches the default encoding of pandas.read_csv.
with open('zomato.csv', 'w', newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Restaurant ID','Restaurant Name','Country Code','City','Address','Locality','Locality Verbose','Longitude','Latitude','Cuisines','Average Cost for two','Has Table booking','Has Online delivery','Is delivering now','Switch to order menu','Price range','Aggregate rating','Rating text','Votes', 'Review Count'])
    for page_text in data:
        # Each element of `data` is itself a JSON document stored as a string.
        page = json.loads(page_text)
        for entry in page['restaurants']:
            restaurant = entry['restaurant']
            location = restaurant['location']
            user_rating = restaurant['user_rating']

            # Value order must match the header row written above.
            writer.writerow([
                restaurant['R']['res_id'],
                restaurant['name'],
                location['country_id'],  # written under the 'Country Code' header
                location['city'],
                location['address'],
                location['locality'],
                location['locality_verbose'],
                location['longitude'],
                location['latitude'],
                restaurant['cuisines'],
                restaurant['average_cost_for_two'],
                restaurant['has_table_booking'],
                restaurant['has_online_delivery'],
                restaurant['is_delivering_now'],
                restaurant['switch_to_order_menu'],
                restaurant['price_range'],
                user_rating['aggregate_rating'],
                user_rating['rating_text'],
                user_rating['votes'],
                restaurant['all_reviews_count'],
            ])

In [25]:
# Load the exported CSV into a DataFrame.
zomato = pd.read_csv(
    'zomato.csv',
)

In [31]:
# zomato.head()

In [28]:
# Print the DataFrame summary (columns, non-null counts, dtypes).
zomato.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1133 entries, 0 to 1132
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         1133 non-null   int64  
 1   Restaurant Name       1133 non-null   object 
 2   Country Code          1133 non-null   int64  
 3   City                  1133 non-null   object 
 4   Address               1133 non-null   object 
 5   Locality              1133 non-null   object 
 6   Locality Verbose      1133 non-null   object 
 7   Longitude             1133 non-null   float64
 8   Latitude              1133 non-null   float64
 9   Cuisines              1133 non-null   object 
 10  Average Cost for two  1133 non-null   int64  
 11  Has Table booking     1133 non-null   int64  
 12  Has Online delivery   1133 non-null   int64  
 13  Is delivering now     1133 non-null   int64  
 14  Switch to order menu  1133 non-null   int64  
 15  Price range          