<a href="https://colab.research.google.com/github/rahul-jha98/zomato_sales_visualizer/blob/master/ZomatoDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import json
import requests
from tqdm.notebook import tqdm

In [0]:
class Restaurant:
    """Flattened view of a single restaurant record returned by the Zomato API.

    The constructor copies the fields of interest out of the raw JSON object
    into plain attributes. ``reviews`` and ``dish_liked`` start out empty and
    are filled in later through ``set_reviews`` / ``set_dish_liked``.
    """

    def __init__(self, restaurant_json):
        """Store the raw record and extract the attributes from it.

        Args:
            restaurant_json: Mapping for one restaurant (the value found
                under the ``'restaurant'`` key of a search result). Missing
                keys are tolerated and yield ``None`` / empty attributes.
        """
        self.restaurant_json = restaurant_json
        self.prepare_prameters_from_json()

    def get(self, value, json=''):
        """Return ``json[value]`` or ``None`` when it cannot be looked up.

        Args:
            value: Key to look up.
            json: Mapping to search. The empty-string default is a sentinel
                meaning "use the restaurant's own JSON"; ``None`` cannot be
                the sentinel because callers pass ``None`` on purpose when a
                nested object (e.g. ``location``) is absent.

        Returns:
            The stored value, or ``None`` if the key or the mapping is
            missing.
        """
        if json == '':
            json = self.restaurant_json

        if json is None:
            return None

        return json.get(value, None)

    def prepare_prameters_from_json(self):
        """Populate the instance attributes from ``self.restaurant_json``."""
        self.id = self.get('id')
        self.name = self.get('name')
        self.url = self.get('url')

        location = self.get('location')
        self.address = self.get('address', location)
        self.latitude = self.get('latitude', location)
        self.longitude = self.get('longitude', location)
        self.location = self.get('locality', location)
        # The city lives under its own key; reading 'locality' here would
        # just duplicate ``self.location``.
        self.city = self.get('city', location)

        self.online_order = 'Yes' if self.get('has_online_delivery') == 1 else 'No'
        self.book_table = 'Yes' if self.get('has_table_booking') == 1 else 'No'

        user_rating = self.get('user_rating')
        rating = self.get('aggregate_rating', user_rating)
        # Missing ratings and the literal "NEW" marker are kept as they are;
        # anything else is rendered as "<rating>/5".
        if rating is None or rating == "NEW":
            self.rate = rating
        else:
            self.rate = '{}/5'.format(rating)

        self.votes = self.get('votes', user_rating)

        # A phone entry is only kept when it contains at least ten digits;
        # a missing or non-string entry counts as zero digits.
        phone = self.get('phone_numbers')
        digit_count = sum(c.isdigit() for c in phone) if isinstance(phone, str) else 0
        self.phone = phone if digit_count >= 10 else None

        # 'establishment' is joined as a sequence of type names. A missing
        # value becomes an empty string, and a bare string is used as-is
        # rather than being joined character by character.
        establishment = self.get('establishment')
        if isinstance(establishment, str):
            self.rest_type = establishment
        else:
            self.rest_type = ', '.join(establishment or [])

        self.cuisines = self.get('cuisines')

        self.approx_cost = self.get('average_cost_for_two')

        self.dish_liked = ''

        self.reviews = []

    def set_reviews(self, reviews):
        """Attach the list of reviews fetched for this restaurant."""
        self.reviews = reviews

    def set_dish_liked(self, dish_liked):
        """Attach the scraped "top dishes" text for this restaurant."""
        self.dish_liked = dish_liked

    def __str__(self):
        """Return a multi-line, human-readable dump of every attribute."""
        sections = [
            "ID : {0}\nName: {1}\nURL: {2}\n\n".format(self.id, self.name, self.url),
            "Address : {0}\nLatitude : {1}\nLongitue : {2}\nLocation : {3}\n\n".format(
                self.address, self.latitude, self.longitude, self.location),
            "Online : {0}\nBooking : {1}\nRating : {2}\nVotes : {3}\nPhone: {4}\n\n".format(
                self.online_order, self.book_table, self.rate, self.votes, self.phone),
            "Approx Cost: {0}\nCusines: {1}\nRestaurant Type: {2}\nDish Liked: {3}\n".format(
                self.approx_cost, self.cuisines, self.rest_type, self.dish_liked),
            "Reviews {}".format(self.reviews),
        ]
        return ''.join(sections)

    def get_row(self):
        """Return the attributes as a flat 17-item list (one table row).

        The locality (``self.location``) appears twice: once in the middle
        and once as the final element.
        """
        return [self.url, self.address, self.name, self.online_order, self.book_table, self.rate, self.votes,
                self.phone, self.location, self.rest_type, self.dish_liked, self.cuisines, self.approx_cost,
                self.reviews, self.latitude, self.longitude, self.location]


In [0]:
class ZomatoDatasetCreator:
    """Collects restaurant data for one city through the Zomato v2.1 API.

    Intended call order: ``search_city()`` resolves the city id,
    ``fetch_all_restaurants()`` fills ``self.all_restaurants`` with
    ``Restaurant`` objects, and ``populate_reviews()`` /
    ``populate_dish_liked()`` enrich those objects afterwards.
    """

    def __init__(self, city_name, api_keys=None):
        """Set up the API configuration for ``city_name``.

        Args:
            city_name: Free-text city name; replaced by the name Zomato
                reports once ``search_city()`` succeeds.
            api_keys: Optional iterable of Zomato API keys (a single string
                is accepted too). Defaults to the key bundled below.

        Raises:
            ValueError: If ``api_keys`` is given but contains no key.
        """
        # Collecting one city takes roughly 1500 API calls, so requests
        # rotate through every configured key. Only one key is bundled;
        # pass additional ones through ``api_keys``.
        # NOTE(security): a credential committed to source control should be
        # treated as leaked; prefer supplying keys via ``api_keys``.
        if api_keys is None:
            api_keys = ["014af0114a43afec41812542b307726b"]
        elif isinstance(api_keys, str):
            api_keys = [api_keys]

        self.API_KEYS = list(api_keys)
        if not self.API_KEYS:
            raise ValueError("At least one API key is required.")

        self.BASE_URL = "https://developers.zomato.com/api/v2.1/"

        self.api_count = 0
        self.api_len = len(self.API_KEYS)

        self.city_name = city_name

    def get_response(self, end_point, query_params_dict):
        """Send a GET request to ``end_point`` using the next API key.

        The key index is advanced before each request, so consecutive calls
        cycle through ``self.API_KEYS``.

        Args:
            end_point: Path appended to ``self.BASE_URL``.
            query_params_dict: Query-string parameters.

        Returns:
            The ``requests`` response object.
        """
        url = self.BASE_URL + end_point

        self.api_count = (self.api_count + 1) % self.api_len

        return requests.get(url,
                            params=query_params_dict,
                            headers={'user-key': self.API_KEYS[self.api_count]})

    def search_city(self):
        """Resolve ``self.city_name`` to Zomato's city name and id.

        Sets ``self.city_name`` and ``self.city_id`` from the first
        suggestion returned by the ``locations`` endpoint.

        Raises:
            Exception: ``"Network Error"`` when the request fails, or
                ``"Search result is empty."`` when nothing matches.
        """
        response = self.get_response('locations', {'query': self.city_name})
        if not response:
            raise Exception("Network Error")

        result = response.json()['location_suggestions']
        if not result:
            raise Exception("Search result is empty.")

        self.city_name = result[0]['city_name']
        self.city_id = result[0]['city_id']
        print("Setting the city name to ", self.city_name)

    @staticmethod
    def _build_establishment_dict(all_establishments):
        """Map establishment id -> establishment name.

        Each entry wraps a two-value mapping under ``'establishment'``. The
        values may arrive in either order, so whichever one is a string is
        taken as the name and the other one as the id.
        """
        establishment_dict = {}
        for entry in all_establishments:
            first, second = entry['establishment'].values()
            if isinstance(second, str):
                establishment_dict[first] = second
            else:
                establishment_dict[second] = first
        return establishment_dict

    def fetch_establishments_dictionary(self):
        """Fetch the establishment types available in the current city.

        Returns:
            dict mapping establishment id to establishment name.

        Raises:
            Exception: ``"Network Error"`` when the request fails.
        """
        response = self.get_response('establishments', {'city_id': self.city_id})
        if not response:
            raise Exception("Network Error")

        return self._build_establishment_dict(response.json()['establishments'])

    def fetch_all_restaurants(self):
        """Collect restaurants for every establishment type in the city.

        Each establishment type is crawled twice, once sorted by cost and
        once by rating. ``self.all_restaurant_ids`` keeps the ids already
        seen so that ``self.all_restaurants`` holds each restaurant once.
        """
        self.all_restaurant_ids = {}

        establishments = self.fetch_establishments_dictionary()

        self.all_restaurants = []
        for sort_param in ['cost', 'rating']:
            self.sort_param = sort_param
            print('\n' + self.sort_param)
            for e_id, e_name in establishments.items():
                print('\n' + e_name)
                self.fetch_restaurants_of_type(e_id)

    def fetch_restaurants_of_type(self, establishment_id, offset=0,
                                  sort_order='desc', count=20, max_pages=5):
        """Page through the search results for one establishment type.

        Pages are first requested in descending order. When ``max_pages``
        pages were not enough to cover every result, the remainder is
        requested from the other end in ascending order, stopping once the
        outstanding number of results has been requested or ``max_pages``
        ascending pages were read. Uses ``self.sort_param`` as sort key and
        appends unseen restaurants to ``self.all_restaurants``.

        Args:
            establishment_id: Establishment type id to filter on.
            offset: Zero-based page number; the request starts at
                ``offset * 20``.
            sort_order: ``'desc'`` or ``'asc'``.
            count: Number of results to request; during the ascending pass
                this is the number of results still outstanding.
            max_pages: Maximum number of pages to read per sort order.

        Returns:
            None.
        """
        if offset == max_pages:
            return

        response = self.get_response('search', {'entity_id': self.city_id, 'entity_type': 'city',
                                                'establishment_type': establishment_id,
                                                'sort': self.sort_param,
                                                'order': sort_order,
                                                'start': offset * 20,
                                                'count': count})
        if not response:
            return None

        result = response.json()
        queried = result['results_start'] + result['results_shown']
        print(queried, end=' ')
        total = result['results_found']
        count -= result['results_shown']

        for entry in result['restaurants']:
            restaurant_json = entry['restaurant']
            res_id = restaurant_json['id']
            if res_id not in self.all_restaurant_ids:
                self.all_restaurant_ids[res_id] = 0
                self.all_restaurants.append(Restaurant(restaurant_json))

        if total <= queried:
            return

        if sort_order == 'desc':
            if offset + 1 < max_pages:
                self.fetch_restaurants_of_type(establishment_id, offset + 1,
                                               max_pages=max_pages)
            else:
                # Page budget exhausted: fetch what is left from the other end.
                left = total - queried
                self.fetch_restaurants_of_type(establishment_id, 0, 'asc', left,
                                               max_pages=max_pages)
        elif count > 0:
            # Ascending pass: continue only while results are outstanding,
            # otherwise the remaining pages would only repeat known entries.
            self.fetch_restaurants_of_type(establishment_id, offset + 1, 'asc', count,
                                           max_pages=max_pages)

    def fetch_reviews(self, res_id):
        """Fetch the reviews of one restaurant.

        Args:
            res_id: Restaurant id.

        Returns:
            List of ``('Rated <rating>', review_text)`` tuples; empty when
            the request fails.
        """
        response = self.get_response('reviews', {'res_id': res_id})
        if not response:
            return []

        return [('Rated {}'.format(item['review']['rating']), item['review']['review_text'])
                for item in response.json()['user_reviews']]

    def populate_reviews(self):
        """Fetch and attach reviews for every collected restaurant."""
        for restaurant in tqdm(self.all_restaurants):
            restaurant.set_reviews(self.fetch_reviews(restaurant.id))

    @staticmethod
    def _find_nth_occurrence(string, char, n):
        """Return the index of the ``n``-th ``char`` in ``string``, or -1."""
        position = -1
        for _ in range(n):
            position = string.find(char, position + 1)
            if position == -1:
                # Stop here: searching again from ``position + 1`` would
                # restart at index 0 and report a wrong match.
                return -1
        return position

    @staticmethod
    def _extract_top_dish(html):
        """Return the text that follows the "Top Dishes People Order" heading.

        The text is taken between the second ``>`` and the third ``<`` after
        the heading. Returns ``None`` when the heading or the surrounding
        markup is not found.
        """
        pos = html.find('Top Dishes People Order')
        if pos == -1:
            return None

        remaining = html[pos:]
        start = ZomatoDatasetCreator._find_nth_occurrence(remaining, '>', 2)
        end = ZomatoDatasetCreator._find_nth_occurrence(remaining, '<', 3)
        if start == -1 or end == -1 or end <= start:
            return None
        return remaining[start + 1:end]

    def populate_dish_liked(self):
        """Scrape each restaurant's web page for its "top dishes" text.

        Restaurants without a URL, or whose page lacks the expected markup,
        keep their current ``dish_liked`` value.
        """
        # A browser-like user agent is sent with every page request.
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}

        for restaurant in tqdm(self.all_restaurants):
            URL = restaurant.url
            if not URL:
                continue

            # Drop the tracking query string before requesting the page.
            response = requests.get(URL.split('?')[0], headers=headers)

            dish_liked = self._extract_top_dish(response.text)
            if dish_liked is not None:
                restaurant.set_dish_liked(dish_liked)

In [0]:
# Create the collector for the target city. The name given here is replaced
# by the one Zomato reports once search_city() has run.
data_creator = ZomatoDatasetCreator("Allahabad")

In [32]:
# Resolve the city name to a Zomato city id; the fetch methods read
# data_creator.city_id, so this must run first.
data_creator.search_city()

Setting the city name to  Allahabad


In [33]:
# Crawl every establishment type twice (sorted by cost, then by rating);
# the numbers printed below are the running result offsets per page.
data_creator.fetch_all_restaurants()


cost

Casual Dining
20 40 60 80 100 20 40 60 80 100 
Sweet Shop
20 40 60 80 86 
Quick Bites
20 40 60 80 100 20 40 60 80 100 
Fine Dining
2 
Bhojanalya
14 
Bar
9 
Dessert Parlour
20 40 60 65 
Café
20 40 45 
Dhaba
20 23 
Kiosk
13 
Bakery
20 33 
Lounge
8 
Beverage Shop
20 24 
Butcher Shop
2 
Food Court
7 
Food Truck
5 
Paan Shop
4 
Confectionery
1 
General Store
0 
Microbrewery
0 
rating

Casual Dining
20 40 60 80 100 20 40 60 80 100 
Sweet Shop
20 40 60 80 86 
Quick Bites
20 40 60 80 100 20 40 60 80 100 
Fine Dining
2 
Bhojanalya
14 
Bar
9 
Dessert Parlour
20 40 60 65 
Café
20 40 45 
Dhaba
20 23 
Kiosk
13 
Bakery
20 33 
Lounge
8 
Beverage Shop
20 24 
Butcher Shop
2 
Food Court
7 
Food Truck
5 
Paan Shop
4 
Confectionery
1 
General Store
0 
Microbrewery
0 

In [34]:
# Fetch and attach the reviews of every collected restaurant.
data_creator.populate_reviews()

HBox(children=(IntProgress(value=0, max=786), HTML(value='')))




In [0]:
# data_creator.populate_dish_liked()

In [0]:
# Short alias for the list of collected Restaurant objects.
data_list = data_creator.all_restaurants

In [36]:
# Spot-check one record; printing goes through Restaurant.__str__.
print(data_list[285])

ID : 18708408
Name: Goli Vada Pav No.1
URL: https://www.zomato.com/allahabad/goli-vada-pav-no-1-1-civil-lines?utm_source=api_basic_user&utm_medium=api&utm_campaign=v2.1

Address : Near TPS College, Meerapur Road, Civil Lines, Allahabad
Latitude : 25.4304554570
Longitue : 81.8298444897
Location : Civil Lines

Online : No
Booking : No
Rating : 3.7/5
Votes : 34
Phone: +91 8953039399

Approx Cost: 500
Cusines: Street Food, Fast Food
Restaurant Type: Quick Bites
Dish Liked: 
Reviews [('Rated 5', ''), ('Rated 4', ''), ('Rated 5', 'cheese & corn pops were amazing.. 🌯Chrispy Rolls Were really yumm & The filling was so good and fresh..in such an affordable price 🍶 Mojito was also refreshing ❤️ Do go & try this place everyone😍'), ('Rated 5', 'Small place having good ambience. Total pocket friendly. Their one of the best food is pao bhaji and paneer roll if u visit must try. Service was not so good little slow.'), ('Rated 3', 'A very normal place to hang out with friends had paneer supreme vada p

**Export the fetched data to CSV**

In [0]:
import numpy as np
import pandas as pd

In [54]:
# One row per restaurant, one column per get_row() field. dtype=object lets
# each cell hold an arbitrary Python value (strings, numbers, review lists).
np_data = np.empty(shape=(len(data_list), len(data_list[0].get_row())), dtype=object)
for i, rest in enumerate(tqdm(data_list)):
    np_data[i] = rest.get_row()

HBox(children=(IntProgress(value=0, max=786), HTML(value='')))




In [55]:
# Display the assembled array as a quick sanity check.
np_data

array([['https://www.zomato.com/allahabad/rang-mahal-hotel-ajay-international-civil-lines?utm_source=api_basic_user&utm_medium=api&utm_campaign=v2.1',
        'A/2/D, Lal Bahadur Shastri Marg, Civil Lines, Allahabad',
        'Rang Mahal - Hotel Ajay International', ..., '25.4539306187',
        '81.8280544505', 'Civil Lines'],
       ['https://www.zomato.com/allahabad/bercos-4-civil-lines?utm_source=api_basic_user&utm_medium=api&utm_campaign=v2.1',
        '17C, 0-1, Stretchy Road, Civil Lines, Allahabad', "Berco's",
        ..., '25.4522500000', '81.8328900000', 'Civil Lines'],
       ['https://www.zomato.com/allahabad/fusion-bar-restaurant-civil-lines?utm_source=api_basic_user&utm_medium=api&utm_campaign=v2.1',
        '3C/7C, Clive Road, Opposite Sangam Place, Civil Lines, Allahabad',
        'Fusion Bar & Restaurant', ..., '0.0000000000', '0.0000000000',
        'Civil Lines'],
       ...,
       ['https://www.zomato.com/allahabad/taste-of-prayagraj-2-dhoomanganj?utm_source=api_ba

In [56]:
# Wrap the array in a DataFrame. Labels follow the order of
# Restaurant.get_row(), which emits the locality twice, hence the two
# 'location' columns.
df = pd.DataFrame(
    data=np_data,
    columns=[
        'url', 'address', 'name', 'online_order', 'book_table',
        'rate', 'votes', 'phone', 'location', 'rest_type',
        'dish_liked', 'cuisines', 'approx_cost', 'reviews',
        'latitude', 'longitude', 'location',
    ],
)

df.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost,reviews,latitude,longitude,location.1
0,https://www.zomato.com/allahabad/rang-mahal-ho...,"A/2/D, Lal Bahadur Shastri Marg, Civil Lines, ...",Rang Mahal - Hotel Ajay International,No,No,3.5/5,15,,Civil Lines,Casual Dining,,"North Indian, Chinese, Continental, South Indian",2000,"[(Rated 3, Food is tasty and service is also g...",25.4539306187,81.8280544505,Civil Lines
1,https://www.zomato.com/allahabad/bercos-4-civi...,"17C, 0-1, Stretchy Road, Civil Lines, Allahabad",Berco's,Yes,No,4.2/5,65,+91 7398254296,Civil Lines,Casual Dining,,"Chinese, Thai, Asian",1600,"[(Rated 5, We are very glad that rahul and Vir...",25.45225,81.83289,Civil Lines
2,https://www.zomato.com/allahabad/fusion-bar-re...,"3C/7C, Clive Road, Opposite Sangam Place, Civi...",Fusion Bar & Restaurant,No,No,3.1/5,4,+91 7355922594,Civil Lines,Bar,,"North Indian, Chinese, Fast Food, Bar Food",1600,"[(Rated 4, )]",0.0,0.0,Civil Lines
3,https://www.zomato.com/allahabad/jannat-hotel-...,"2nd Floor, South Road, Civil Lines, Allahabad",Jannat - Hotel Kanhashyam,No,No,3.8/5,94,"0532 2560123, +91 9918201943",Civil Lines,Casual Dining,,North Indian,1300,"[(Rated 2, You donot expect this from the only...",25.4495460821,81.8329384178,Civil Lines
4,https://www.zomato.com/allahabad/barbeque-nati...,"Fifth Floor, P Square Mall, MG Road, George To...",Barbeque Nation,No,No,4.7/5,400,+91 7026756060,George Town,Casual Dining,,"North Indian, Mediterranean",1200,"[(Rated 5, A great lunch with friends on Barbe...",25.449453,81.839536,George Town


In [0]:
# Write the table to data.csv in the working directory. With the default
# settings the DataFrame index is written as an extra, unnamed first column.
df.to_csv('data.csv')

In [0]:
# Manual experiment: download one restaurant page, mirroring the request
# made per restaurant in ZomatoDatasetCreator.populate_dish_liked().
URL = "https://www.zomato.com/allahabad/goli-vada-pav-no-1-1-civil-lines?utm_source=api_basic_user&utm_medium=api&utm_campaign=v2.1"

# Browser-like user agent sent with the page request.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
# The tracking query string is dropped before the request is sent.
response = requests.get(URL.split('?')[0],headers=headers)    