In [0]:
import json
import requests
from tqdm.notebook import tqdm

In [0]:
class Restaurant:
    def __init__(self, restaurant_json):
        self.restaurant_json = restaurant_json
        self.prepare_prameters_from_json()
    
    
    def get(self, value, json = ''):
        if json == '':
            json = self.restaurant_json
            
        if json == None:
            return None
        
        return json.get(value, None)

    def prepare_prameters_from_json(self):
        self.id = self.get('id')
        self.name = self.get('name')
        self.url = self.get('url')
        
        location = self.get('location')
        self.address = self.get('address', location)
        self.latitude = self.get('latitude', location)
        self.longitude = self.get('longitude', location)
        self.location = self.get('locality', location)
        self.city = self.get('locality', location)
        
        
        self.online_order = 'Yes' if self.get('has_online_delivery') == 1 else 'No'
        self.book_table = 'Yes' if self.get('has_table_booking') == 1 else 'No'
        
        
        rating = self.get('aggregate_rating', self.get('user_rating'))
        
        if rating == None or rating == "NEW":
            self.rate = rating
        else:
            self.rate = '{}/5'.format(rating)
        
        self.votes = self.get('votes', self.get('user_rating'))
        
        phone = self.get('phone_numbers')
        if sum(c.isdigit() for c in phone) < 10:
            self.phone = None
        else:
            self.phone = phone
        
        self.rest_type = ', '.join(self.get('establishment'))
        
        self.cuisines = self.get('cuisines')
        
        self.approx_cost = self.get('average_cost_for_two')
        
        self.dish_liked = ''
        
        self.reviews = []


        
    def set_reviews(self, reviews):
        self.reviews = reviews


    
    def set_dish_liked(self, dish_liked):
        self.dish_liked = dish_liked

        
    
    def __str__(self):
        basic =  "ID : {0}\nName: {1}\nURL: {2}\n\n".format(self.id, self.name, self.url)
        
        location = "Address : {0}\nLatitude : {1}\nLongitue : {2}\nLocation : {3}\n\n".format(self.address,
                                                                                             self.latitude,
                                                                                             self.longitude,
                                                                                             self.location)
        
        online = "Online : {0}\nBooking : {1}\nRating : {2}\nVotes : {3}\nPhone: {4}\n\n".format(self.online_order,
                                                                                            self.book_table,
                                                                                            self.rate,
                                                                                            self.votes,
                                                                                            self.phone)
        other = "Approx Cost: {0}\nCusines: {1}\nRestaurant Type: {2}\nDish Liked: {3}\n".format(self.approx_cost,
                                                                             self.cuisines,
                                                                             self.rest_type,
                                                                             self.dish_liked)
        
        reviews = "Reviews {}".format(self.reviews)
        
        return basic + location + online + other + reviews


In [0]:
class ZomatoDatasetCreator:
    def __init__(self, city_name):
        
        # self.API_KEYS = ["1c1827e986cbb720c34bc661fdbd8884", 
        #                  "765fdb97e275ccf353c49c3c2ec68a7b",
        #                  "151799c34aa8943e8028a167e43f9588"]

        ## Abhi naya bana ke do daal do isme kal purana wala bhi append kar dena list me
        ## Basically ek city ke liye around 1500 calls hote hai so 
        self.API_KEYS = ["014af0114a43afec41812542b307726b", 
                         ]               
        self.BASE_URL = "https://developers.zomato.com/api/v2.1/"
        
        self.api_count = 0
        self.api_len = len(self.API_KEYS)
        
        self.city_name = city_name
    
    def get_response(self, end_point, query_params_dict):    
        url = self.BASE_URL + end_point
        
        self.api_count = (self.api_count + 1) % self.api_len
        
        return requests.get(url, 
                    params = query_params_dict,
                    headers = {'user-key':self.API_KEYS[self.api_count]}) 
    
    
    def search_city(self):
        response = self.get_response('locations', {'query': self.city_name})

        if response:
            result = response.json()['location_suggestions']

            if len(result) == 0:
                raise Exception("Search result is empty.")
            else:

                self.city_name = result[0]['city_name']
                self.city_id = result[0]['city_id']
                print("Setting the city name to ", self.city_name)
        else:
            raise Exception("Network Error")
            
            
            
    
    def fetch_establishments_dictionary(self):
        response = self.get_response('establishments', {'city_id': self.city_id})

        all_establishments = response.json()['establishments']

        estabishment_dict = {}

        for establishment in all_establishments:
            establishment = establishment['establishment']
            key, value = establishment.values()

            if type(value) == str:
                estabishment_dict[key] = value
            else:
                establishment_dict[value] = key

        return estabishment_dict
    
    
    
    
    
    
    def fetch_all_restaurants(self):
        
        self.all_restaurant_ids = {}

        establishments = self.fetch_establishments_dictionary()

        self.all_restaurants = []
        for sort_param in ['cost', 'rating']:
            self.sort_param = sort_param
            print('\n' + self.sort_param)
            for e_id in establishments.keys():
                print('\n' + establishments[e_id])
                self.fetch_restaurants_of_type(e_id)    
                
                
                
                
    def fetch_restaurants_of_type(self, establishment_id, offset = 0, 
                                sort_order = 'desc', count = 20, max_pages = 5):
    
        if offset == max_pages:
            return

        response = self.get_response('search', {'entity_id': self.city_id, 'entity_type': 'city',
                                           'establishment_type': establishment_id,
                                           'sort': self.sort_param,
                                           'order':sort_order,
                                          'start': offset * 20,
                                          'count':count})

        if response:
            result = response.json()
            queried = result['results_start'] + result['results_shown']
            print(queried, end = ' ')
            total = result['results_found']
            count -= result['results_shown']


            restaurants_json = result['restaurants']

            for restaurant in restaurants_json:
                if self.all_restaurant_ids.get(restaurant['restaurant']['id'], -1) == -1:
                    self.all_restaurant_ids[restaurant['restaurant']['id']] = 0
                    self.all_restaurants.append(Restaurant(restaurant['restaurant']))


            if total > queried and sort_order == 'desc': 
                if offset + 1 < max_pages:
                    self.fetch_restaurants_of_type(establishment_id, offset + 1)
                else:
                    left = total - queried
                    self.fetch_restaurants_of_type(establishment_id, 0, 'asc', left)

            elif total > queried: 
                self.fetch_restaurants_of_type(establishment_id, offset + 1, 'asc', count)
            
        else:
            return None
        
        
    def fetch_reviews(self, res_id):
        response = self.get_response('reviews', 
                         {'res_id': res_id})
        
        reviews = []
        if response:
            response = response.json()
            fetched_reviews = response['user_reviews']
            
            for review in fetched_reviews:
                rating = 'Rated {}'.format(review['review']['rating'])
                text = review['review']['review_text']
                reviews.append((rating, text))
            
            return reviews
        else:
            return []
        
    
    def populate_reviews(self):
        for restaurant in tqdm(self.all_restaurants):
            restaurant.set_reviews(self.fetch_reviews(restaurant.id))
            
    def populate_dish_liked(self):
        
        def find_nth_occurance(string, char, n):
            val = -1
            for i in range(n):
                val = string.find(char, val + 1)
            return val
        

        for restaurant in tqdm(self.all_restaurants):
            
            URL = restaurant.url

            headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
            response = requests.get(URL.split('?')[0],headers=headers)     

            html = response.text

            pos = html.find('Top Dishes People Order')
            if  pos!= -1:
                remaining = html[pos:]
                start = find_nth_occurance(remaining, '>', 2)
                end = find_nth_occurance(remaining, '<', 3)
                restaurant.set_dish_liked(remaining[start + 1:end])

In [0]:
data_creator = ZomatoDatasetCreator("Allahabad")

In [0]:
data_creator.search_city()

Setting the city name to  Allahabad


In [0]:
data_creator.fetch_all_restaurants()


cost

Casual Dining
20 40 60 80 100 20 40 60 80 100 
Sweet Shop
20 40 60 80 86 
Quick Bites
20 40 60 80 100 20 40 60 80 100 
Fine Dining
2 
Bhojanalya
14 
Bar
9 
Dessert Parlour
20 40 60 65 
Café
20 40 45 
Dhaba
20 23 
Kiosk
13 
Bakery
20 33 
Lounge
8 
Beverage Shop
20 24 
Butcher Shop
2 
Food Court
7 
Food Truck
5 
Paan Shop
4 
Confectionery
1 
General Store
0 
Microbrewery
0 
rating

Casual Dining
20 40 60 80 100 20 40 60 80 100 
Sweet Shop
20 40 60 80 86 
Quick Bites
20 40 60 80 100 20 40 60 80 100 
Fine Dining
2 
Bhojanalya
14 
Bar
9 
Dessert Parlour
20 40 60 65 
Café
20 40 45 
Dhaba
20 23 
Kiosk
13 
Bakery
20 33 
Lounge
8 
Beverage Shop
20 24 
Butcher Shop
2 
Food Court
7 
Food Truck
5 
Paan Shop
4 
Confectionery
1 
General Store
0 
Microbrewery
0 

In [0]:
data_creator.populate_reviews()

HBox(children=(IntProgress(value=0, max=786), HTML(value='')))




In [0]:
data_creator.populate_dish_liked()

HBox(children=(IntProgress(value=0, max=786), HTML(value='')))

ConnectionError: ignored

In [0]:
data_list = data_creator.all_restaurants

In [0]:
print(data_list[285])

In [0]:
creator = ZomatoDataset("Allahabad")
creator.search_city()
creator.fetch_all_restaurants()

In [0]:
creator.populate_reviews()

In [0]:
creator.populate_dish_liked()

In [0]:
print(creator.all_restaurants[285])