In [1]:
import inspect
import json
import logging
import os.path as path
from json import loads
from os import makedirs
from pathlib import Path

import validators
from bs4 import BeautifulSoup
from pandas import DataFrame, read_csv, concat
from requests import get, HTTPError

logging.basicConfig(format="%(asctime)s - [%(levelname)s]\t%(message)s",
                    datefmt='%d-%b-%y %H:%M:%S')



In [2]:
url = "https://deliveroo.co.uk/menu/London/king's-cross/10709-kfc-kings-cross-uk?day=today&postcode=EC&time=ASAP"
#"https://deliveroo.co.uk/menu/London/hop-west-end/morrisons-new-oxford-street?day=today&postcode=EC&time=ASAP",
#"https://deliveroo.co.uk/menu/London/camden-road/three-uncles-camden?day=today&postcode=EC&time=ASAP",
#"https://deliveroo.co.uk/menu/London/highbury/patty-guy-at-the-horatia?day=today&postcode=EC&time=ASAP"]

In [3]:
def url_validator(link: str) -> bool:
    """
    The function validates whether a given string is a valid URL of talabat.com or not
    :rtype: bool
    :param link: A string to be validated as URL or not
    :return: True if URL, else False
    """
    if validators.url(link) and (link.startswith('https://deliveroo.co.uk/')):
        return True
    else:
        return False

def __init__(self, url: str, base_dir: str = 'crawled_data',
             f_name: str = 'crawled_data', menu_dir: str = 'menus') -> None:
    """
    :rtype: None
    :param url: URL for data crawl
    :param base_dir: Directory to save data
    :param menu_dir: Directory to save menus
    """
    self.__flag = True
    self.__details_json = None
    self.__bs4_data = None

    self.__restaurant_details = {}
    self.__restaurant_menu_details = []

    frame = inspect.stack()[1]
    filename = frame[0].f_code.co_filename
    filepath = Path(path.dirname(filename))

    self.__filename = f_name

    if path.isabs(base_dir):
        self.__base_dir = Path(base_dir)
    else:
        self.__base_dir = filepath / base_dir

    if path.isabs(menu_dir):
        self.__menu_dir = Path(menu_dir)
    else:
        self.__menu_dir = self.__base_dir / menu_dir

    try:
        if self.url_validator(url):
            self.url = url
        else:
            self.url = url
            raise TypeError(f"'{url}' is not a valid URL")

    except TypeError as e:
        self.url = None
        self.__flag = False
        logging.error(e)

    except Exception as e:
        self.url = None
        self.__flag = False
        logging.debug(e)

    # calling other function(s)
    if self.__flag:
        self.__fetch_details()

In [4]:
def fetch_details(url):
    try:
        raw = get(url, allow_redirects=True, headers=(
            {
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                              '(KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36',
            }
        ))

        status_code = raw.status_code
        reason = raw.reason

        if status_code != 200:
            raise HTTPError(status_code, reason)

        bs4_data = BeautifulSoup(raw.content, 'lxml')
        print(bs4_data)
        return bs4_data
    
    except HTTPError as e:
        flag = False
        err = f"{self.url} - {e.strerror} [{e.errno}]"
        bs4_data = None
        logging.error(err)


    except Exception as e:
        flag = False
        bs4_data = None
        logging.debug(e)









In [5]:
url_validator(url)

True

In [6]:
bs4_data = fetch_details(url)

<!DOCTYPE html>
<html dir="ltr" lang="en"><head><title>KFC - Kings Cross delivery from King's Cross - Order with Deliveroo</title><meta content="The best local restaurants and takeaways are here to deliver. Order on Deliveroo today!" name="description"/><meta name="nosnippet"/><link href="https://deliveroo.co.uk/menu/London/king's-cross/10709-kfc-kings-cross-uk" rel="canonical"/><meta content="https://deliveroo.co.uk/menu/London/king's-cross/10709-kfc-kings-cross-uk" property="og:url"/><meta content="KFC - Kings Cross on Deliveroo" property="og:title"/><meta content="Serving up amazing food, KFC - Kings Cross sits in the heart of King's Cross. With dishes you're guaranteed to love, order now for delivery within 32 minutes" property="og:description"/><meta content="https://rs-menus-api.roocdn.com/images/29f7b700-9280-466d-b5f5-d3ef036b49f5/image.jpeg?width=1200&amp;height=630&amp;fit=crop" property="og:image"/><meta content="1200" property="og:image:width"/><meta content="630" property=

In [60]:
def make_json (bs4_data):
    flag = True
    try:
        try:
            data = bs4_data.select('#__NEXT_DATA__')[0].text
            details_json = loads(data)
            return details_json
            #print(details_json)

        except Exception as e:
            flag = False
            details_json = None
            logging.error(e)


    except Exception as e:
        flag = False
        details_json = None
        logging.error(e)

    # call other function(s)
    if flag:
        fetch_restaurant_details()

In [61]:
details_json = make_json(bs4_data)

In [89]:
details_json

{'props': {'initialState': {'account': {'loading': False,
    'errorMessage': '',
    'addresses': [],
    'rewards': {},
    'benefits': {},
    'subscription': {'cancellationDialog': {},
     'paymentModal': {'loading': False, 'visible': False},
     'isPauseAndResumeSubscriptionModalOpen': False,
     'invoices': {'loading': False, 'error': False, 'subscriptionEvents': []}},
    'vouchers': {'elements': [],
     'emptyState': {},
     'redeemed': {'loading': False, 'error': None, 'success': None}},
    'deletion': {'isSubmitting': False, 'isVerifying': False},
    'tierSwitching': {'viewPlans': {},
     'availablePlans': [],
     'cancelPlan': {},
     'confirmPlan': {},
     'currentPlan': {},
     'isLoading': False,
     'success': False,
     'successBannerText': ''},
    'banner': {'type': 'error'},
    'password': {},
    'challenges': {'displayedChallengeDetailsModal': {'isLoading': False},
     'challengeAcceptedToast': {'isOpen': False},
     'acceptChallengeIsLoading': Fal

In [104]:
def fetch_restaurant_details(details_json=details_json):
    #TODO: Check the name here
    restaurant = details_json['props']['initialState']['menuPage']['menu']['meta']['restaurant']
    name = restaurant['name']
    # in place of restaurant slug we'll use a lower case name, replacing he spaces w _
    formatted_name = name.lower().replace(" ", "_")
    address = restaurant['location']['address']['address1']
    #neighborhood = restaurant['location']['address']['neighborhood']

    print("Name:", formatted_name)
    print("Address:", address)
    #print("Neighborhood:", neighborhood)    

            #restaurant_details['restaurant_address'] = [restaurant_json['location']['address']['address1']]
            #print(restaurant_details)
    #restaurant_json2 = self.__details_json['props']['initialState']['menuPage']['menu']['meta']['customerLocation']
    #print(restaurant_json2)
    # self.__restaurant_details['latitude'] = [float(restaurant_json2['lat'])]
    # self.__restaurant_details['longitude'] = [float(restaurant_json2['lon'])]
    # self.__restaurant_details['neighborhood'] = [restaurant_json2['neighborhood']]
    # self.__restaurant_details['postcode'] = [restaurant_json2['postcode']]
    # self.__restaurant_details['geohash'] = [restaurant_json2['geohash']]

    # Modified name will replace 'restaurantSluG'


    # Get restaurant name and put it in lower case for the filename
    # restaurant_name = self.__restaurant_details['restaurant_name']
    # restaurant_name = restaurant_name[0]
    # self.__modified_name = restaurant_name.lower().replace(" ", "")
    #print(modified_name)

    #self.__restaurant_details['menu_file'] = [self.__restaurant_details['restaurant_name'].lower().replace(" ", "_") + '.csv']

#     except KeyError as e:
#         flag = False
#         restaurant_details = {}
#         logging.error(f'Invalid key {e}')

#     except Exception as e:
#         restaurant_details = {}
#         flag = False
#         logging.error(e)

#     # call other function(s)
#     if flag:
#         fetch_restaurant_menu_details()


In [105]:
fetch_restaurant_details(details_json)

Name: kfc_-_kings_cross
Address: 323 Grays Inn Road, London, WC1X8PX


In [106]:
def fetch_restaurant_location(details_json):
    rest = details_json['props']['initialState']['menuPage']['menu']['meta']['customerLocation']

    lat = rest['lat']
    lon = rest['lon']
    city = rest['city']
    neighborhood = rest['neighborhood']
    postcode = rest['postcode']
    cityId = rest['cityId']
    zoneId = rest['zoneId']
    geohash = rest['geohash']

    # You can now use these variables as needed, for example, print them:
    print("Latitude:", lat)
    print("Longitude:", lon)
    print("City:", city)
    print("Neighborhood:", neighborhood)
    print("Postcode:", postcode)
    print("City ID:", cityId)
    print("Zone ID:", zoneId)
    print("Geohash:", geohash)

In [107]:
fetch_restaurant_location(details_json)

Latitude: 51.5287716
Longitude: -0.1320632
City: London
Neighborhood: Somers Town
Postcode: EC
City ID: 1
Zone ID: 7481
Geohash: gcpvhuxbfenx


In [11]:
def __fetch_restaurant_menu_details(self):
    # Perhaps add this?     # Attempt to parse the JSON string -> parsed_json = json.loads(json_string)
    try:
        # Convert the dictionary to a JSON string
        #json_string = json.dumps(self.__details_json)
        #self.__details_json = json.loads(json_string)
        
        #print("The menu dictionary is JSON-like.")
    menu = details_json['props']['initialState']['menuPage']['menu']['meta']['items']
    for item in restaurant_json:
            name = item['name']
            description = item['description']
            price = item['price']['formatted']
    
            nutritional_info = None
            if 'nutritionalInfo' in item and item['nutritionalInfo'] is not None:
                nutritional_info = item['nutritionalInfo']['energyFormatted']
    
            url = None
            if 'image' in item and item['image'] is not None:
                url = item['image']['url']
            #restaurant_details['restaurant_name']
            #name= restaurant_json['name']
            
            print("Name:", name)
            print("Description:", description)
            print("Price:", price)
            print("URL:", url)
            print("Nutritional Info:", nutritional_info)
            print("\n")        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        
        menu_json = self.__details_json['props']['initialState']['menuPage']['menu']['meta']['items']

        if len(menu_json) < 1:
            self.__flag = False
            return

        for menu in menu_json:
            temp = {
                'item_name': menu['name'],
                'item_description': menu['description'],
                'item_nutritional_info' : menu['nutritionalInfo']['energyFormatted'],
                'item_price': menu['price']['formatted'],
                'item_image': menu['image']['url'],
            }
            self.__restaurant_menu_details.append(temp)

    except KeyError as e:
        self.__flag = False
        self.__restaurant_menu_details = []
        logging.error(f'Invalid key {e}')

    except Exception as e:
        self.__flag = False
        self.__restaurant_menu_details = []
        logging.error(e)

In [12]:
def get_restaurant_details(self) -> None | DataFrame:
    if not self.__flag:
        return None
    return DataFrame(self.__restaurant_details)

def get_restaurant_menu(self) -> None | DataFrame:
    if not self.__flag:
        return None
    return DataFrame(self.__restaurant_menu_details)

def write_to_csv(self):
    if self.__flag and self.__restaurant_details != {} and self.__restaurant_menu_details != []:
        self.__write_restaurant_details()
        self.__write_restaurant_menu()

    else:
        logging.error(f'Cannot write into file')

def __write_restaurant_details(self):
    makedirs(self.__base_dir, exist_ok=True)
    filename = self.__base_dir / (self.__filename + '.csv')

    try:
        df_1 = read_csv(filename)
        df_2 = DataFrame(self.__restaurant_details)
        df = concat([df_1, df_2])
        df.drop_duplicates(inplace=True)
        df.to_csv(filename, index=False)

    except FileNotFoundError:
        df = DataFrame(self.__restaurant_details)
        df.to_csv(filename, index=False)

    except Exception as e:
        logging.error(f'{e}\nCannot write restaurant details')

def __write_restaurant_menu(self):
    makedirs(self.__menu_dir, exist_ok=True)
    filename = self.__menu_dir / self.__restaurant_details['menu_file'][0]

    try:
        df = DataFrame(self.__restaurant_menu_details)
        df.to_csv(filename, index=False)
    except Exception as e:
        logging.error(f'{e}\nCannot write restaurant menu details')


## ISSUE W FETCH DETAILS