# #################################################################

# A scraper for 10 restaurants, 5 of which are the required ones.

# A separate scraper, which collects data from n webpages for a particular location, has also been written.

# See the other file for it.

# #################################################################

In [4]:
# Importing Libraries 
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from itertools import chain
import time

# Column accumulators: the scraping loop appends one entry per restaurant
# to each of these lists.
restaurant_name, restaurant_logo = [], []
latitude, longitude = [], []
cuisine_tags = []

# One entry per restaurant; each entry is itself a list of menu rows shaped
# [name, description, price, logo], e.g.
# [[name, description, price, logo], [name, description, price, logo], ...]
menu_items = []

# Pages of the 10 restaurants to scrape
links = [
    'https://www.talabat.com/uae/restaurant/621133/ginos-deli-jlt?aid=1308',
    'https://www.talabat.com/uae/restaurant/645430/pasta-della-nona-jlt-jumeirah-lakes-towers?aid=1308',
    'https://www.talabat.com/uae/restaurant/50445/pizzaro-marina-3?aid=1308',
    'https://www.talabat.com/uae/restaurant/605052/the-pasta-guyz-dubai-marina?aid=1308',
    'https://www.talabat.com/uae/restaurant/621796/pizza-di-rocco-jumeirah-lakes-towers--jlt?aid=1308',
    'https://www.talabat.com/uae/restaurant/641201/pasta-broz-dubai-marina?aid=1272',
    'https://www.talabat.com/uae/restaurant/658231/jollof-house-al-barsha-3?aid=1272',
    'https://www.talabat.com/uae/restaurant/674091/churros-cone-cafe-bluewaters-island?aid=1272',
    'https://www.talabat.com/uae/restaurant/49587/layali-al-barsha-al-barsha-3?aid=1272',
    'https://www.talabat.com/uae/restaurant/656480/the-good-dough-jumeirah-lakes-towers--jlt?aid=1272',
]

In [5]:
# Scrape every page in `links` and collect one row per restaurant into the
# module-level accumulator lists, then bundle those lists into `table`.

# The accumulators are created in an earlier cell, so empty them first:
# otherwise re-running this cell appends a second copy of every restaurant.
for column in (restaurant_name, restaurant_logo, latitude,
               longitude, cuisine_tags, menu_items):
    column.clear()

# To calculate time of execution
start_time = time.time()

# Loop for parsing required data from restaurant pages in links
for link in links:

    # Requesting individual restaurant webpages and loading soup.
    # The timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors instead of parsing an error page.
    r = requests.get(link, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'html.parser')

    # Locating the two embedded JSON scripts (restaurant details and menu)
    scripts = soup.find('script', type='application/ld+json')
    data_menu = soup.find('script', type='application/json')

    # soup.find returns None when a tag is absent; skip such pages with a
    # message rather than failing on `.text` with an AttributeError.
    if scripts is None or data_menu is None:
        print('Skipping, expected script tags not found:', link)
        continue

    # Loading to json for easy extraction
    data = json.loads(scripts.text)

    # To check if the link is for a grocery store
    if data['@type'] != 'Restaurant':
        continue

    # Loading menu data to json for easy extraction
    data_menu = json.loads(data_menu.text)

    # Extracting required data
    menu_list = data_menu['props']['pageProps']['initialMenuState']['menuData']['items']

    # One [name, description, price, logo] row per menu item
    temp = [
        [item['name'], item['description'], item['price'], item['image']]
        for item in menu_list
    ]

    # Append to every column only after the whole page has been parsed, so a
    # failure part-way through cannot leave the lists with unequal lengths
    # (pd.DataFrame requires all columns to be the same length).
    restaurant_name.append(data['name'])
    restaurant_logo.append(data['image'])
    latitude.append(data['geo']['latitude'])
    longitude.append(data['geo']['longitude'])
    cuisine_tags.append(data['servesCuisine'])
    menu_items.append(temp)

table = {

  "restaurant_name": restaurant_name,
  "restaurant_logo": restaurant_logo,
  "latitude": latitude,
  "longitude": longitude,
  "cuisine_tags": cuisine_tags,
  "menu_items": menu_items

}

# time it took
print(time.time() - start_time, 'seconds')

16.231388807296753 seconds


In [6]:
# Converting data to a pandas dataframe (one row per restaurant)
df = pd.DataFrame(table)

# Saving it to a csv file. index=False leaves the default 0..n-1 row index
# out of the file; with it included, the CSV gains a nameless first column
# that shows up as "Unnamed: 0" when the file is read back.
df.to_csv('n_restaurants.csv', index=False)

Unnamed: 0,restaurant_name,restaurant_logo,latitude,longitude,cuisine_tags,menu_items
0,Gino's Deli,https://images.deliveryhero.io/image/talabat/r...,25.0648583638963,55.1383772692595,"Sandwiches, Pasta, Italian","[[Gino's Meal for One, Choice of Panuozzo, Fri..."
1,Pasta Della Nonna,https://images.deliveryhero.io/image/talabat/r...,25.0651332,55.13827815,"Italian, Pasta, Salad","[[Nonna's Meal for 1, Any Appetiser, Any Pasta..."
2,Pizzaro,https://images.deliveryhero.io/image/talabat/r...,25.083309,55.146559,"Italian, Pizza, Pasta","[[Mixed Seafood Pizza BOGO, Pizza sauce, mozza..."
3,The Pasta Guyz,https://images.deliveryhero.io/image/talabat/r...,25.0737758910733,55.1322831934299,"Italian, Pasta","[[Cheesy Garlic Bread (Vegetarian), A full min..."
4,Pizza Di Rocco,https://images.deliveryhero.io/image/talabat/r...,25.0650915,55.1387907,"Pizza, Pasta, Italian","[[Meal for One, Any Pizza, Appetiser & Soft Dr..."
5,Pasta Broz,https://images.deliveryhero.io/image/talabat/r...,25.0737893,55.132173,"Pasta, Italian, Pizza","[[Build Your Own Pasta Bowl, , 42, https://ima..."
6,Jollof House,https://images.deliveryhero.io/image/talabat/r...,25.08372,55.2019866,"African, International","[[Jollof Bowl, Your choice of jollof or fried ..."
7,Churros Cone Cafe,https://images.deliveryhero.io/image/talabat/r...,25.0815662853193,55.1229549786563,"Desserts, Spanish, Churros","[[Masafi Water, , 3, https://images.deliveryhe..."
8,Layali Al Barsha,https://images.deliveryhero.io/image/talabat/r...,25.0836534,55.2009971,"Arabic, Grills, Lebanese","[[Grill Azayim 2 Person, Mix grill layali al b..."
9,The Good Dough,https://images.deliveryhero.io/image/talabat/r...,25.0795422,55.1495587,"Healthy, Italian, Pizza","[[Chorizo Truff, Chorizo, tomato sauce, mozzar..."
