# Scraping Lisbon's top 270 restaurants from Zomato

## Importing Needed Packages

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

## Scraping initial Information

Zomato requires the user to be identified through their user-agent string.
For more context: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent

Google "what is my user agent" to find out your user-agent string

In [20]:
# Request headers sent with every call to Zomato.
# Replace the User-Agent value with your own user-agent string.
# Accept-Language asks for English pages; the parsing further down looks for
# the English text 'for two' in the page content.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0',
         "Accept-Language": "en-US,en;q=0.5"}

url = 'https://www.zomato.com/grande-lisboa'
# Include your user-agent in the request to have access to Zomato.
# The timeout (in seconds) keeps the cell from hanging forever if the server never answers.
response = requests.get(url, headers=header, timeout=30)
# Fail loudly on an HTTP error (4xx/5xx) instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [21]:
# Take the last <div> carrying this class; its <h5> headings and <a> anchors
# are read below as the area names and the area links.
all_content = soup.findAll('div', {'class':'sc-bke1zw-0 fIuLDK'})[-1]

# Area names: drop the last two space-separated tokens of each heading's text
# and join what is left back together.
area_names_ = all_content.findAll('h5')
area_names = [' '.join(heading.text.split(' ')[:-2]) for heading in area_names_]

print(f"Number of areas (should be 30): {len(area_names)}")

# Area links: take every anchor's href and strip the 'pt/' fragment from it.
area_links_ = all_content.findAll('a')
area_links = [anchor['href'].replace('pt/','') for anchor in area_links_]

print(f"Number of links (should be 30): {len(area_links)}")

Number of areas (should be 30): 30
Number of links (should be 30): 30


## Getting needed information for each of the 30 areas in Lisbon

In [31]:
REQUEST_TIMEOUT_S = 30  # seconds to wait for each area page before giving up


def _parse_price_for_two(label):
    """Return the first number found in a 'price for two' label as a float.

    Reads the whole numeric run instead of a fixed two-character slice, so
    one-digit and three-or-more-digit prices are parsed correctly.
    Commas inside the number are treated as thousands separators
    (assumes English number formatting, as requested via Accept-Language -- TODO confirm).

    Raises:
        ValueError: if the label contains no digits at all.
    """
    match = re.search(r'\d[\d,]*(?:\.\d+)?', label)
    if match is None:
        raise ValueError(f"No price found in label: {label!r}")
    return float(match.group().replace(',', ''))


# One entry per restaurant; the six lists are kept aligned by index.
restaurant_area = []
restaurant_name = []
restaurant_tags = []
restaurant_rating = []
restaurant_price = []
restaurant_link = []

# Compiled once instead of on every loop iteration.
info_paragraph_class = re.compile("sc-1hez2tp-0 sc")
card_link_class = re.compile("sc-")

for link, area in zip(area_links, area_names):

    resp2 = requests.get(link, headers=header, timeout=REQUEST_TIMEOUT_S)
    resp2.raise_for_status()  # stop on HTTP errors instead of parsing an error page
    soup2 = BeautifulSoup(resp2.text, "html.parser")

    # First <div> with this class is the container the restaurant data is read from.
    all_contents2 = soup2.find('div', {'class': 'sc-gVyKpa ieQGou'})
    if all_contents2 is None:
        raise ValueError(f"Restaurant listing container not found for area {area!r} ({link})")

    # Name of each restaurant
    page_names = [heading.text for heading in all_contents2.find_all('h4')]

    # Tags and prices come from the same run of <p> elements: a paragraph
    # containing 'for two' is the price, and the paragraph right before it
    # holds the tags.
    paragraphs = all_contents2.find_all('p', class_=info_paragraph_class)
    price_positions = [pos for pos, paragraph in enumerate(paragraphs) if 'for two' in paragraph.text]

    # Tags of each restaurant (pos > 0 guards against wrapping around to the
    # last paragraph when a price happens to be the very first paragraph)
    page_tags = [paragraphs[pos - 1].text for pos in price_positions if pos > 0]

    # Prices of each restaurant
    page_prices = [_parse_price_for_two(paragraphs[pos].text) for pos in price_positions]

    # Rating of each restaurant (kept as the raw text found on the page)
    page_ratings = [box.text for box in all_contents2.find_all('div', {'class': 'sc-1q7bklc-1 cILgox'})]

    # Link of each restaurant: keep every second anchor whose markup contains 'info'
    # (presumably each restaurant contributes two such anchors -- verify against the page)
    anchors = all_contents2.find_all('a', class_=card_link_class)
    info_anchors = [anchor for anchor in anchors if 'info' in str(anchor)][::2]
    page_links = ['https://www.zomato.com' + anchor['href'] for anchor in info_anchors]

    # All fields must describe the same restaurants; otherwise the parallel
    # lists would silently drift out of alignment.
    field_counts = {'name': len(page_names), 'tags': len(page_tags), 'rating': len(page_ratings),
                    'price': len(page_prices), 'link': len(page_links)}
    if len(set(field_counts.values())) != 1:
        raise ValueError(f"Mismatched field counts for area {area!r}: {field_counts}")

    # Area of each restaurant: one entry per restaurant actually found on the
    # page, rather than a hard-coded count of 9.
    restaurant_area.extend([area] * len(page_names))
    restaurant_name.extend(page_names)
    restaurant_tags.extend(page_tags)
    restaurant_rating.extend(page_ratings)
    restaurant_price.extend(page_prices)
    restaurant_link.extend(page_links)

In [32]:
# Sanity check: print the length of each scraped list (each should be 270).
for scraped_column in (restaurant_area, restaurant_name, restaurant_tags,
                       restaurant_rating, restaurant_price, restaurant_link):
    print(len(scraped_column))

270
270
270
270
270
270


### Creating dataframe and saving to .csv

In [36]:
# Pair each output column name with the list that fills it (order defines the CSV column order).
column_names = ['Name', 'City Area', 'Price For Two', 'Ratings', 'Tags', 'Link']
column_values = [restaurant_name, restaurant_area, restaurant_price,
                 restaurant_rating, restaurant_tags, restaurant_link]
restaurant_data = dict(zip(column_names, column_values))

# Build the table and write it to disk without the index column.
df = pd.DataFrame(restaurant_data)
df.to_csv('restaurants.csv', index=False)

In [37]:
# Preview the first five rows of the scraped table.
df.head(n=5)

Unnamed: 0,Name,City Area,Price For Two,Ratings,Tags,Link
0,Ao 26 Vegan Food Project,Chiado,40.0,4.6,"Vegetarian, Healthy Food, Vegan",https://www.zomato.com/grande-lisboa/ao-26-veg...
1,TOPO Chiado,Chiado,45.0,4.0,"Beverages, Pizza, Burger, Finger Food",https://www.zomato.com/grande-lisboa/topo-chia...
2,Kaffeehaus,Chiado,30.0,4.3,"Austrian, Cafe, Desserts, Finger Food",https://www.zomato.com/grande-lisboa/kaffeehau...
3,To.B,Chiado,25.0,4.2,Burger,https://www.zomato.com/grande-lisboa/to-b-chia...
4,Sea Me - Peixaria Moderna,Chiado,70.0,4.5,"Fresh Fish, Seafood, Japanese, Fusion",https://www.zomato.com/grande-lisboa/sea-me-pe...


In [38]:
# Preview the last five rows of the scraped table.
df.tail(n=5)

Unnamed: 0,Name,City Area,Price For Two,Ratings,Tags,Link
265,Bowls&Bar,São Bento,15.0,3.8,"Healthy Food, Finger Food, Juices, Cafe, Bever...",https://www.zomato.com/grande-lisboa/bowls-bar...
266,Hamburgueria do Bairro,São Bento,16.0,4.1,"Burger, Gourmet Fast Food",https://www.zomato.com/grande-lisboa/hamburgue...
267,Zapata,São Bento,35.0,4.1,"Portuguese, Seafood",https://www.zomato.com/grande-lisboa/zapata-sã...
268,Veramente Pizza & Vino,São Bento,30.0,3.7,"Pizza, Italian",https://www.zomato.com/grande-lisboa/veramente...
269,Break,São Bento,20.0,4.0,"Finger Food, Portuguese, Petiscos",https://www.zomato.com/grande-lisboa/break-são...
