# Libraries

We will mainly use BeautifulSoup and Selenium to help us with web scraping. To store and display the scraped data, we'll use pandas.

In [14]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd 


# Preprocessing and previous work

First, we will obtain all the required data by looking up the CSS class of each field in the HTML parsed by BeautifulSoup: \
__Restaurant name__: "result-title"\
__Address of restaurant__: "address-text"\
__Qualification__: "rating-review-count"\
__Number of reviews__: (it is inside the "rating-review-count" class, but at the same time it is nested inside an "a" element in the HTML) "a: review_count"\
__Number of mentions__: "review-mention-block"

In [15]:
# locate every relevant div with the help of BeautifulSoup
def find_divs(soup):
    """Collect, for each field of interest, the divs that carry it.

    Args:
        soup: parsed page exposing ``find_all(tag, attrs)`` (a BeautifulSoup
            object in this notebook).

    Returns:
        A 5-tuple of ``find_all`` results, in this order: restaurant names,
        addresses, qualifications, number of reviews, number of mentions.
    """
    # One CSS class per returned field, in return order.
    # "rating-review-count" is listed twice because the qualification and
    # the number of reviews are both read from that same div.
    css_classes = (
        "result-title",          # restaurant name
        "address-text",          # address
        "rating-review-count",   # qualification
        "rating-review-count",   # number of reviews
        "review-mention-block",  # number of mentions
    )
    return tuple(
        soup.find_all("div", {"class": css_class}) for css_class in css_classes
    )


The function above returns the entire div (with its HTML) in which each required field is found, so we must extract the parts we care about: the name of the restaurant, the address, the qualification, the number of reviews and the number of mentions.

In [16]:
def html_processing(restaurant_names_html, restaurant_address_html, restaurant_qualifications_html, restaurant_reviews_html, restaurant_mentions_html):
    """Extract plain values from the divs returned by ``find_divs``.

    Elements that lack the expected content (no ``<span>``, no ``alt``
    attribute, empty text) are skipped instead of raising. Because entries
    are skipped independently per field, the returned lists are not
    guaranteed to have equal lengths or to stay aligned row by row.

    Args:
        restaurant_names_html: divs whose ``span`` text is the restaurant name.
        restaurant_address_html: divs whose text is the address.
        restaurant_qualifications_html: divs whose ``span`` has an ``alt``
            attribute starting with the rating (decimal comma or point).
        restaurant_reviews_html: divs whose text starts with the review count.
        restaurant_mentions_html: divs whose text starts with the mention count.

    Returns:
        A 5-tuple ``(names, addresses, qualifications, reviews, mentions)``,
        each list truncated to its first 10 entries. Qualifications are
        floats; reviews and mentions are ints.

    Raises:
        ValueError: if a leading token is present but is not a valid number.
    """
    # for restaurant name
    restaurant_names = [
        restaurant.span.text
        for restaurant in restaurant_names_html
        if restaurant.span is not None
    ]

    # for address
    restaurant_addresses = [
        address.text
        for address in restaurant_address_html
        if address.text is not None
    ]

    # for qualification
    restaurant_qualifications = []
    for qualification in restaurant_qualifications_html:
        span = qualification.span
        # A div without a <span> has no rating to read; skip it.
        alt_text = span.get('alt') if span is not None else None
        token = _first_token(alt_text)
        if token is not None:
            # The rating may use a decimal comma ("4,5"); normalise to a point.
            restaurant_qualifications.append(float(token.replace(",", ".")))

    # for number of reviews / number of mentions (same "1,846 ..." format)
    restaurant_reviews = _leading_counts(restaurant_reviews_html)
    restaurant_mentions = _leading_counts(restaurant_mentions_html)

    # we return the first 10 restaurants
    return restaurant_names[:10], restaurant_addresses[:10], restaurant_qualifications[:10], restaurant_reviews[:10], restaurant_mentions[:10]


def _first_token(text):
    """Return the first whitespace-separated token of *text*, or None if there is none."""
    tokens = text.split() if text else []
    return tokens[0] if tokens else None


def _leading_counts(elements):
    """Parse the leading integer (thousands commas removed) of each element's text."""
    counts = []
    for element in elements:
        token = _first_token(element.text)
        if token is not None:
            counts.append(int(token.replace(",", "")))
    return counts



# Data Visualization with Pandas

Finally, we use the previously created functions and the imported libraries to visualize the data obtained. So that a search can be parameterized, we define a function that displays a dataframe with the scraped data. It receives as parameters the type of food to search for and, optionally, the location in which to search for it, given as an integer geolocation ID. Unfortunately, these IDs had to be obtained manually, by running the query in the browser and taking the ID from the URL I was redirected to when filtering for a particular location. As this parameter is optional, if it is omitted the function simply searches for restaurants around the world.

In [17]:
def show_data(query_food, query_place=None):
    """Search Tripadvisor for a type of food and display the results as a dataframe.

    Opens a Chrome session, loads the search page with "search nearby"
    turned off, optionally narrows it to a location, scrapes the results with
    ``find_divs`` / ``html_processing`` and displays them. The browser is
    always closed before returning, even if scraping fails.

    Args:
        query_food: search text (e.g. ``"hamburgers"``); URL-encoded before use.
        query_place: optional geolocation ID appended to the URL as ``&geo=``.
            When ``None``, no location filter is added.

    Returns:
        None. The dataframe is shown with ``display``.
    """
    # Imported locally so this cell does not depend on edits to the import cell.
    from urllib.parse import quote_plus
    from selenium.common.exceptions import TimeoutException

    # Encode the query so spaces and characters such as "&" cannot break the URL.
    url = 'https://www.tripadvisor.com/Search?q=' + quote_plus(query_food)
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # The site redirects to a URL with searchNearby=true; switch it off.
        url = driver.current_url
        url = url.replace("&searchNearby=true", "&searchNearby=false")
        driver.get(url)

        if query_place is not None:
            url = driver.current_url + "&geo=" + str(query_place)
            driver.get(url)

        # Give the result list a chance to render before reading the page.
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "result-title"))
            )
        except TimeoutException:
            # Best effort: fall back to whatever has been rendered so far,
            # which is what happened before the wait was added.
            pass

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always release the browser process, including on errors.
        driver.quit()

    # we use the previously created functions to pass the data to the dataframe
    restaurant_names_html, restaurant_address_html, restaurant_qualifications_html, restaurant_reviews_html, restaurant_mentions_html = find_divs(soup)
    restaurant_names, restaurant_addresses, restaurant_qualifications, restaurant_reviews, restaurant_mentions = html_processing(restaurant_names_html, restaurant_address_html, restaurant_qualifications_html, restaurant_reviews_html, restaurant_mentions_html)

    # dictionary of lists created with the above data
    dicc = {'Restaurant name': restaurant_names,
            'Address': restaurant_addresses,
            'Qualification (0 to 5)': restaurant_qualifications,
            'Number of reviews': restaurant_reviews,
            'Number of mentions': restaurant_mentions}

    df = pd.DataFrame(dicc)
    display(df)


In [18]:
# Hamburger restaurants all over the world (no query_place given, so no location filter is added)
show_data("hamburgers")

Unnamed: 0,Restaurant name,Address,Qualification (0 to 5),Number of reviews,Number of mentions
0,Haunted Hamburger,"410 Clark Street, Jerome, Arizona",4.5,1846,744
1,Hamburger Joe's,"712 48th Ave S, North Myrtle Beach, South Caro...",4.5,2126,785
2,Shake Shack,"1216 18th St NW, Washington DC, District of Co...",4.5,8610,247
3,Burgermeister Schlesisches Tor,"Oberbaumstrasse 8, Berlin, Germany",4.5,5141,119
4,A Cultura do Hamburguer,"Rua Salgadeiras 38, Lisbon, Lisbon District, C...",4.5,2987,158
5,Swensons Drive-In,"658 E Cuyahoga Falls Ave, Akron, Ohio",4.5,846,101
6,Hodad's,"5010 Newport Ave, San Diego, California",4.5,2318,223
7,Hamburger Mary's,"110 W Church St, Orlando, Florida",4.5,620,184
8,Louis' Lunch,"263 Crown St, New Haven, Connecticut",4.0,586,221
9,Teddy's Bigger Burgers - Waikiki,"134 Kapahulu Ave, Honolulu, Oahu, Hawaii",4.5,2044,87


In [19]:
# Sushi restaurants in Santiago, Chile (294305 is passed as the geolocation ID)
show_data("sushi", 294305)

Unnamed: 0,Restaurant name,Address,Qualification (0 to 5),Number of reviews,Number of mentions
0,Naoki,"Avenida Vitacura 3875, Santiago, Santiago Metr...",4.5,1136,92
1,Ramen Kintaro,"Monjitas 460, Santiago, Santiago Metropolitan ...",4.5,519,61
2,Restaurant Japon,"Baron Pierre de Coubertin 39, Santiago, Santia...",4.5,587,38
3,Panko,"Jose Victorino Lastarria 53, Santiago, Santiag...",4.5,670,64
4,Ichiban,"Avenida Padre Hurtado Norte 1521, Santiago, Sa...",4.5,562,54
5,Matsuri,"Avenida Presidente Kennedy # 4601, Santiago, S...",4.5,595,43
6,Karai by Mitsuharu,"Isidora Goyenechea No 3000, Santiago, Santiago...",4.5,231,11
7,Fukai,"Patio Bellavista, Santiago, Santiago Metropoli...",3.5,483,32
8,Nippon Sushi,"Orrego Luco 40, Santiago, Santiago Metropolita...",4.0,119,16
9,NIU Sushi - Lyon,"Avenida Providencia 2222, Santiago, Santiago M...",4.0,128,9


In [20]:
# Pizza restaurants in New York, USA (60763 is passed as the geolocation ID)
show_data("pizzas", 60763)

Unnamed: 0,Restaurant name,Address,Qualification (0 to 5),Number of reviews,Number of mentions
0,Bleecker Street Pizza,"69 7th Ave S, New York City, New York",4.5,1539,1277
1,NY Pizza Suprema,"413 8th Ave, New York City, New York",4.5,1290,895
2,Joe's Pizza - Carmine St,"7 Carmine St, New York City, New York",4.5,1590,1060
3,Joe's Pizza,"1435 Broadway, New York City, New York",4.5,459,306
4,SottoCasa Pizzeria,"227 Lenox Ave, New York City, New York",5.0,414,217
5,John's of Bleecker Street,"278 Bleecker St, New York City, New York",4.5,1800,1411
6,Rubirosa,"235 Mulberry St, New York City, New York",4.5,1274,779
7,John's of Times Square,"260 W 44th St, New York City, New York",4.0,6063,4712
8,Don Antonio,"309 W 50th St, New York City, New York",4.5,2178,1390
9,Numero 28 Pizzeria,"28 Carmine St, New York City, New York",4.5,1108,581
