# Scraping Dinámico con Selenium: Extrayendo Datos de Restaurantes de Indonesia

Adaptado de: https://towardsai.net/p/programming/web-scraping-with-selenium-foods-around-jakarta-zomato

In [None]:
!pip install selenium

In [None]:
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

#### Indicar la Ruta donde se encuentra el driver para Selenium

In [None]:
# Location of the ChromeDriver executable; adjust it to match the local install.
chromepath = r'C:/TDAPPS/chromedriver.exe'

# Selenium 4 receives the driver location through a Service object. Passing
# the path as the first positional argument (the Selenium 3 form) is no longer
# accepted by current releases, which is what `pip install selenium` provides.
driver = webdriver.Chrome(service=Service(chromepath))

#### Extrayendo el listado de enlaces para visitar la web de cada Restaurante

In [None]:
# Collect the detail-page URL of every restaurant shown on the search pages.
out_lst = []

try:
    # range(1, 20) walks search result pages 1 through 19.
    for i in range(1, 20):
        print('Opening Search Pages ' + str(i))
        driver.get('https://www.zomato.com/jakarta/restoran?page={}'.format(i))
        print('Accessing Webpage OK \n')

        # NOTE(review): "fNzDaR" looks like a generated CSS class name for the
        # restaurant links; confirm it still matches the live page markup.
        url_elt = driver.find_elements(By.CLASS_NAME, "fNzDaR")

        for j in url_elt:
            url = j.get_attribute("href")
            # Elements without an href yield None; skip them so every stored
            # entry can later be handed to driver.get().
            if url:
                out_lst.append(url)
finally:
    # quit() ends the whole session (browser and chromedriver process), even
    # when one of the page loads above raises.
    driver.quit()

#### Almacenamos la lista de direcciones en un DataFrame

In [None]:
# Wrap the collected restaurant URLs in a one-column DataFrame and preview
# the first rows.
out_df = pd.DataFrame(data=out_lst, columns=['Website'])
out_df.head(n=5)

#### Utilizando la lista, descargamos los nombres de cada uno de los restaurantes

In [None]:
# Visit every collected URL and record the restaurant name shown in the
# page's <h1> heading.
rest_name = []

driver = webdriver.Chrome(service=Service(chromepath))

try:
    # Scrape the data by looping through the URLs stored in the DataFrame
    for url in out_df['Website']:
        driver.get(url)
        print('Accessing Webpage OK')

        try:
            name = driver.find_element(By.TAG_NAME, 'h1').text
        except NoSuchElementException:
            # No <h1> on the page: record a placeholder so rest_name keeps
            # one entry per visited URL.
            name = "404 Error"
        rest_name.append(name)

        print(f'Scraping Restaurant Name - {name} - OK')
finally:
    # Always end the browser session, even if a page load fails midway.
    driver.quit()

#### Generamos un loop para extraer información de todos los restaurantes de la lista

In [None]:
# Visit every collected URL and record name, cuisine types, rating, review
# count and address. Each list receives exactly one entry per URL so they can
# be combined column-wise afterwards.
rest_name = []
rest_type = []
rest_rating = []
rest_review = []
rest_address = []

# Initialize Webdriver
driver = webdriver.Chrome(service=Service(chromepath))

try:
    # Scrape the data by looping through the URLs stored in the DataFrame
    for url in out_df['Website']:
        driver.get(url)
        print('Accessing Webpage OK')

        # Restaurant Name
        try:
            name = driver.find_element(By.TAG_NAME, 'h1').text
        except NoSuchElementException:
            # No <h1> on the page: store a placeholder instead of failing.
            name = "404 Error"
        rest_name.append(name)
        print(f'Scraping Restaurant Name - {name} - OK')

        # NOTE(review): the class names below ("heiMdG", "cILgox", "kEgyiI",
        # "fjhUCy") look like generated CSS classes; confirm they still match
        # the live page markup.

        # Restaurant Type: text of every matching element (possibly none)
        rest_type_list = [
            elt.text for elt in driver.find_elements(By.CLASS_NAME, "heiMdG")
        ]
        rest_type.append(rest_type_list)
        print(f'Scraping Restaurant Type - {rest_type_list} - OK')

        # Restaurant Rating: the second element carrying this class.
        # find_elements returns an empty list rather than raising when
        # nothing matches, so the "missing" case surfaces as IndexError.
        rating_elts = driver.find_elements(By.CLASS_NAME, "cILgox")
        try:
            rest_rating_text = rating_elts[1].text
        except IndexError:
            rest_rating_text = "Not Rated Yet"
        rest_rating.append(rest_rating_text)
        print(f'Scraping Restaurant Rating - {rest_rating_text} - OK')

        # Restaurant Review: the first element carrying this class
        review_elts = driver.find_elements(By.CLASS_NAME, "kEgyiI")
        try:
            rest_review_text = review_elts[0].text
        except IndexError:
            rest_review_text = "Not Reviewed Yet"
        rest_review.append(rest_review_text)
        print(f'Scraping Restaurant Review Counts - {rest_review_text} - OK')

        # Restaurant Address: text of every matching element (possibly none)
        rest_address_list = [
            elt.text for elt in driver.find_elements(By.CLASS_NAME, "fjhUCy")
        ]
        rest_address.append(rest_address_list)
        print(f'Scraping Restaurant Address - {rest_address_list} - OK')

        print('-------------------------------------------------------------------------------------------------------------------------------------------')
finally:
    # Always end the browser session, even if a page load fails midway.
    driver.quit()

#### Consolidamos los resultados en un DataFrame

In [None]:
# Combine the per-restaurant lists into one table, one column per field.
# Each list is shallow-copied so the DataFrame is built from its own copies.
rdf_columns = {
    "Name": list(rest_name),
    "Type": list(rest_type),
    "Rating": list(rest_rating),
    "Reviews": list(rest_review),
    "Address": list(rest_address),
}
rdf = pd.DataFrame(rdf_columns)
rdf.head(n=5)

#### Podemos analizar los datos, o exportarlos

In [None]:
# Write the consolidated table to a CSV file in the current working directory.
rdf.to_csv(path_or_buf='restaurantes.csv')

Elaborado por Luis Cajachahua bajo licencia MIT (2022)