# TripAdvisor Scraping

### Librerías

In [2]:
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

### Obtención del HTML

In [3]:
# Fetch the restaurant page with headless Chrome and parse it into `soup`.
url = "https://www.tripadvisor.com/Restaurant_Review-g60864-d2333058-Reviews-Olde_Nola_Cookery-New_Orleans_Louisiana.html"
# NOTE(review): `headers` is not used by the Selenium session in this cell (the
# browser gets its user agent from `options`), and the Chrome version here (120)
# differs from the one passed to Selenium (124) - confirm which one is intended.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0.0.0 Safari/537.36"}
# Upper bound, in seconds, to wait for the page title element to be rendered.
PAGE_READY_TIMEOUT_S = 15

options = Options()
options.add_argument("--headless=new")  # New headless mode
options.add_argument("--disable-gpu")  # Disable GPU usage
options.add_argument("--no-sandbox")  # Avoid sandbox problems on servers
options.add_argument("--disable-dev-shm-usage")  # Avoid errors where shared memory is scarce
options.add_argument("--blink-settings=imagesEnabled=false")  # Skip images to speed up loading
options.add_argument("--disable-extensions")  # Disable unnecessary extensions
options.add_argument("--disable-infobars")  # Hide the automation notice
options.add_argument("--mute-audio")  # Mute any sound
options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36")

service = Service()
driver = webdriver.Chrome(service=service, options=options)
try:
    driver.get(url)
    # The page is rendered client-side, so give the title a chance to appear
    # before snapshotting the DOM. A timeout is only reported: the snapshot is
    # still taken so the cells below can be run against whatever was loaded.
    try:
        WebDriverWait(driver, PAGE_READY_TIMEOUT_S).until(
            EC.presence_of_element_located((By.TAG_NAME, "h1"))
        )
    except TimeoutException:
        print(f"Warning: no <h1> found after {PAGE_READY_TIMEOUT_S}s; the page may be incomplete.")
    soup = bs(driver.page_source, "html.parser")
finally:
    # Always release the browser process, even if loading or parsing failed.
    driver.quit()



### Obtención de la info básica

In [None]:
# Extract the restaurant's basic information from `soup`.
# Every field degrades to None (or an empty list) when its node is missing,
# instead of aborting the whole cell.


def _node_text(node):
    """Return the stripped text of a BeautifulSoup node, or None if the node is None."""
    return None if node is None else node.text.strip()


def _find_div_by_text(root, label, ignore_case=False):
    """Return the first <div> under `root` whose stripped text equals `label`.

    With `ignore_case=True` the div text is lower-cased before comparing, so
    `label` must be given in lower case. Returns None when `root` is None or
    nothing matches.
    """
    if root is None:
        return None
    for div in root.find_all("div"):
        text = div.text.strip()
        if (text.lower() if ignore_case else text) == label:
            return div
    return None


def _value_after_label(section, label):
    """Return the text of the sibling following the `label` div inside `section`, or None."""
    label_div = _find_div_by_text(section, label, ignore_case=True)
    if label_div is None:
        return None
    return _node_text(label_div.find_next_sibling())


# Name
name = _node_text(soup.find("h1"))
# Address
address = _node_text(soup.find("span", {"data-automation": "restaurantsMapLinkOnName"}))
# Average rating
rate = _node_text(soup.find("div", {"data-automation": "bubbleRatingValue"}))
# Number of ratings
n_ratings = _node_text(soup.find("div", {"data-automation": "bubbleReviewCount"}))

# Cuisines and price from the page header (option B): the element following the
# "Restaurants in ..." span holds the cuisines, then the price, separated by ",".
header_span = next(
    (span for span in soup.find_all("span") if "Restaurants in" in span.text.strip()),
    None,
)
header_items = header_span.find_next_sibling() if header_span is not None else None
if header_items is not None:
    cuisines_price = [child.text.strip() for child in header_items]
    cuisines_price = [item for item in cuisines_price if item != ","]
else:
    cuisines_price = []
header_cuisines = cuisines_price[:-1]
price = cuisines_price[-1] if cuisines_price else None

# Summary, cuisines, special diets and meal types from the "About" section.
about_heading = _find_div_by_text(soup, "About")
about_section = about_heading.find_next_sibling() if about_heading is not None else None
about = _node_text(about_section)
diets = _value_after_label(about_section, "special diets")
meal_times = _value_after_label(about_section, "meal types")
cuisines = _value_after_label(about_section, "cuisines")
if cuisines is None and header_cuisines:
    # Fall back to the header values (joined to match the About section's
    # comma-separated string) rather than discarding them.
    cuisines = ", ".join(header_cuisines)

# Reviews summary: located relative to the "Reviews summary" heading, i.e. the
# first tag inside the second sibling after the heading's grandparent.
reviews_summary = None
summary_heading = _find_div_by_text(soup, "Reviews summary")
if summary_heading is not None:
    ancestors = summary_heading.find_parents()
    following = ancestors[1].find_next_siblings() if len(ancestors) > 1 else []
    summary_node = following[1].find() if len(following) > 1 else None
    if summary_node is not None:
        reviews_summary = summary_node.text.strip()
        reviews_summary = reviews_summary.replace("Jump to all reviews", "").strip()


In [31]:
# Show every extracted field, one "label value" line per field, in a fixed order.
basic_info_rows = [
    ("Nombre:", name),
    ("Dirección:", address),
    ("Valoración media:", rate),
    ("Número de valoraciones:", n_ratings),
    ("Tipos de cocina:", cuisines),
    ("Precio:", price),
    ("Dietas:", diets),
    ("Momentos de comida:", meal_times),
    ("Resumen de reseñas:", reviews_summary),
    ("Resumen:", about),
]
print(*(f"{label} {value}" for label, value in basic_info_rows), sep="\n")

Nombre: Olde Nola Cookery
Dirección: 205 Bourbon St, New Orleans, LA 70130-2204
Valoración media: 4.6
Número de valoraciones: (3,910 reviews)
Tipos de cocina: Cajun & Creole, Seafood
Precio: $$ - $$$
Dietas: Vegetarian friendly, Vegan options, Gluten free options
Momentos de comida: Lunch, Dinner, Brunch, Late Night
Resumen de reseñas: Olde Nola Cookery is celebrated for its authentic New Orleans flavors, with dishes like gumbo and po'boys frequently earning praise from patrons. Nestled in the heart of Bourbon Street, its prime location and reasonable prices are a hit among diners. The vibrant atmosphere, often buzzing with live music, adds to the charm, though some guests consider it loud.

Service at this establishment is consistently lauded for its attentiveness and efficiency, ensuring minimal wait times. However, a few diners note occasional inconsistencies in the food's quality.
Resumen: "Cooked by Locals, Spiced with History" Located on the famous Bourbon Street in the heart of 

In [11]:
# Locate the first <div> whose stripped text is exactly "All reviews" and display it.
all_reviews_divs = [div for div in soup.find_all("div") if div.text.strip() == "All reviews"]
search = all_reviews_divs[0]
search

<div><div class="JajTY f K"><div class="biGQs _P fiohW kSNRl uuBRH">All reviews</div></div></div>

In [29]:
# Inspect the "mSOQy" container inside the reviews tab.
reviews_tab = soup.find("div", attrs={"data-test-target": "reviews-tab"})
res = reviews_tab.find("div", class_="mSOQy")
res

<div class="mSOQy"><div class="fHkot" data-test-target="low_content_cta"><div class=""><div class="rpPMZ e u Fl"><div class="biGQs _P fiohW ngXxk">No reviews for this property yet.</div><button class="rmyCe _G B- z _S c Wc wSSLS AeLHi QHaGY" data-automation="ugcNoReviewsWarCta" type="button"><span class="biGQs _P ttuOS"><span class=""><svg aria-hidden="true" class="d Vb egaXP UmNoP" height="20px" viewbox="0 0 24 24" width="20px"><path clip-rule="evenodd" d="M13.788 7.293 6.19 14.868l-.816 3.746 3.724-.839 7.588-7.583zm3.96 1.84-2.898-2.9.556-.554A2.32 2.32 0 0 1 17.02 5c.378 0 .73.104 1.031.315l.01.007.012.008c1.12.757 1.221 2.26.326 3.151zm-7.896 10.01-5.99 1.35q-.032.009-.064.007a.297.297 0 0 1-.29-.36l1.31-6.023 9.529-9.5A3.82 3.82 0 0 1 17.02 3.5c.66 0 1.318.184 1.893.587a3.536 3.536 0 0 1 .546 5.457z" fill-rule="evenodd"></path></svg>Write a review</span></span></button></div></div></div><div class="DnVcE e"><div class="HLCmq"><div class="f e F1 _Z"><div class="H"><div class="E" s

In [23]:
# Look up <div> elements by their exact class string; the recorded output of
# this cell is an empty list, i.e. nothing in the fetched page matched.
res = soup.find_all("div", attrs={"class": "JVaPo Gi kQjeB"})
res

[]