# TripAdvisor Reviews Scraper (Selenium)

This notebook scrapes reviews from TripAdvisor for "Taman Wisata Kertha Gosa".
Target: 195+ reviews (Handling pagination).

In [1]:
# Dependencies
import time
import pandas as pd
import sqlite3
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys

## Configuration

In [2]:
# TARGET URL for "Taman Wisata Kertha Gosa"
URL = "https://www.tripadvisor.co.id/Attraction_Review-g1953908-d9455041-Reviews-Taman_Wisata_Kertha_Gosa-Klungkung_Bali.html"

# Target quantity
MAX_REVIEWS = 250 # Set higher than 195 to be safe
DB_NAME = "tripadvisor_kertagosa.db"

In [3]:
def init_driver():
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")  # Debug first, then headless if stable
    options.add_argument("--lang=en-US") # Force English generally
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.maximize_window()
    return driver

In [4]:
def scrape_tripadvisor(url, max_reviews=100):
    driver = init_driver()
    reviews_data = []
    
    try:
        driver.get(url)
        print("Waiting for page load...")
        time.sleep(5)
        
        # Handle cookies/popups if any (TripAdvisor sometimes has them)
        # Usually manual closing is fine for now if watching.
        
        while len(reviews_data) < max_reviews:
            
            # 1. Expand "Read more" / "Baca selengkapnya"
            try:
                # Look for span containing "more" or "selengkapnya"
                more_btns = driver.find_elements(By.CSS_SELECTOR, "span.taLnk.ulBlueLinks")
                for btn in more_btns:
                    if "more" in btn.text.lower() or "selengkapnya" in btn.text.lower():
                        driver.execute_script("arguments[0].click();", btn)
                        time.sleep(0.5)
            except Exception as e:
                pass
                
            time.sleep(2) # Wait for expansion

            # 2. Find Review Cards
            # Container: div.review-container
            cards = driver.find_elements(By.CSS_SELECTOR, "div.review-container")
            if not cards:
                cards = driver.find_elements(By.CSS_SELECTOR, "div[data-test-target='reviews-tab'] div.cWwQK")
            
            print(f"Found {len(cards)} cards on this page.")
            
            for card in cards:
                if len(reviews_data) >= max_reviews:
                    break
                    
                try:
                    # ID
                    r_id = card.get_attribute("data-review-id")
                    
                    # Author
                    try:
                        # Attempt 1: Old design
                        author = card.find_element(By.CSS_SELECTOR, "div.info_text > div").text
                        if not author: author = card.find_element(By.CSS_SELECTOR, "span.ui_header_link").text
                    except:
                        try:
                            # Attempt 2: New design
                            author = card.find_element(By.CSS_SELECTOR, "a.ui_header_link").text
                        except:
                            author = "Unknown"
                    
                    # Rating
                    # Class usually: ui_bubble_rating bubble_50
                    try:
                        rating_elem = card.find_element(By.CSS_SELECTOR, "span.ui_bubble_rating")
                        rating_class = rating_elem.get_attribute("class") # e.g., "ui_bubble_rating bubble_50"
                        # Extract 50 -> 5.0
                        score_part = rating_class.split("bubble_")[-1] # "50"
                        rating = float(score_part) / 10.0
                    except:
                        rating = 0.0
                        
                    # Date
                    try:
                        date_text = card.find_element(By.CSS_SELECTOR, "span.ratingDate").get_attribute("title")
                        if not date_text:
                             date_text = card.find_element(By.CSS_SELECTOR, "span.ratingDate").text
                    except:
                        date_text = ""
                        
                    # Text
                    try:
                        text_elem = card.find_element(By.CSS_SELECTOR, "p.partial_entry")
                        text = text_elem.text
                    except:
                        try:
                           text_elem = card.find_element(By.CSS_SELECTOR, "q.IRsGHoPm")
                           text = text_elem.text 
                        except:
                           text = ""

                    reviews_data.append({
                        "id": r_id,
                        "author": author,
                        "rating": rating,
                        "date_rel": date_text,
                        "text": text,
                        "source": "TripAdvisor",
                        "scraped_at": datetime.now().isoformat()
                    })
                except Exception as e:
                    continue
            
            print(f"Total collected: {len(reviews_data)}")
            if len(reviews_data) >= max_reviews:
                break
                
            # 3. Pagination (Click Next)
            try:
                # Look for 'Next' button
                # Class: nav next ui_button primary
                next_btn = driver.find_element(By.CSS_SELECTOR, "a.nav.next.ui_button.primary")
                
                # Check if disabled
                if "disabled" in next_btn.get_attribute("class"):
                    print("End of pages.")
                    break
                    
                driver.execute_script("arguments[0].click();", next_btn)
                print("Navigating to next page...")
                time.sleep(4) # Wait for page load

            except Exception as e:
                print("Next button not found or error. Stopping.")
                break
                
    except Exception as e:
        print(f"Major Error: {e}")
    finally:
        driver.quit()
        
    return pd.DataFrame(reviews_data)

In [5]:
# Run
df = scrape_tripadvisor(URL, MAX_REVIEWS)

print(f"Total Records: {len(df)}")
df.head()

Waiting for page load...
Found 0 cards on this page.
Total collected: 0
Next button not found or error. Stopping.
Total Records: 0


In [6]:
if not df.empty:
    # Save to Excel
    df.to_excel("tripadvisor_reviews.xlsx", index=False)
    print("Saved to tripadvisor_reviews.xlsx")
    
    # Save to SQLite
    with sqlite3.connect(DB_NAME) as conn:
        df.to_sql("reviews", conn, if_exists="replace", index=False)
    print(f"Saved to {DB_NAME}")