# Universal Google Maps Reviews Scraper (Selenium)

This notebook scrapes reviews from a Google Maps place and saves them to SQLite + Excel.

In [1]:
# Dependencies
import time
import pandas as pd
import sqlite3
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys

## Configuration
Set the Google Maps URL and how many reviews you want.

In [2]:
# TARGET URL for "Kerta Gosa"
# Use a URL that opens the 'Place' view directly.
URL = "https://www.google.com/maps/place/Objek+Wisata+Kerta+Gosa/@-8.5353873,115.4033437,17z/data=!4m15!1m8!3m7!1s0x2dd211154c6b51d9:0xd32fe6608e383c8!2sObjek+Wisata+Kerta+Gosa!8m2!3d-8.5358244!4d115.403541!10e5!16s%2Fm%2F0273h5v!3m5!1s0x2dd211154c6b51d9:0xd32fe6608e383c8!8m2!3d-8.5358244!4d115.403541!16s%2Fm%2F0273h5v?entry=ttu&g_ep=EgoyMDI1MTIwOS4wIKXMDSoASAFQAw%3D%3D"
MAX_REVIEWS = 100  # Adjust as needed (set high for all)
DB_NAME = "kerta_gosa.db"

In [3]:
def init_driver():
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")  # Run in background (optional)
    options.add_argument("--lang=en-US") # Force English for consistent parsing
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    return driver

In [4]:
def scrape_reviews(url, max_reviews=50):
    driver = init_driver()
    reviews_data = []
    
    try:
        driver.get(url)
        print("Waiting for page load...")
        time.sleep(5) # Wait for initial load
        
        # --- 1. Switch to Reviews Tab (Robust) ---
        print("Checking for Reviews tab...")
        try:
            # Search for buttons or tabs with text "Reviews" or "Ulasan"
            tab_xpath = "//*[contains(text(), 'Reviews') or contains(text(), 'Ulasan')]"
            potential_tabs = driver.find_elements(By.XPATH, tab_xpath)
            for tab in potential_tabs:
                try:
                    # Check if it's clickable and looks like a tab/button
                    tag = tab.tag_name
                    role = tab.get_attribute("role")
                    # Some tabs are divs with role="tab", some are buttons
                    if tag == "button" or role == "tab":
                        tab.click()
                        print("Clicked Reviews tab!")
                        time.sleep(3)
                        break
                except:
                    continue
        except Exception as e:
            print(f"Tab switch warning: {e}")

        # --- 2. Find Scrollable Container ---
        print("Finding scrollable container...")
        scrollable_div = None
        
        # Priority list of selectors
        possible_selectors = [
             'div[role="feed"]',         # The most reliable container for reviews
             '.m6QErb[aria-label]',      # Often has an aria-label like "Reviews"
             '.m6QErb'                   # Generic fallback
        ]
        
        for sel in possible_selectors:
            try:
                found = driver.find_elements(By.CSS_SELECTOR, sel)
                if found:
                    # If multiple, pick the one that is likely the feed (usually 2nd or has height)
                    scrollable_div = found[-1] # Often the last one is the loaded feed
                    print(f"Found container via: {sel}")
                    break
            except:
                continue
        
        if not scrollable_div:
            print("⚠️ Specific container not found. Trying BODY scroll.")
            scrollable_div = driver.find_element(By.TAG_NAME, "body")

        # --- 3. Scroll and Scrape Loop ---
        prev_height = 0
        scroll_attempts = 0
        
        while len(reviews_data) < max_reviews:
            # Scroll action
            try:
                driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
            except:
                driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
            
            time.sleep(2)
            
            # Find Cards (Robust)
            cards = driver.find_elements(By.CSS_SELECTOR, 'div[data-review-id]')
            if not cards:
                cards = driver.find_elements(By.CLASS_NAME, 'jftiEf')
            if not cards:
                # XPath Fallback: Find stars area parent
                cards = driver.find_elements(By.XPATH, '//span[contains(@aria-label, "stars") or contains(@aria-label, "bintang")]/../../..')
            
            print(f"Loaded {len(cards)} reviews... (Target: {max_reviews})", end="\r")
            
            if len(cards) >= max_reviews:
                break
            
            # Check if stuck
            try:
                new_height = driver.execute_script('return arguments[0].scrollHeight', scrollable_div)
                if new_height == prev_height:
                    scroll_attempts += 1
                    if scroll_attempts > 5: # Stuck for 10+ seconds
                        print("\nScrolling stopped (end of list or stuck).")
                        break
                else:
                    scroll_attempts = 0
                prev_height = new_height
            except:
                break

        print(f"\nParsing {len(cards)} reviews...")
        
        # --- 4. Extract Data ---
        for card in cards[:max_reviews]:
            try:
                # Expand 'More'
                try:
                    more_btns = card.find_elements(By.TAG_NAME, "button")
                    for btn in more_btns:
                        txt = btn.text.lower()
                        if "more" in txt or "lainnya" in txt or "lengkap" in txt:
                            btn.click()
                except:
                    pass

                # Author
                try:
                    author = card.find_element(By.CSS_SELECTOR, "div.d4r55").text
                except:
                    author = "Unknown"

                # Rating
                try:
                    stars_elem = card.find_element(By.CSS_SELECTOR, "span[aria-label*='stars'], span[aria-label*='bintang']")
                    rating_text = stars_elem.get_attribute("aria-label")
                    rating = float(''.join(c for c in rating_text if c.isdigit() or c == '.'))
                except:
                    rating = 0.0
                
                # Date
                try:
                    date_text = card.find_element(By.CSS_SELECTOR, "span.rsqaWe").text
                except:
                    date_text = ""
                
                # Text
                try:
                    text_elem = card.find_element(By.CSS_SELECTOR, "span.wiI7pd")
                    text = text_elem.text
                except:
                    text = ""

                reviews_data.append({
                    "place_name": "Kerta Gosa",
                    "author": author,
                    "rating": rating,
                    "date_rel": date_text,
                    "text": text,
                    "scraped_at": datetime.now().isoformat()
                })
            except:
                continue
                
    except Exception as e:
        print(f"Error: {e}")
    finally:
        driver.quit()
        
    return pd.DataFrame(reviews_data)

In [5]:
# Run Scraper
df = scrape_reviews(URL, MAX_REVIEWS)

# Display
print(f"Got {len(df)} reviews")
df.head()

Waiting for page load...
Checking for Reviews tab...
Finding scrollable container...
Found container via: .m6QErb[aria-label]
Loaded 2 reviews... (Target: 100)
Scrolling stopped (end of list or stuck).

Parsing 2 reviews...
Got 2 reviews


Unnamed: 0,place_name,author,rating,date_rel,text,scraped_at
0,Kerta Gosa,Unknown,0.0,,,2025-12-27T17:41:45.964751
1,Kerta Gosa,Unknown,45.0,,,2025-12-27T17:41:46.066067


In [6]:
# SAVE TO SQLITE & EXCEL
if not df.empty:
    # 1. SQLite
    with sqlite3.connect(DB_NAME) as conn:
        # Append to existing or replace?
        # We use 'processed_reviews' or just 'reviews'
        df.to_sql("scraped_reviews", conn, if_exists="replace", index=False)
    print(f"Saved to SQLite: {DB_NAME}")
    
    # 2. Excel
    excel_file = "scraped_reviews.xlsx"
    df.to_excel(excel_file, index=False)
    print(f"Saved to Excel: {excel_file}")

Saved to SQLite: kerta_gosa.db


ModuleNotFoundError: No module named 'openpyxl'