In [None]:
!pip install selenium

Collecting selenium
  Downloading selenium-4.31.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.29.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.31.0-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.29.0-py3-none-any.whl (492 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m492.9/492.9 kB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloading outcome-1.3.0.post0-py2.py3-

In [None]:
import tempfile
import shutil
import csv
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

def create_driver():
    """Start a headless Chrome session that uses a throwaway profile directory.

    Returns:
        tuple: ``(driver, profile_dir)`` where ``driver`` is the
        ``webdriver.Chrome`` instance and ``profile_dir`` is the path of the
        temporary user-data directory created for it. The caller owns both and
        should call ``driver.quit()`` and delete ``profile_dir`` when finished.

    Raises:
        Exception: whatever ``webdriver.Chrome`` raises if the browser cannot
        be started. The temporary profile directory is removed before the
        error propagates, so a failed start leaves nothing behind.
    """
    profile_dir = tempfile.mkdtemp(prefix="chrome-profile-")

    chrome_options = Options()
    chrome_options.add_argument('--headless')
    # NOTE(review): --no-sandbox / --disable-dev-shm-usage are presumably here
    # for containerised hosts (e.g. Colab); confirm they are still required.
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--window-size=1920,1080')
    # The next three settings switch off Chrome's automation-related
    # switches/extension for this session.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36"
    )
    chrome_options.add_argument(f"--user-data-dir={profile_dir}")

    try:
        driver = webdriver.Chrome(options=chrome_options)
    except Exception:
        # The caller never receives profile_dir on failure, so nobody else
        # could clean it up; remove it here before re-raising.
        shutil.rmtree(profile_dir, ignore_errors=True)
        raise
    return driver, profile_dir

def main(
    start_url="https://www.uberpeople.net/search/654053/?q=destination+filter&c[showFilter]=visibleOnly&o=relevance",
    max_pages=25,
    output_path="forums.csv",
):
    """Scrape paginated search results and write them to a CSV file.

    Opens ``start_url`` in a driver from ``create_driver()``, collects the
    link text and ``href`` of every ``h3.contentRow-title`` entry on each
    page, follows the "Next" link until ``max_pages`` pages have been read or
    no further page is available, then writes the collected rows (columns
    ``url`` and ``title``) to ``output_path``.

    Args:
        start_url: URL of the first results page.
        max_pages: Maximum number of result pages to scrape.
        output_path: Destination CSV path.

    Rows are de-duplicated by URL (first occurrence wins), so the reported
    count really is a count of unique posts. The browser and its temporary
    profile directory are released even if scraping raises.
    """
    driver, profile_dir = create_driver()
    results = []
    seen_urls = set()

    try:
        # Load page 1 manually
        driver.get(start_url)

        for page in range(1, max_pages + 1):
            print(f"Scraping page {page}...")

            try:
                WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "h3.contentRow-title"))
                )
            except TimeoutException:
                print(f"Timeout waiting for forum entries on page {page}.")
                break

            forum_elements = driver.find_elements(By.CSS_SELECTOR, "h3.contentRow-title")
            for elem in forum_elements:
                try:
                    link = elem.find_element(By.TAG_NAME, "a")
                    title = link.text.strip()
                    href = link.get_attribute("href")
                except Exception as e:
                    # Best-effort: one unreadable entry should not abort the page.
                    print(f"Error reading post: {e}")
                    continue
                if href in seen_urls:
                    continue
                seen_urls.add(href)
                results.append({"url": href, "title": title})

            # Nothing will be scraped after the last allowed page, so do not
            # spend a click (and a 10 s wait) looking for "Next".
            if page == max_pages:
                break

            # Try to click the "Next" button
            try:
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "a[aria-label='Next']"))
                )
                driver.execute_script("arguments[0].click();", next_button)
                # Wait until the old results are detached from the DOM rather
                # than sleeping a fixed time: the old page's titles would
                # otherwise satisfy the presence wait above and the same page
                # could be scraped twice when navigation is slow.
                if forum_elements:
                    WebDriverWait(driver, 15).until(EC.staleness_of(forum_elements[0]))
            except (TimeoutException, NoSuchElementException):
                print("No more pages found or couldn't click 'Next'. Ending scrape.")
                break
    finally:
        # Always release the browser process and its temp profile; the nested
        # finally ensures the directory is removed even if quit() fails.
        try:
            driver.quit()
        finally:
            shutil.rmtree(profile_dir, ignore_errors=True)

    # Save results
    with open(output_path, mode="w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["url", "title"])
        writer.writeheader()
        writer.writerows(results)

    print(f"Saved {len(results)} unique posts to {output_path}")

# Entry point: run the scrape when this cell/script is executed directly,
# but not when the module is merely imported.
if __name__ == "__main__":
    main()


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
No more pages found or couldn't click 'Next'. Ending scrape.
Saved 500 unique posts to forums.csv


In [None]:
# Read the scraped CSV back in to inspect what was written; as the cell's last
# expression, the resulting DataFrame is rendered as the cell output.
import pandas as pd
pd.read_csv('forums.csv')

Unnamed: 0,url,title
0,https://www.uberpeople.net/threads/destination...,Destination Filter are no longer a thing FUber...
1,https://www.uberpeople.net/threads/destination...,Destination Filter
2,https://www.uberpeople.net/threads/destination...,Destination filter experience
3,https://www.uberpeople.net/threads/destination...,Destination filter frustrating
4,https://www.uberpeople.net/threads/destination...,Destination Filter not working
...,...,...
495,https://www.uberpeople.net/threads/destination...,Destination filter
496,https://www.uberpeople.net/threads/messing-wit...,Messing with the destination filter
497,https://www.uberpeople.net/threads/destination...,Destination filter intel
498,https://www.uberpeople.net/threads/has-lyft-in...,Has Lyft Increased Destination Filters?
