In [9]:
# ==========================================
# 1. SETUP: Install Google Chrome Stable
# ==========================================
# Add the Google signing key and repository
!wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
!sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'

# Update apt and install Google Chrome
!apt-get -y update
!apt-get install -y google-chrome-stable

# Install Selenium and WebDriver Manager
!pip install selenium webdriver-manager

# ==========================================
# 2. SCRIPT: Scrape Riyasewana
# ==========================================
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

# Build the Chrome launch options: every flag below is passed to the browser
# in the order listed.
chrome_options = Options()
for chrome_flag in (
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
):
    chrome_options.add_argument(chrome_flag)

# Initialize WebDriver using ChromeDriverManager
# (This automatically finds the correct driver for the Chrome we just installed)
#
# `driver` starts as None so the cleanup in `finally` only ever touches a
# browser started by THIS run. In a notebook, a `driver` name left over from an
# earlier execution of the cell would otherwise pass a name-existence check
# even when the initialization below fails.
driver = None
try:
    print("Initializing WebDriver...")
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # --- Start Scraping ---
    url = 'https://riyasewana.com/buy/toyota-corolla-141-sale-dehiwala-mount-lavinia-10797417'
    print(f"Navigating to {url}...")
    driver.get(url)

    # Allow time for dynamic content (if any) to load
    time.sleep(3)

    # Parse the rendered page with BeautifulSoup
    page_html = driver.page_source
    soup = BeautifulSoup(page_html, 'html.parser')

    print("\n--- Scraping Results ---")
    # Collected fields, keyed by the label shown on the page
    vehicle_data = {}

    # 1. Title: text of the first <h1> on the page, if there is one
    title_element = soup.find('h1')
    if title_element:
        vehicle_data['Title'] = title_element.get_text(strip=True)
        print(f"Title: {vehicle_data['Title']}")

    # 2. Contact & Price (Class: moreph)
    # The first matching span is stored as the contact and the second as the
    # price; both are skipped when fewer than two spans are found.
    all_spans = soup.find_all('span', class_='moreph')
    if len(all_spans) >= 2:
        vehicle_data['Contact'] = all_spans[0].get_text(strip=True)
        vehicle_data['Price'] = all_spans[1].get_text(strip=True)
        print(f"Price: {vehicle_data['Price']}")
        print(f"Contact: {vehicle_data['Contact']}")

    # 3. Details Table (Class: moret)
    details_table = soup.find('table', class_='moret')
    if details_table:
        for row in details_table.find_all('tr'):
            cell_texts = [cell.get_text(strip=True) for cell in row.find_all('td')]
            # Only rows laid out as (Key, Value) or (Key, Value, Key, Value)
            # are used; rows with any other cell count are ignored.
            if len(cell_texts) not in (2, 4):
                continue
            # Even positions hold keys, odd positions hold their values.
            for key, value in zip(cell_texts[0::2], cell_texts[1::2]):
                if key:  # skip pairs whose label cell is empty
                    vehicle_data[key] = value

    print("\n--- Final Data ---")
    print(vehicle_data)

except Exception as e:
    # Top-level boundary of the cell: report the failure instead of raising
    print(f"\nCRITICAL ERROR: {e}")

finally:
    # Close the browser only if this run actually started one
    if driver is not None:
        driver.quit()
        print("\nBrowser session closed.")

OK
Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:2 https://cli.github.com/packages stable InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 http://dl.google.com/linux/chrome/deb stable InRelease [1,825 B]
Get:13 http://dl.google.com/linux/chrome/deb stable/main amd64 Packages [1,209 B]
Fetched 3,034 B in 2s (1,499 B/s)
Reading package lists... Done
W: Skipping 