In [None]:
# -*- coding: utf-8 -*-
"""
Scrape the details of a single vehicle listing from a Riyasewana URL.
The page is loaded in headless Chrome via Selenium to get past the site's
security checks, and the selectors have been corrected to match the exact
HTML structure of the vehicle page.
"""

# Step 1: Install and set up Selenium and Chrome WebDriver
!pip install selenium
!apt-get update
!apt-get install -y chromium-chromedriver

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time

# Explicit-wait helpers from Selenium, imported here so this part of the
# script brings everything it adds into scope itself.
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

# Upper bound (seconds) on how long to wait for the listing to render.
# The wait returns as soon as the page is ready, so this is a ceiling and
# not a fixed delay.
PAGE_LOAD_TIMEOUT_S = 15


def _page_is_ready(browser):
    """Return True once the details table exists and the document is loaded.

    Used as the polling condition for WebDriverWait, which calls it with the
    driver instance until it returns a truthy value or the timeout expires.
    """
    if not browser.find_elements(By.CSS_SELECTOR, 'table.moret'):
        return False
    return browser.execute_script('return document.readyState') == 'complete'


def _iter_detail_pairs(details_table):
    """Yield (label, value) text pairs from the vehicle details table.

    Rows with exactly four <td> cells carry two label/value pairs
    (e.g. Make/Model) and rows with exactly two cells carry one
    (e.g. Options). Rows of any other width are skipped, as are pairs whose
    label or value is empty after stripping whitespace.
    """
    for row in details_table.find_all('tr'):
        texts = [cell.get_text(strip=True) for cell in row.find_all('td')]
        if len(texts) not in (2, 4):
            continue
        # Even positions are labels, odd positions are their values.
        for label, value in zip(texts[0::2], texts[1::2]):
            if label and value:
                yield label, value


# Step 2: Set up Chrome options for running in Colab
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')

url = 'https://riyasewana.com/buy/toyota-prius-sale-rajagiriya-10549562'

# Start every run from a known state. In a notebook, names from a previous
# execution stay defined, so without these resets a failed run could try to
# quit an old, already-closed browser or leave stale results behind.
driver = None
vehicle_data = {}

# Initialize the WebDriver
try:
    driver = webdriver.Chrome(options=chrome_options)

    # Step 3: Navigate to the page
    print(f"Navigating to {url}...")
    driver.get(url)

    print("Waiting for page to load...")
    try:
        WebDriverWait(driver, PAGE_LOAD_TIMEOUT_S).until(_page_is_ready)
    except TimeoutException:
        # Keep going: the individual sections below report what is missing.
        print("Timed out waiting for the details table; "
              "parsing the page as loaded so far.")

    # Step 4: Get the final page HTML and parse it
    page_html = driver.page_source
    soup = BeautifulSoup(page_html, 'html.parser')

    # Step 5: Extract data using the selectors that match the vehicle page
    print("\n--- Scraping Results ---")

    # --- Vehicle Title ---
    # The title is in the first <h1> tag, which has no specific class.
    title_element = soup.find('h1')
    if title_element is not None:
        vehicle_data['Title'] = title_element.get_text(strip=True)
        print(f"Title: {vehicle_data['Title']}")
    else:
        print("Could not find vehicle title.")

    # --- Contact and Price ---
    # Both are in <span> tags with class 'moreph'. The script relies on
    # their order: the first is the contact number, the second the price.
    all_spans = soup.find_all('span', class_='moreph')
    if len(all_spans) >= 2:
        vehicle_data['Contact'] = all_spans[0].get_text(strip=True)
        vehicle_data['Price'] = all_spans[1].get_text(strip=True)
        print(f"Price: {vehicle_data['Price']}")
        print(f"Contact: {vehicle_data['Contact']}")
    else:
        print("Could not find price or contact information.")

    print("\n--- Vehicle Details ---")

    # --- All Other Details from the Table ---
    # All remaining details live in a single <table> with class 'moret'.
    details_table = soup.find('table', class_='moret')
    if details_table is not None:
        for key, value in _iter_detail_pairs(details_table):
            print(f"{key}: {value}")
            vehicle_data[key] = value
    else:
        # Report the miss like the other sections do instead of failing silently.
        print("Could not find vehicle details table.")

    print("\n--- Scraped Data Summary (Dictionary) ---")
    print(vehicle_data)


except Exception as e:
    # Top-level boundary of the script: report the failure and fall through
    # to the cleanup below.
    print(f"\nAn unexpected error occurred: {e}")

finally:
    # Always close the browser session, but only one started by this run.
    if driver is not None:
        driver.quit()
        print("\nBrowser session closed.")