In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
import csv

# Configure Chrome to run without a visible browser window.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")

# One browser session shared by every function in this script; the driver
# binary location comes from ChromeDriverManager().install().
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

# Site root and the first page of the listing to scrape.
BASE_URL = "https://www.mubawab.tn"
LISTING_URL = f"{BASE_URL}/fr/cc/immobilier-a-vendre:p:1"
# NOTE(review): HEADERS is not referenced anywhere in the visible code and is
# not handed to the Chrome driver above — confirm whether it is still needed.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

def get_ad_links(page_url):
    """Open a listing page and collect the link of every ad title on it.

    Args:
        page_url: URL of the listing page to load in the shared ``driver``.

    Returns:
        A list with the ``href`` of each ``h2.listingTit a`` element, in page
        order; anchors whose ``href`` is missing or empty are left out.
    """
    driver.get(page_url)
    time.sleep(2)

    anchors = driver.find_elements(By.CSS_SELECTOR, "h2.listingTit a")
    hrefs = (anchor.get_attribute("href") for anchor in anchors)
    ad_links = [href for href in hrefs if href]

    print(f"Found {len(ad_links)} links.")
    return ad_links

# CSS selector of every single-valued field on an ad page, keyed by the CSV
# column the extracted text is stored under.
_AD_FIELD_SELECTORS = {
    "Title": "h1.pageTitle",
    "Price": "h3.orangeTit",
    "Location": "h3.greyTit",
    "Description": "div.description",
    "Area": ".disFlex.adDetails > .adDetailFeature:nth-child(1) > span",
    "Rooms": ".disFlex.adDetails > .adDetailFeature:nth-child(2) > span",
    "Bedrooms": ".disFlex.adDetails > .adDetailFeature:nth-child(3) > span",
    "Bathrooms": ".disFlex.adDetails > .adDetailFeature:nth-child(4) > span",
    "Property Type": ".caractBlockProp .adMainFeature:nth-child(1) .adMainFeatureContentValue",
    "Condition": ".caractBlockProp .adMainFeature:nth-child(2) .adMainFeatureContentValue",
}

# Selector matching each entry of the ad's feature list.
_AD_FEATURES_SELECTOR = ".caractBlockProp .adFeature > span"


def scrape_ad_details(ad_url):
    """Load one ad page and return its fields as a dict keyed by CSV column.

    Extraction is best-effort: a field whose element cannot be located or read
    is stored as ``None`` instead of aborting the scrape.

    Args:
        ad_url: URL of the ad page to load in the shared ``driver``.

    Returns:
        A dict with the keys "Title", "Price", "Location", "Description",
        "Area", "Rooms", "Bedrooms", "Bathrooms", "Property Type",
        "Condition", "Features" (feature texts joined with ", ", or an empty
        string when none could be read) and "URL" (``ad_url`` itself).
    """
    # Imported at function scope so this block brings its own dependency;
    # selenium is already a requirement of this file.
    from selenium.common.exceptions import WebDriverException

    def text_of(selector):
        """Return the stripped text of the first match for *selector*, or None."""
        try:
            return driver.find_element(By.CSS_SELECTOR, selector).text.strip()
        except WebDriverException:
            # Only Selenium lookup/read failures mean "field absent"; anything
            # else (KeyboardInterrupt, programming errors) must propagate.
            return None

    driver.get(ad_url)
    # Fixed 2 s pause after navigation (presumably to let the page render).
    time.sleep(2)

    details = {
        column: text_of(selector)
        for column, selector in _AD_FIELD_SELECTORS.items()
    }

    try:
        features = [
            element.text.strip()
            for element in driver.find_elements(By.CSS_SELECTOR, _AD_FEATURES_SELECTOR)
        ]
    except WebDriverException:
        features = []

    details["Features"] = ", ".join(features)
    details["URL"] = ad_url
    return details

def _page_url(start_url, page_number):
    """Return *start_url* with its trailing ``:p:<n>`` marker set to *page_number*."""
    base, marker, tail = start_url.rpartition(":p:")
    if marker and tail.isdigit():
        return f"{base}:p:{page_number}"
    # No trailing page marker to rewrite: append one to the URL as given.
    return f"{start_url}:p:{page_number}"


def scrape_all_ads(start_url, csv_filename, max_pages=10):
    """Scrape every ad of a paginated listing and stream the rows to a CSV file.

    Pages are visited in order starting at ``start_url``. Each following page
    URL is derived from ``start_url`` itself by rewriting its trailing
    ``:p:<n>`` marker, so the whole run stays inside the listing that was
    asked for.

    Args:
        start_url: URL of the first listing page, e.g. one ending in ``:p:1``.
            An empty value results in a header-only CSV.
        csv_filename: Path of the CSV file to create (overwritten if present).
        max_pages: Maximum number of listing pages to visit.

    Returns:
        None. Rows are written to ``csv_filename`` as they are scraped and
        progress is printed to stdout.
    """
    fieldnames = [
        "Title", "Price", "Location", "Description", "Area", "Rooms", "Bedrooms", "Bathrooms",
        "Property Type", "Condition", "Features", "URL"
    ]
    # Only the number of ads is reported, so count rows instead of keeping them.
    scraped_count = 0

    with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()

        current_page = start_url
        page_count = 1
        while current_page and page_count <= max_pages:
            print(f"Scraping page {page_count}: {current_page}")
            for ad_url in get_ad_links(current_page):
                print(f"Scraping ad: {ad_url}")
                writer.writerow(scrape_ad_details(ad_url))
                scraped_count += 1
                time.sleep(1)
            page_count += 1
            # Build the next page from the caller's listing rather than a
            # hard-coded one, which would mix a different listing's ads in.
            current_page = _page_url(start_url, page_count)
            time.sleep(2)

    print(f"Scraped {scraped_count} ads and saved to {csv_filename}.")

# Run the scrape and always shut the browser down afterwards, even when
# scraping raises, so a failed run does not leave a headless Chrome process
# behind.
try:
    scrape_all_ads(LISTING_URL, "mubawab_ads_vente.csv")
finally:
    driver.quit()
