In [10]:
import os
import time
from datetime import date, timedelta

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


def _parse_card(card, checkin, checkout):
    """Build one result row (dict) from a single listing card element.

    Each field is extracted independently and falls back to "N/A" when the
    corresponding lookup fails with a Selenium error, so one malformed card
    never aborts the whole scrape.
    """
    try:
        # Only the first line of the group's text is kept as the title.
        title = card.find_element(By.CSS_SELECTOR, "div[role='group']").text.split("\n")[0]
    except WebDriverException:
        title = "N/A"

    price_text = "N/A"
    try:
        # The price is taken from the first <span> whose text contains "€".
        for span in card.find_elements(By.TAG_NAME, "span"):
            text = span.text
            if "€" in text:
                price_text = text
                break
    except WebDriverException:
        price_text = "N/A"

    try:
        link = card.find_element(By.TAG_NAME, "a").get_attribute("href")
    except WebDriverException:
        link = "N/A"

    return {
        "checkin": checkin,
        "checkout": checkout,
        "title": title,
        "price": price_text,
        "link": link,
    }


def scrape_airbnb_prices(checkin, checkout, scrolls=6):
    """Scrape listing cards from an Airbnb search for Madrid, 2 adults.

    Opens a fresh Chrome session, loads the search page for the given dates,
    dismisses the cookie banner when one shows up, scrolls to the bottom of
    the page ``scrolls`` times and reads every card currently in the DOM.

    Args:
        checkin: Check-in date, interpolated verbatim into the URL query.
        checkout: Check-out date, interpolated verbatim into the URL query.
        scrolls: Number of scroll-to-bottom passes (2 s pause after each).

    Returns:
        A list of dicts with keys ``checkin``, ``checkout``, ``title``,
        ``price`` and ``link``. An empty list is returned when no result card
        is detected within the 20 s wait; in that case a screenshot named
        ``airbnb_debug_<checkin>.png`` is saved for debugging.

    The browser is always closed on exit, including when an unexpected error
    propagates out of this function.
    """
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_argument("--start-maximized")

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        wait = WebDriverWait(driver, 20)

        url = f"https://www.airbnb.es/s/Madrid--España/homes?checkin={checkin}&checkout={checkout}&adults=2"
        driver.get(url)

        # Accept cookies if the banner appears. Its absence (timeout) or a
        # failed click is not an error, so Selenium failures are ignored here.
        try:
            btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Aceptar')]")))
            btn.click()
            time.sleep(2)
        except WebDriverException:
            pass

        # Wait for at least one result card; otherwise keep a screenshot and bail out.
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-testid='card-container']")))
        except WebDriverException:
            driver.save_screenshot(f"airbnb_debug_{checkin}.png")
            print(f"No se detectaron resultados para {checkin}-{checkout}. Captura guardada.")
            return []

        # Scroll to the bottom repeatedly so more results get loaded.
        for _ in range(scrolls):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        cards = driver.find_elements(By.CSS_SELECTOR, "div[data-testid='card-container']")
        print(f"Total de tarjetas encontradas para {checkin}-{checkout}:", len(cards))

        return [_parse_card(card, checkin, checkout) for card in cards]
    finally:
        # Guarantees the Chrome process is released on every exit path.
        driver.quit()


# ==============================================
# BUILD THE 2024 WEEKEND DATES (Friday → Sunday)
# ==============================================
# Walk through 2024 one week at a time, starting on its first Friday, scrape
# each Friday→Sunday stay and collect every row into a single list.
start_date = date(2024, 1, 5)  # first Friday of 2024
end_date = date(2024, 12, 31)
delta = timedelta(days=7)  # step from one weekend to the next

all_data = []

current = start_date
while True:
    if current > end_date:
        break

    # date.isoformat() yields the YYYY-MM-DD form used in the search URL.
    checkin = current.isoformat()
    checkout = (current + timedelta(days=2)).isoformat()  # Friday → Sunday

    print(f"\n Extrayendo datos para {checkin} → {checkout}")
    week_data = scrape_airbnb_prices(checkin, checkout, scrolls=6)
    all_data += week_data

    current = current + delta

# Write everything to one CSV in the current working directory.
if not all_data:
    print(" No se extrajeron datos para ningún fin de semana de 2024.")
else:
    df = pd.DataFrame(all_data)
    filename = "airbnb_madrid_prices_2024.csv"
    filepath = os.path.join(os.getcwd(), filename)
    df.to_csv(filepath, index=False, encoding="utf-8")
    print("\n CSV final guardado con todo 2024 en:", filepath)



Extrayendo datos para 2024-01-05 → 2024-01-07
Total de tarjetas encontradas para 2024-01-05-2024-01-07: 18

Extrayendo datos para 2024-01-12 → 2024-01-14
Total de tarjetas encontradas para 2024-01-12-2024-01-14: 18

Extrayendo datos para 2024-01-19 → 2024-01-21
Total de tarjetas encontradas para 2024-01-19-2024-01-21: 18

Extrayendo datos para 2024-01-26 → 2024-01-28
Total de tarjetas encontradas para 2024-01-26-2024-01-28: 18

Extrayendo datos para 2024-02-02 → 2024-02-04
Total de tarjetas encontradas para 2024-02-02-2024-02-04: 18

Extrayendo datos para 2024-02-09 → 2024-02-11
Total de tarjetas encontradas para 2024-02-09-2024-02-11: 18

⏳ Extrayendo datos para 2024-02-16 → 2024-02-18
Total de tarjetas encontradas para 2024-02-16-2024-02-18: 18

Extrayendo datos para 2024-02-23 → 2024-02-25
Total de tarjetas encontradas para 2024-02-23-2024-02-25: 18

Extrayendo datos para 2024-03-01 → 2024-03-03
Total de tarjetas encontradas para 2024-03-01-2024-03-03: 18

Extrayendo datos para 202