# Web scraping de AutoScout24

- Importación de las librerías necesarias

In [2]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By

from tqdm import tqdm

import re
import time

import pandas as pd

- Creamos la configuración del navegador

In [3]:
# Build the Chrome launch options, then start the browser session that the
# scraping cell drives.
c_options = webdriver.ChromeOptions()
for chrome_flag in ("--start-maximized",):  # open the window maximized
    c_options.add_argument(chrome_flag)
driver = webdriver.Chrome(options=c_options)

- Establecemos las configuraciones de la URL para buscar con los diferentes filtros

In [4]:
# --- Search URL pieces -------------------------------------------------------
# Listing root for the make; the model slug and colour slug are appended to it.
base_url = "https://www.autoscout24.es/lst/tesla/"
tesla_models = [f"model-{suffix}" for suffix in ("3", "s", "x", "y")]

# Query-string parameters shared by every search, joined with "&".
fixed_params = "&".join([
    "atype=C",
    "damaged_listing=exclude",
    "desc=0",
    "powertype=kw",
    "search_id=a1ps9r8ye6",
    "sort=standard",
    "source=homepage_search-mask",
    "ustate=N%2CU",
])

# Body-colour path segments; the part after "bc_" becomes the "Color" column.
bcol_values = [f"bc_{color}" for color in ("azul", "rojo", "blanco", "negro", "gris")]

# Country code (value of the `cy` query parameter) -> display name.
country_mapping = {"E": "España", "F": "Francia", "I": "Italia"}
cy_values = list(country_mapping)

# --- Mutable scraping state --------------------------------------------------
results = []            # one dict per scraped listing
accept_cookies = False  # flipped to True once the consent banner was clicked

# --- Title-parsing patterns --------------------------------------------------
patron_tesla = re.compile(r'\bTesla\b', re.IGNORECASE)
patron_modelo = re.compile(r'\bModel (3|S|X|Y)\b', re.IGNORECASE)


- Realizamos el scraping de las diferentes páginas de resultados

In [5]:
def _field_text(card, by, selector):
    """Return the text of the first element inside `card` matching the locator.

    Returns None when the card has no such element.
    """
    try:
        return card.find_element(by, selector).text
    except NoSuchElementException:
        return None


def _parse_number(raw_text, cast, default="No disponible"):
    """Convert a formatted number such as "€ 35.990,-" with `cast`.

    Every non-digit character is dropped before converting. `default` is
    returned when the text is missing (None) or contains no digits at all,
    instead of letting `cast('')` raise ValueError and abort the whole run.
    """
    if raw_text is None:
        return default
    digits = re.sub(r'\D', '', raw_text)
    return cast(digits) if digits else default


def _parse_card(card, color, country):
    """Build the result row (a dict) for one listing `article` element."""
    title = _field_text(card, By.TAG_NAME, 'h2')
    model_match = patron_modelo.search(title) if title is not None else None
    fecha = _field_text(card, By.CSS_SELECTOR, '[data-testid="VehicleDetails-calendar"]')

    return {
        "Modelo": model_match.group(0) if model_match else "No disponible",
        "Precio": _parse_number(
            _field_text(card, By.CSS_SELECTOR, '[data-testid="regular-price"]'), float
        ),
        "Color": color,
        "Pais": country,
        "Kilometraje": _parse_number(
            _field_text(card, By.CSS_SELECTOR, '[data-testid="VehicleDetails-mileage_road"]'), int
        ),
        "date": fecha if fecha is not None else "No disponible",
    }


def _go_to_next_page(browser):
    """Click the "next" pagination control.

    Returns False (without clicking) when the page has no second
    `prev-next` control or when that control is marked as disabled.
    """
    nav_buttons = browser.find_elements(By.CLASS_NAME, "prev-next")
    # find_elements returns an empty list rather than raising, so the absence
    # of pagination has to be detected by length.
    if len(nav_buttons) < 2:
        return False
    next_button = nav_buttons[1]
    if "pagination-item--disabled" in (next_button.get_attribute("class") or ""):
        return False
    next_button.click()
    return True


# Visit every model / colour / country combination and collect one row per
# listing into `results`. The browser is always closed, even on failure.
try:
    for model in tqdm(tesla_models):
        for bcol in bcol_values:
            color = bcol.split('_')[1].capitalize()
            for cy in cy_values:
                pais_nombre = country_mapping.get(cy, "Desconocido")
                full_url = f"{base_url}{model}/{bcol}?{fixed_params}&cy={cy}"
                driver.get(full_url)

                # The consent banner only needs to be dismissed once per session.
                if not accept_cookies:
                    try:
                        cookie_button = WebDriverWait(driver, 10).until(
                            EC.element_to_be_clickable((By.CLASS_NAME, "_consent-accept_1lphq_114"))
                        )
                        cookie_button.click()
                        accept_cookies = True
                    except TimeoutException:
                        print("No se encontraron cookies para aceptar.")

                # Walk the result pages until there are no cards or no next page.
                while True:
                    # Fixed pause before reading the page (presumably to let
                    # the listing finish rendering after navigation/click).
                    time.sleep(3)
                    car_cards = driver.find_elements(By.TAG_NAME, 'article')
                    if not car_cards:
                        break

                    for car in car_cards:
                        results.append(_parse_card(car, color, pais_nombre))

                    if not _go_to_next_page(driver):
                        break
finally:
    driver.quit()


100%|██████████| 4/4 [08:42<00:00, 130.61s/it]


In [10]:
# Turn the scraped rows into a DataFrame and keep only the part of `date`
# that follows the first "/" (values without a "/" become NaN).
df_tesla = pd.DataFrame(results).assign(
    date=lambda frame: frame['date'].str.split('/').str[1]
)

In [13]:
# Persist the table without the index column; "utf-8-sig" prepends a BOM
# (presumably so spreadsheet tools detect the encoding of accented text).
df_tesla.to_csv(
    path_or_buf="tesla_autoscout24.csv",
    encoding="utf-8-sig",
    index=False,
)