In [3]:
import pandas as pd
import re
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

In [4]:
def clean_text(text):
    """Normalize whitespace in a scraped string.

    Leading/trailing whitespace is stripped, narrow no-break spaces (U+202F)
    are removed outright, and every remaining run of whitespace is collapsed
    to a single ASCII space.

    Args:
        text: The raw string to normalize.

    Returns:
        The normalized string.
    """
    trimmed = text.strip()
    # U+202F is deleted rather than turned into a space, so digit groups
    # separated by it end up joined together.
    without_narrow_nbsp = trimmed.replace('\u202f', '')
    return re.sub(r'\s+', ' ', without_narrow_nbsp)

In [5]:
# Start the Chrome session that the scraping cell drives through `driver`.
options = Options()
# Run Chrome without opening a visible browser window.
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

In [6]:
# Scrape listing pages 1..50 and collect one dict per listing in `data`.
# Errors on a single listing or a single page are printed and skipped so the
# rest of the run continues (best-effort scraping).
data = []

try:
    for page in range(1, 51):
        url = f"https://dakar-auto.com/senegal/motos-and-scooters-3?page={page}"
        print(f"🔎 Scraping page {page}...")

        try:
            driver.get(url)
            # Fixed 2 s pause after navigation before querying the DOM
            # (presumably to let the listing cards render).
            time.sleep(2)

            annonces = driver.find_elements(By.CSS_SELECTOR, ".listings-cards__list-item")

            for annonce in annonces:
                try:
                    # V1: brand and V2: year (both taken from the title).
                    titre = clean_text(annonce.find_element(By.CSS_SELECTOR, "h2 a").text)
                    parts = titre.split()
                    marque = parts[0] if parts else ""
                    # The year is the last title token when it is exactly four digits.
                    # Guard against an empty title so `parts[-1]` cannot raise.
                    dernier = parts[-1] if parts else ""
                    annee = dernier if dernier.isdigit() and len(dernier) == 4 else ""

                    # V3: price, with the currency label and spaces removed.
                    prix = clean_text(annonce.find_element(By.CSS_SELECTOR, "h3").text)
                    prix = prix.replace("F CFA", "").replace(" ", "")

                    # V4: address (town + region); each part is optional.
                    ville = ""
                    region = ""
                    try:
                        ville = clean_text(annonce.find_element(By.CLASS_NAME, "town-suburb").text)
                    except Exception:
                        # Missing element: keep the empty default. `Exception`
                        # (not a bare except) lets KeyboardInterrupt through.
                        pass
                    try:
                        region = clean_text(annonce.find_element(By.CLASS_NAME, "province").text)
                    except Exception:
                        pass
                    # The parts are joined without a separator; the recorded output
                    # (e.g. "Ouakam,Dakar") suggests the town text already carries
                    # the comma -- TODO confirm against the page markup.
                    adresse = f"{ville}{region}".strip(",")

                    # V5: mileage -- first attribute item mentioning "km".
                    km = ""
                    attributs = annonce.find_elements(By.CSS_SELECTOR, "ul.listing-card__attribute-list li")
                    for li in attributs:
                        text = clean_text(li.text)
                        if "km" in text.lower():
                            # Strip the unit case-insensitively, matching the
                            # case-insensitive detection above ("Km", "KM", ...).
                            km = re.sub(r"km", "", text, flags=re.IGNORECASE).strip()
                            break

                    # V6: owner (seller name with the "Par " prefix removed).
                    try:
                        proprio = clean_text(annonce.find_element(By.CLASS_NAME, "time-author").text.replace("Par ", ""))
                    except Exception:
                        proprio = ""

                    data.append({
                        "marque": marque,
                        "annee": annee,
                        "prix (FCFA)": prix,
                        "adresse": adresse,
                        "kilometrage (Km)": km,
                        "proprietaire": proprio
                    })

                except Exception as e:
                    # Skip this listing but keep processing the rest of the page.
                    print(f"⚠️ Erreur dans une annonce : {e}")

        except Exception as e:
            # Skip this page but keep going with the next one.
            print(f"❌ Erreur page {page} : {e}")
finally:
    # Always shut the browser down, even if the run is interrupted.
    driver.quit()

🔎 Scraping page 1...
🔎 Scraping page 2...
🔎 Scraping page 3...
🔎 Scraping page 4...
🔎 Scraping page 5...
🔎 Scraping page 6...
🔎 Scraping page 7...
🔎 Scraping page 8...
🔎 Scraping page 9...
🔎 Scraping page 10...
🔎 Scraping page 11...
🔎 Scraping page 12...
🔎 Scraping page 13...
🔎 Scraping page 14...
🔎 Scraping page 15...
🔎 Scraping page 16...
🔎 Scraping page 17...
🔎 Scraping page 18...
🔎 Scraping page 19...
🔎 Scraping page 20...
🔎 Scraping page 21...
🔎 Scraping page 22...
🔎 Scraping page 23...
🔎 Scraping page 24...
🔎 Scraping page 25...
🔎 Scraping page 26...
🔎 Scraping page 27...
🔎 Scraping page 28...
🔎 Scraping page 29...
🔎 Scraping page 30...
🔎 Scraping page 31...
🔎 Scraping page 32...
🔎 Scraping page 33...
🔎 Scraping page 34...
🔎 Scraping page 35...
🔎 Scraping page 36...
🔎 Scraping page 37...
🔎 Scraping page 38...
🔎 Scraping page 39...
🔎 Scraping page 40...
🔎 Scraping page 41...
🔎 Scraping page 42...
🔎 Scraping page 43...
🔎 Scraping page 44...
🔎 Scraping page 45...
🔎 Scraping page 46.

In [8]:
# Do not truncate long cell values when pandas renders a frame
# (this is a process-wide display option).
pd.set_option("display.max_colwidth", None)

# One row per scraped listing; columns come from the record keys.
df = pd.DataFrame(data)

# Cell output: preview of up to the first 1000 rows.
df.head(1000)

Unnamed: 0,marque,annee,prix (FCFA),adresse,kilometrage (Km),proprietaire
0,Yamaha,2020,3900000,"Ouakam,Dakar",14000,Cheikh Mbacké
1,Yamaha,2020,1200000,"Yoff,Dakar",10000,Mohamed SALL
2,Honda,2014,1150000,"Grand-Dakar,Dakar",55000,Mohamed Memed
3,Piaggio,2012,490000,"Parcelles Assainies,Dakar",1,Djibril Ba
4,Aprilia,2018,300000,"Grand-Dakar,Dakar",150,Ibrahim Sarr
...,...,...,...,...,...,...
995,Piaggio,2015,300000,"Grand-Dakar,Dakar",,Nabs DAKAR
996,Gilera,,600000,"Grand-Dakar,Dakar",,Nabs DAKAR
997,Piaggio,,450000,"Grand-Dakar,Dakar",,Nabs DAKAR
998,Piaggio,2016,400000,"Grand-Dakar,Dakar",,Nabs DAKAR


In [10]:
# Export the scraped table to CSV without the index column. "utf-8-sig" writes a
# UTF-8 byte-order mark at the start of the file (presumably so spreadsheet
# tools detect the encoding and show accented names correctly).
df.to_csv("motos_clean.csv", index=False, encoding="utf-8-sig")
