In [27]:
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd
import logging

# Configuration du logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.FileHandler('scraping.log'),
        logging.StreamHandler()
    ]
)

class CdiscountScraper:
    """Selenium-based scraper for Cdiscount search-result listings.

    Constructing an instance starts a Chrome browser; call ``close()`` when
    finished so the browser is shut down.
    """

    def __init__(self):
        self.driver = self._init_driver()
        self.base_url = "https://www.cdiscount.com"
        # Flipped to True once the consent button has been clicked, so later
        # pages do not repeat the lookup.
        self._cookies_accepted = False

    def _init_driver(self):
        """Start Chrome, trying three driver sources in turn.

        Returns:
            The first ``webdriver.Chrome`` instance that starts successfully.

        Raises:
            RuntimeError: if all three attempts fail (chained to the last
                underlying error).
        """
        options = Options()
        options.add_argument("--window-size=1920,1080")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option("useAutomationExtension", False)

        # Attempt 1: driver binary provided by webdriver_manager
        try:
            service = Service(ChromeDriverManager().install())
            return webdriver.Chrome(service=service, options=options)
        except Exception as e:
            logging.warning(f"WebDriverManager failed: {str(e)}")

        # Attempt 2: let Selenium locate a driver on its own (e.g. on PATH)
        try:
            return webdriver.Chrome(options=options)
        except Exception as e:
            logging.warning(f"System ChromeDriver failed: {str(e)}")

        # Attempt 3: chromedriver binary in the current working directory
        try:
            service = Service(executable_path='./chromedriver')
            return webdriver.Chrome(service=service, options=options)
        except Exception as e:
            logging.error(f"Local ChromeDriver failed: {str(e)}")
            # Chain the cause so the underlying failure stays in the traceback.
            raise RuntimeError("Impossible d'initialiser ChromeDriver. Solutions:\n"
                             "1. Vérifiez votre connexion Internet\n"
                             "2. Installez ChromeDriver manuellement\n"
                             "3. Mettez à jour Chrome") from e

    def scrape_category(self, category, pages=2):
        """Scrape the search-result pages for one category.

        Args:
            category: Search phrase; spaces are replaced by "+" to build the URL.
            pages: Number of result pages to visit, starting at page 1.

        Returns:
            A list of dicts with the keys ``category``, ``name``, ``price``,
            ``old_price``, ``rating``, ``reviews`` and ``url``. A page or a
            product that raises is logged and skipped, so the list may be
            partial or empty.
        """
        products = []
        search_term = category.replace(" ", "+")

        for page in range(1, pages + 1):
            url = f"{self.base_url}/search/10/{search_term}.html?page={page}"
            logging.info(f"Scraping page {page}: {url}")

            try:
                self.driver.get(url)
                time.sleep(2)  # fixed pause to let the page render

                # Dismiss the cookie banner (no-op once already accepted)
                self._accept_cookies()

                # Locate the product tiles
                items = self.driver.find_elements(By.CSS_SELECTOR, ".lpBloc .prdtBloc")
                logging.info(f"Found {len(items)} products")

                for item in items:
                    try:
                        product = {
                            'category': category,
                            'name': self._get_text(item, ".prdtBTit"),
                            'price': self._get_price(item),
                            'old_price': self._get_old_price(item),
                            'rating': self._get_rating(item),
                            'reviews': self._get_reviews(item),
                            'url': self._get_attribute(item, ".prdtBTo a", "href")
                        }
                        products.append(product)
                    except Exception as e:
                        logging.error(f"Error processing product: {str(e)}")
                        continue

            except Exception as e:
                logging.error(f"Error scraping page {page}: {str(e)}")
                continue

        return products

    def _accept_cookies(self):
        """Click the cookie-consent button, best effort.

        After one successful click, later calls return immediately. This
        avoids a failed lookup and a warning on every following page
        (presumably the banner does not come back once accepted in the same
        browser session - TODO confirm). Failures are logged, never raised.
        """
        # getattr keeps this safe on instances created without __init__.
        if getattr(self, "_cookies_accepted", False):
            return
        try:
            cookie_btn = self.driver.find_element(By.ID, "footer_tc_privacy_button_2")
            cookie_btn.click()
            self._cookies_accepted = True
            logging.info("Cookies acceptés")
            time.sleep(1)
        except Exception as e:
            logging.warning(f"Could not accept cookies: {str(e)}")

    # Helper methods for data extraction.
    # They catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit (e.g. interrupting the kernel) propagate.

    @staticmethod
    def _parse_price(text):
        """Convert a displayed euro amount to a float.

        Handles a comma decimal separator, whitespace used as a thousands
        separator (including non-breaking spaces) and the "299€99" form where
        the currency sign sits in the decimal position.

        Returns:
            The amount as a float, or None when ``text`` is empty or cannot
            be parsed.
        """
        if not text:
            return None
        # str.split() with no argument splits on any Unicode whitespace, so
        # this also removes NBSP / narrow NBSP thousands separators.
        compact = "".join(text.split())
        euros, sign, cents = compact.partition("€")
        if sign and euros.isdigit() and cents.isdigit():
            # "299€99": merely deleting the sign would yield 29999.
            compact = f"{euros}.{cents}"
        else:
            compact = compact.replace("€", "").replace(",", ".")
        try:
            return float(compact)
        except ValueError:
            return None

    def _get_text(self, element, selector):
        """Return the stripped text of the first match, or None if the lookup fails."""
        try:
            return element.find_element(By.CSS_SELECTOR, selector).text.strip()
        except Exception:
            return None

    def _get_price(self, element):
        """Return the price read from ``.price`` as a float, or None."""
        return self._parse_price(self._get_text(element, ".price"))

    def _get_old_price(self, element):
        """Return the price read from ``.prdtPrSt`` as a float, or None."""
        return self._parse_price(self._get_text(element, ".prdtPrSt"))

    def _get_rating(self, element):
        """Return the rating parsed from the ``title`` of ``.c-stars-reviews``.

        The part before the first "/" is parsed as a float (comma accepted as
        the decimal separator). Returns None when the element, the attribute
        or a parseable number is missing.
        """
        try:
            rating_text = element.find_element(By.CSS_SELECTOR, ".c-stars-reviews").get_attribute("title")
            return float(rating_text.split("/")[0].replace(",", "."))
        except Exception:
            return None

    def _get_reviews(self, element):
        """Return the review count read from ``.itemReviewCount``, or 0.

        Parentheses and any whitespace inside the number are removed before
        conversion; 0 is returned when the text is missing or not an integer.
        """
        reviews_text = self._get_text(element, ".itemReviewCount")
        if not reviews_text:
            return 0
        digits = "".join(reviews_text.replace("(", "").replace(")", "").split())
        try:
            return int(digits)
        except ValueError:
            return 0

    def _get_attribute(self, element, selector, attribute):
        """Return ``attribute`` of the first match, or None if the lookup fails."""
        try:
            return element.find_element(By.CSS_SELECTOR, selector).get_attribute(attribute)
        except Exception:
            return None

    def close(self):
        """Quit the browser. Safe to call more than once."""
        driver = getattr(self, "driver", None)
        if driver:
            try:
                driver.quit()
            finally:
                # Drop the reference even if quit() fails so a second call
                # does not try to quit the same session again.
                self.driver = None
            logging.info("Navigateur fermé")

def main():
    """Scrape a fixed list of categories and save the results to a dated CSV.

    One results page is scraped per category. When at least one product was
    collected, the rows are written to ``cdiscount_products_<YYYYMMDD>.csv``
    in the current directory. Any exception is logged with its traceback
    instead of being raised, and the browser is closed in every case where it
    was started.
    """
    # Bound before the try block: if CdiscountScraper() itself raises, the
    # finally clause must not fail on an unbound name.
    scraper = None
    try:
        scraper = CdiscountScraper()

        categories = [
            "machine à laver",
            "réfrigérateur",
            "lave-vaisselle"
        ]

        all_products = []
        for category in categories:
            logging.info(f"\n{'='*50}\nScraping category: {category}\n{'='*50}")
            products = scraper.scrape_category(category, pages=1)
            all_products.extend(products)
            time.sleep(2)  # pause between categories

        if all_products:
            df = pd.DataFrame(all_products)
            filename = f"cdiscount_products_{pd.Timestamp.now().strftime('%Y%m%d')}.csv"
            df.to_csv(filename, index=False, encoding='utf-8-sig')
            logging.info(f"Saved {len(df)} products to {filename}")
        else:
            logging.warning("No products scraped")

    except Exception as e:
        # logging.exception logs at ERROR level and appends the traceback.
        logging.exception(f"Main error: {str(e)}")
    finally:
        if scraper is not None:
            scraper.close()

# Entry point: call main() when this code runs as the top-level program
# rather than being imported as a module.
if __name__ == "__main__":
    main()

2025-05-19 11:41:38,791 - INFO - Get LATEST chromedriver version for google-chrome
2025-05-19 11:41:39,075 - INFO - Get LATEST chromedriver version for google-chrome
2025-05-19 11:41:39,248 - INFO - Get LATEST chromedriver version for google-chrome
2025-05-19 11:41:39,648 - INFO - WebDriver version 136.0.7103.94 selected
2025-05-19 11:41:39,655 - INFO - Modern chrome version https://storage.googleapis.com/chrome-for-testing-public/136.0.7103.94/win32/chromedriver-win32.zip
2025-05-19 11:41:39,657 - INFO - About to download new driver from https://storage.googleapis.com/chrome-for-testing-public/136.0.7103.94/win32/chromedriver-win32.zip
2025-05-19 11:41:39,892 - INFO - Driver downloading response is 200
2025-05-19 11:41:42,830 - INFO - Get LATEST chromedriver version for google-chrome
2025-05-19 11:41:44,411 - INFO - Driver has been saved in cache [C:\Users\proprietaire\.wdm\drivers\chromedriver\win64\136.0.7103.94]
2025-05-19 11:41:45,767 - INFO - 
Scraping category: machine à laver
2

In [11]:
# Exploratory probe: load one Rue du Commerce listing page and check whether
# the product selectors match anything.
# NOTE(review): setup_driver is not defined in the cells visible here -
# presumably it comes from an earlier cell; confirm it exists on a fresh kernel.
# NOTE(review): the driver is not quit in this cell; call driver.quit() once
# exploration is finished.
driver = setup_driver()
driver.get("https://www.rueducommerce.fr/r/70518/lave-linge/")
time.sleep(3)  # fixed pause to let the listing render before querying it

# NOTE(review): the recorded run found 0 elements for this selector - verify
# it against the live page markup.
items = driver.find_elements(By.CSS_SELECTOR, "div.product-item")
print(f"Nombre de produits trouvés : {len(items)}")
for item in items[:3]:  # only probe the first three items
    try:
        nom = item.find_element(By.CSS_SELECTOR, "a.product-item-name").text
        print("Nom produit :", nom)
    except Exception:
        # Narrowed from a bare except so that interrupting the kernel
        # (KeyboardInterrupt) is not swallowed by this loop.
        print("Pas pu extraire le nom")


Nombre de produits trouvés : 0
