In [109]:
import requests
import time
import re
import csv
import os
from bs4 import BeautifulSoup


In [110]:


header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

BASE_URLS = {
    "Italienne": "https://www.marmiton.org/dossier-viva-italia",
    "Japonaise": "https://www.marmiton.org/recettes/recherche.aspx?aqt=japon",
    "Libanaise": "https://www.marmiton.org/recettes/recherche.aspx?aqt=liban",
    "Indienne": "https://www.marmiton.org/recettes/recherche.aspx?aqt=inde",
    "Fran√ßaise": "https://www.marmiton.org/recettes/recherche.aspx?aqt=france",
    "Rapide et Facile": "https://www.marmiton.org/recettes/recherche.aspx?aqt=rapide-facile",
    "Anti-gaspillage": "https://www.marmiton.org/recettes/recherche.aspx?aqt=anti-gaspi",
}


In [111]:

OUTPUT_CSV = "recettes.csv"
DELAY_BETWEEN_REQUESTS = 1.5


In [114]:
def get_recipe_links(url):
    """R√©cup√®re les liens de recettes sur la page de recherche."""
    links = []
    try:
        response = requests.get(url, headers=header, timeout=15)
        if response.status_code != 200:
            print(f"  ‚ö†Ô∏è Erreur de connexion (Code {response.status_code})")
            return []
            
        soup = BeautifulSoup(response.text, "html.parser")
        
        for a in soup.find_all("a", href=True):
            href = a['href']
            if "/recettes/recette_" in href:
                full_url = href if href.startswith("http") else "https://www.marmiton.org" + href
                if full_url not in links:
                    links.append(full_url)
                    
    except Exception as e:
        print(f"  ‚ùå Erreur lors de la r√©cup√©ration : {e}")
        
    return links[:30] 
    

def parse_recipe(url, culture_theme):
    """Parse une page de recette et extrait les informations."""
    try:
        response = requests.get(url, headers=header, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        page_text = soup.get_text().lower()
        
        # Nom de la recette
        nom = soup.select_one("h1").get_text().strip() if soup.select_one("h1") else "N/A"
        
        # Note de la recette
        note = soup.select_one(".recipe-header__rating-text")
        note = note.get_text().strip() if note else "N/A"
        
        # --- GO√õTS ---
        gouts = []
        if "v√©g√©tarien" in page_text or "vegetarien" in page_text: 
            gouts.append("V√©g√©tarien")
        if "sans gluten" in page_text: 
            gouts.append("Sans Gluten")
        if "v√©gan" in page_text or "vegan" in page_text: 
            gouts.append("V√©gan")
        if "healthy" in page_text or "l√©ger" in page_text: 
            gouts.append("Healthy")
        
        # --- INGREDIENTS ---
        ing_elements = soup.select(".recipe-ingredients__list__item, .card-ingredient-title")
        ingredients = [re.sub(r"\s+", " ", i.get_text()).strip() for i in ing_elements]
        
        return {
            "recette": nom,
            "th√©matique_culture": culture_theme,
            "go√ªt": ", ".join(gouts) if gouts else "Standard",
            "ingredient": " | ".join(ingredients),
            "note": note
        }
    except Exception as e:
        print(f"    ‚ùå Erreur : {e}")
        return None

def main():
    dataset = []
    
    for theme, url in BASE_URLS.items():
        print(f"\n{'='*60}")
        print(f"üç¥ Th√®me : {theme}")
        print(f"{'='*60}")
        links = get_recipe_links(url)
        print(f"üìã {len(links)} recettes trouv√©es\n")
        
        for i, link in enumerate(links, 1):
            data = parse_recipe(link, theme)
            if data:
                dataset.append(data)
                print(f"  [{i:2d}/{len(links)}] ‚úì {data['recette'][:35]:35} | Note: {data['note']}")
            time.sleep(DELAY_BETWEEN_REQUESTS)
    
    print(f"\n{'='*60}")
    print("üìä STATISTIQUES")
    print(f"{'='*60}")
    for cat, count in stats.items():
        print(f" {count:3d} recettes")
    print(f"  {'TOTAL':10} : {len(dataset):3d} recettes")
    
    if dataset:
        keys = dataset[0].keys()
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(dataset)
        print(f"\n‚úÖ Fichier '{OUTPUT_CSV}' cr√©√© avec succ√®s !\n")
    else:
        print("\n‚ö†Ô∏è Aucune recette r√©cup√©r√©e.\n")

if __name__ == "__main__":
    main()


üç¥ Th√®me : Italienne
üìã 30 recettes trouv√©es

  [ 1/30] ‚úì Bruschetta (Italie)                 | Note: 4.3/5
  [ 2/30] ‚úì Focaccia                            | Note: 4.4/5
  [ 3/30] ‚úì Sauce pesto                         | Note: 3.8/5
  [ 4/30] ‚úì Gressins de Lili                    | Note: 4.9/5
  [ 5/30] ‚úì Crostinis                           | Note: 4.6/5
  [ 6/30] ‚úì Arancini di riso                    | Note: 3.7/5
  [ 7/30] ‚úì Taralli (biscuits sal√©s italiens)   | Note: 4.7/5
  [ 8/30] ‚úì Gnocchis r√¥tis au parmesan et sauce | Note: 0/5
  [ 9/30] ‚úì Risotto aux champignons et tarte au | Note: 0/5
  [10/30] ‚úì Mini lasagnes individuelles au Airf | Note: 0/5
  [11/30] ‚úì Rag√π arancini au Airfryer           | Note: 0/5
  [12/30] ‚úì Lasagnes √† la bolognaise            | Note: 4.8/5
  [13/30] ‚úì Carbonara traditionnelle            | Note: 4.6/5
  [14/30] ‚úì P√¢tes sauce bolognaise - Rag√π bolog | Note: 5/5
  [15/30] ‚úì Linguini alle vongole               | Not

NameError: name 'stats' is not defined