## Quête 2

Liste des exos :

* exceptions
* list comprehension
* query in list
* read file
* parse datetime with timezone
* write file
* `*args`/`**kwargs`
* decorator
* request API
* build CLI

## Scraping du site cuisine-etudiant.fr

Imports

In [51]:
import random
import re
import json

import requests
from tqdm import tqdm
from bs4 import BeautifulSoup

Scrape one list page

In [2]:
def get_links_by_page(page: int, timeout: float = 30.0) -> list[str]:
    """Return the recipe links listed on one page of the recipe index.

    Args:
        page: Page number inserted into the ``/recettes/page-{page}`` URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``href`` of the first ``<a>`` found in each ``<li>`` of the
        first ``<ul class="recipes-list">`` on the page, in document order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the page contains no ``<ul class="recipes-list">``.
    """
    url = f'https://www.cuisine-etudiant.fr/recettes/page-{page}'
    response = requests.get(url, timeout=timeout)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    recipes_list = soup.find_all('ul', class_='recipes-list')[0]
    return [item.a['href'] for item in recipes_list.find_all('li')]

Get max page number

In [3]:
# Download the recipe index page; the pagination parsed from it in the next
# cell gives the number of list pages to crawl.
url = 'https://www.cuisine-etudiant.fr/recettes'
html_doc = requests.get(url).text

In [12]:
soup = BeautifulSoup(html_doc, 'html.parser')

# The last pagination <li> is assumed to carry the highest page number.
last_page_item = soup.find_all('li', class_='recipes-pagination-item')[-1]
last_page_nb = int(last_page_item.get_text().replace('\n', ''))

Scrape all links

In [13]:
# Open the output file once for the whole crawl instead of once per page.
# Append mode is kept, so re-running this cell adds to any existing file.
with open('recipes_links.txt', 'a') as f:
    for page in tqdm(range(1, last_page_nb + 1)):
        links = get_links_by_page(page)
        # One link per line.
        f.writelines(link + '\n' for link in links)

100%|██████████| 174/174 [01:15<00:00,  2.30it/s]


Scrape single recipe page

In [47]:
def scrap_recipe_page(link: str, timeout: float = 30.0) -> dict:
    """Download one recipe page and extract its title, ingredients and servings.

    Args:
        link: URL of the recipe page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys ``'title'`` (text of the
        ``<h1 class="recipe-sheet-title">``), ``'ingredients'`` (one string
        per ``<li>`` of the ``<ul class="recipe-sheet-list">``, taken from the
        item's ``<label>``) and ``'persons'`` (int parsed from the
        ``<h2 class="recipe-sheet-subtitle">``).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the subtitle does not start with the expected
            "Ingrédients (N pers.)" form.
    """
    response = requests.get(link, timeout=timeout)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    title = soup.find('h1', class_='recipe-sheet-title').text.strip().replace('\n', '')

    recipe_list = soup.find('ul', class_='recipe-sheet-list')
    ingredients = [
        item.label.get_text().strip().replace('\n', '')
        for item in recipe_list.find_all('li')
    ]

    persons_elt = soup.find('h2', class_='recipe-sheet-subtitle')
    persons_text = persons_elt.get_text().strip().replace('\n', '')
    # Raw string: '\(' , '\d' and '\.' are regex escapes, not Python string escapes.
    match = re.match(r'Ingrédients \((?P<persons>\d+) pers\.\)', persons_text, flags=re.I)
    if match is None:
        raise ValueError(f'Unexpected ingredients subtitle: {persons_text!r}')

    return {
        'title': title,
        'ingredients': ingredients,
        'persons': int(match.group('persons')),
    }

In [48]:
# Smoke test: scrape one known recipe page and display the parsed result.
url = 'https://www.cuisine-etudiant.fr/recette/4548-quesadillas'
scrap_recipe_page(url)

{'title': 'Quesadillas',
 'ingredients': ['Galette de blé ou de maïs',
  'Poulet',
  'Fromage râpé',
  'Crème fraîche',
  "Huile d'olive",
  'Épices type curry paprika',
  'Sel',
  'Poivre'],
 'persons': 1}

Scrape many random recipes

In [49]:
# Load every stored link; each entry still ends with its newline.
with open('recipes_links.txt', 'r') as f:
    recipes_links = list(f)
# Draw 10 entries without replacement and drop the newline from each.
selected_links = [raw.replace('\n', '') for raw in random.sample(recipes_links, 10)]
selected_links

['https://www.cuisine-etudiant.fr/recette/7826-flanboisier-aux-peches',
 'https://www.cuisine-etudiant.fr/recette/292-spaghetti-napolitaine-revise',
 'https://www.cuisine-etudiant.fr/recette/6849-yaourts-miel-et-fleur-doranger',
 'https://www.cuisine-etudiant.fr/recette/4305-clafouti-aux-cerises',
 'https://www.cuisine-etudiant.fr/recette/5936-croissant-de-la-mer',
 'https://www.cuisine-etudiant.fr/recette/8994-galettes-des-rois',
 'https://www.cuisine-etudiant.fr/recette/4927-rochers-aux-coco',
 'https://www.cuisine-etudiant.fr/recette/4008-galette-de-legumes',
 'https://www.cuisine-etudiant.fr/recette/6204-blesotto-de-poireaux-jambon-au-cookeo',
 'https://www.cuisine-etudiant.fr/recette/8870-wacky-cake']

In [50]:
# Scrape every sampled recipe page; tqdm reports progress as the list is built.
recipes = [scrap_recipe_page(link) for link in tqdm(selected_links)]
recipes

100%|██████████| 10/10 [00:03<00:00,  2.71it/s]


[{'title': 'Flanboisier aux pêches',
  'ingredients': ['1 banane bien mûre',
   '250g de fromage blanc 0%',
   '3 œufs',
   '100g de compote sans sucre ajouté',
   '30g de maïzena',
   '10g de Stevia (ou autre sucrant)',
   '2 pêches mûres mais fermes'],
  'persons': 6},
 {'title': 'Spaghetti Napolitaine révisé',
  'ingredients': ['Spaghetti (n°7 barilla de préférence)',
   'Un demi-poivron vert',
   'Une tomate entière ou un quart de boîte de tomates pelées',
   'Un demi-oignon',
   '2 saucisses knackis',
   'Ail',
   "Huile d'olive",
   'Sel, poivre',
   'Ketchup (facultatif)'],
  'persons': 1},
 {'title': "Yaourts miel et fleur d'oranger",
  'ingredients': ['1 Lde lait entier',
   '2 CàS de lait en poudre',
   '1 yaourt nature',
   '2 CàS  de miel liquide',
   "2CàS de fleur d'oranger"],
  'persons': 8},
 {'title': 'clafouti aux cerises',
  'ingredients': ['600 g de cerises',
   '- 40 g de beurre semi-sel + 20 g pour le moule',
   '- 4 oeufs',
   '- 20 cl de lait',
   '- 100 g de fa

In [57]:
# ensure_ascii=False writes accented characters as-is, so the file encoding is
# pinned to UTF-8 rather than left to the platform default.
with open('recipes_data.json', 'w', encoding='utf-8') as f:
    json.dump(recipes, f, ensure_ascii=False, indent=2)

### Inspection de code

In [20]:
import inspect
import re

def tricherie(recipes: list[dict], max_persons: int):
    """Return the titles of recipes serving strictly fewer than ``max_persons``.

    Each recipe is a dict read through its ``'persons'`` and ``'title'`` keys;
    titles are returned in input order.
    """
    return [entry['title'] for entry in recipes if entry['persons'] < max_persons]



# Fixture for the source-inspection demo: the direct one-liner is commented
# out and the body instead chains two statements with ';' before delegating to
# tricherie(). This function's source text is what get_lines_of_code() is run
# on, so the body is intentionally left exactly as written.
def filter_recipes(recipes: list[dict], max_persons: int) -> list[str]:
    # return [rec['title'] for rec in recipes if rec['persons'] < max_persons]
    print(); return tricherie(recipes, max_persons)



def get_lines_of_code(func) -> list[str]:
    """Return the source of ``func`` as a flat list of stripped fragments.

    The source is cut on newlines, then each physical line is cut on every
    semicolon so that chained statements are listed separately. Fragments
    that are empty after stripping are dropped. The split is purely textual,
    so a semicolon inside a string or comment is also treated as a separator.
    """
    statements = []
    for physical_line in inspect.getsource(func).strip().split('\n'):
        for fragment in physical_line.split(';'):
            cleaned = fragment.strip()
            if cleaned:
                statements.append(cleaned)
    return statements


# Break the source of filter_recipes into individual fragments and display them.
lines_of_code = get_lines_of_code(filter_recipes)
lines_of_code

['def filter_recipes(recipes: list[dict], max_persons: int) -> list[str]:',
 "# return [rec['title'] for rec in recipes if rec['persons'] < max_persons]",
 'print()',
 'return tricherie(recipes, max_persons)']