# _Importation des prix_

In [1]:
import pandas as pd
import aiohttp
import asyncio
import requests
from tqdm.asyncio import tqdm

# Determine how many pages the prices API exposes for the chosen page size
size=1000 # number of items requested per page

pricesPagesUrl=f"https://prices.openfoodfacts.org/api/v1/prices?size={size}"

try:
    # A timeout keeps the cell from hanging indefinitely if the API never answers.
    responsePages= requests.get(pricesPagesUrl, timeout=30)
    # Surface HTTP errors (4xx/5xx) here instead of trying to decode an error body.
    responsePages.raise_for_status()
    pagesJson=responsePages.json()
    # The "pages" field of the response is used as the number of pages to download.
    totalPages=pagesJson["pages"]
    #print(f"le nombre de page total pour une taille de {size} est de :{totalPages}")
except Exception as e:
    print(f"l'erreur {e} est survenue lors de la détermination du nombre de page total")
    # Re-raise after logging: swallowing the error would leave totalPages undefined
    # (or stale from a previous run of this cell), and the later cells would then
    # fail with a NameError or silently download the wrong number of pages.
    raise


StatementMeta(, 39369846-4703-41bf-9f1d-9962574b177a, 3, Finished, Available, Finished)

In [2]:
# Helper that extracts the fields of interest from one price record
def parse_price(price_json):
    """Project one raw price record onto the fixed set of fields kept downstream.

    Args:
        price_json: Object exposing ``.get(key)`` (a dict in practice) that
            holds one price entry.

    Returns:
        A dict containing exactly the keys listed in ``wanted_fields``, in that
        order. A key absent from ``price_json`` maps to whatever ``.get``
        returns for it (``None`` for a plain dict). If reading the record
        raises, the error is printed and ``None`` is returned instead.
    """
    wanted_fields = (
        "id",
        "product_id",
        "location_id",
        "type",
        "price",
        "price_is_discounted",
        "price_without_discount",
        "price_per",
        "currency",
        "date",
        "receipt_quantity",
        "owner",
        "created",
        "updated",
    )
    try:
        return {field: price_json.get(field) for field in wanted_fields}
    except Exception as e:
        print(f"L'erreur {e} est survenue lors de l'exécution de la fonction parse_price")
        return None

StatementMeta(, 39369846-4703-41bf-9f1d-9962574b177a, 4, Finished, Available, Finished)

In [3]:
# Coroutine that collects the prices of a single page
async def fetch_prices(session, url):
    """Download one page of prices and return its parsed records.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with a ``status`` attribute and an awaitable
            ``json()`` method (an ``aiohttp.ClientSession`` in this notebook).
        url: Fully formatted URL of the page to download.

    Returns:
        A list of the records produced by ``parse_price`` for each entry under
        the ``"items"`` key of the JSON payload. Records that ``parse_price``
        could not parse (it returns ``None`` for those) are left out. An empty
        list is returned when the HTTP status is not 200 or when any exception
        occurs; in both cases a message is printed and nothing is raised.
    """
    try:
        async with session.get(url) as response:
            if response.status != 200:
                print(f"Erreur HTTP {response.status} pour l'URL {url}")
                return []
            price_json = await response.json()
            parsed = (parse_price(price) for price in price_json.get("items", []))
            # parse_price signals an unparseable record with None; keeping those
            # would put None rows in the list that later becomes a DataFrame.
            return [record for record in parsed if record is not None]
    except Exception as e:
        print(f"L'erreur {e} est survenue lors de l'exécution de fetch_prices pour l'URL {url}")
        return []

# Entry point that collects the prices of every page
async def get_prices(base_url, total_pages, size):
    """Download pages 1..total_pages and return all their records in one list.

    Args:
        base_url: URL template with two positional ``{}`` placeholders; it is
            formatted with the page number first and the page size second.
        total_pages: Number of the last page to download (pages start at 1).
        size: Page size inserted into every URL.

    Returns:
        A single flat list holding the records returned by ``fetch_prices`` for
        every page. Pages are appended as their downloads finish, so the order
        of records is not guaranteed to follow page order.
    """
    collected = []
    async with aiohttp.ClientSession() as session:
        # One coroutine per page, all sharing the same HTTP session.
        pending = []
        for page_number in range(1, total_pages + 1):
            page_url = base_url.format(page_number, size)
            pending.append(fetch_prices(session, page_url))
        # as_completed yields awaitables in completion order; tqdm shows progress.
        progress = tqdm(
            asyncio.as_completed(pending),
            total=total_pages,
            desc="Chargement des prix",
        )
        for finished in progress:
            page_records = await finished
            collected.extend(page_records)
    return collected

StatementMeta(, 39369846-4703-41bf-9f1d-9962574b177a, 5, Finished, Available, Finished)

In [4]:
# URL template for one page of prices: get_prices fills the first placeholder
# with the page number and the second with the page size.
urlPrice="https://prices.openfoodfacts.org/api/v1/prices?page={}&size={}"
# Top-level `await` is only valid inside a notebook/IPython cell, not in a plain
# Python script. totalPages and size are the globals set by the first cell.
price_list= await get_prices(base_url=urlPrice,total_pages=totalPages,size=size)

StatementMeta(, 39369846-4703-41bf-9f1d-9962574b177a, 6, Finished, Available, Finished)

Chargement des prix: 100%|██████████| 59/59 [00:14<00:00,  4.10it/s]


In [6]:
# Build a Spark DataFrame from the list of parsed price records.
# `spark` is not defined anywhere in this notebook — presumably the session
# object injected by the notebook runtime; confirm before running elsewhere.
# NOTE(review): no schema is passed, so column types are inferred from the data;
# verify this still works when price_list is empty or a column is entirely None.
dfPrices=spark.createDataFrame(price_list)
#display(dfPrices)

StatementMeta(, 39369846-4703-41bf-9f1d-9962574b177a, 8, Finished, Available, Finished)

In [7]:
# Persist the prices as parquet; "overwrite" mode replaces whatever a previous
# run saved at this path.
dfPrices.write.format("parquet").mode("overwrite").save("Files/Bronze/Prices.parquet")

StatementMeta(, 39369846-4703-41bf-9f1d-9962574b177a, 9, Finished, Available, Finished)