# Open Food Facts Saytidan malumotlar olamiz 

In [1]:
import requests
import csv
import os
import time

SAVE_PATH = r"C:\Users\Rasulbek907\Desktop\Project_MP\Data\Web_Scrapping\openfoodfacts_fooddata.csv"

# Kengaytirilgan ustunlar
FIELDNAMES = [
    # Identifikatsiya
    "code", "product_name", "brands", "categories", "countries", "quantity", "packaging",
    
    # Oziqaviy tarkib
    "energy_kcal", "fat", "saturated_fat", "trans_fat", "cholesterol",
    "carbohydrates", "fiber", "proteins", "sugars", "salt", "sodium",
    "vitamin_a", "vitamin_c", "vitamin_d", "vitamin_e", "vitamin_b12",
    "iron", "calcium", "potassium", "zinc",

    # Ingredientlar va allergenlar
    "ingredients", "allergens", "traces", "additives", "nova_group", "labels",

    # Sifat va ekologik ko‘rsatkichlar
    "nutriscore", "ecoscore", "carbon_footprint", "environment_impact",

    # Tizim ma’lumotlari
    "created_t", "last_modified_t", "languages"
]


def fetch_page(page, page_size=1000):
    """Open Food Facts API dan bitta sahifa ma'lumotlarini olish"""
    url = "https://world.openfoodfacts.org/cgi/search.pl"
    params = {
        "action": "process",
        "json": True,
        "page": page,
        "page_size": page_size,
        "fields": ",".join(FIELDNAMES),
    }
    r = requests.get(url, params=params, timeout=60)
    if r.status_code == 200:
        return r.json().get("products", [])
    else:
        print(f"⚠️ Sahifa {page} yuklanmadi. Status: {r.status_code}")
        return []


def parse_product(p):
    """Har bir mahsulot uchun kerakli maydonlarni tozalab olish"""
    def get_field(*keys):
        for k in keys:
            if k in p and p[k] not in (None, "", []):
                return p[k]
        return ""

    return {
        # Identifikatsiya
        "code": get_field("code"),
        "product_name": get_field("product_name"),
        "brands": get_field("brands"),
        "categories": get_field("categories"),
        "countries": get_field("countries"),
        "quantity": get_field("quantity"),
        "packaging": get_field("packaging"),

        # Oziqaviy tarkib
        "energy_kcal": get_field("energy-kcal_100g"),
        "fat": get_field("fat_100g"),
        "saturated_fat": get_field("saturated-fat_100g"),
        "trans_fat": get_field("trans-fat_100g"),
        "cholesterol": get_field("cholesterol_100g"),
        "carbohydrates": get_field("carbohydrates_100g"),
        "fiber": get_field("fiber_100g"),
        "proteins": get_field("proteins_100g"),
        "sugars": get_field("sugars_100g"),
        "salt": get_field("salt_100g"),
        "sodium": get_field("sodium_100g"),
        "vitamin_a": get_field("vitamin-a_100g"),
        "vitamin_c": get_field("vitamin-c_100g"),
        "vitamin_d": get_field("vitamin-d_100g"),
        "vitamin_e": get_field("vitamin-e_100g"),
        "vitamin_b12": get_field("vitamin-b12_100g"),
        "iron": get_field("iron_100g"),
        "calcium": get_field("calcium_100g"),
        "potassium": get_field("potassium_100g"),
        "zinc": get_field("zinc_100g"),

        # Ingredientlar va allergenlar
        "ingredients": get_field("ingredients_text"),
        "allergens": get_field("allergens_tags"),
        "traces": get_field("traces_tags"),
        "additives": get_field("additives_tags"),
        "nova_group": get_field("nova_group"),
        "labels": get_field("labels_tags"),

        # Sifat va ekologik ko‘rsatkichlar
        "nutriscore": get_field("nutriscore_grade"),
        "ecoscore": get_field("ecoscore_grade"),
        "carbon_footprint": get_field("carbon-footprint_100g"),
        "environment_impact": get_field("environment_impact_level_tags"),

        # Tizim ma’lumotlari
        "created_t": get_field("created_t"),
        "last_modified_t": get_field("last_modified_t"),
        "languages": get_field("languages_tags"),
    }


if __name__ == "__main__":
    os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)

    print("🚀 Mahsulotlar haqida ma'lumot yuklanmoqda...\n")

    all_products = []
    total_pages = 50  # 5 sahifa × 1000 = 5000 mahsulot

    start = time.time()
    for page in range(1, total_pages + 1):
        data = fetch_page(page)
        if not data:
            print(f"⚠️ {page}-sahifada ma'lumot topilmadi.")
            continue

        parsed = [parse_product(p) for p in data]
        all_products.extend(parsed)
        print(f"✅ Sahifa {page} — {len(parsed)} ta mahsulot yuklandi.")

    # CSV saqlash
    with open(SAVE_PATH, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(all_products)

    print(f"\n✅ {len(all_products)} ta mahsulot saqlandi: {SAVE_PATH}")
    print(f"⏱️ Yakunlandi: {round(time.time() - start, 2)} soniya")


🚀 Mahsulotlar haqida ma'lumot yuklanmoqda...

✅ Sahifa 1 — 100 ta mahsulot yuklandi.
✅ Sahifa 2 — 100 ta mahsulot yuklandi.
✅ Sahifa 3 — 100 ta mahsulot yuklandi.
✅ Sahifa 4 — 100 ta mahsulot yuklandi.
✅ Sahifa 5 — 100 ta mahsulot yuklandi.
✅ Sahifa 6 — 100 ta mahsulot yuklandi.
✅ Sahifa 7 — 100 ta mahsulot yuklandi.
✅ Sahifa 8 — 100 ta mahsulot yuklandi.
✅ Sahifa 9 — 100 ta mahsulot yuklandi.
✅ Sahifa 10 — 100 ta mahsulot yuklandi.
✅ Sahifa 11 — 100 ta mahsulot yuklandi.
✅ Sahifa 12 — 100 ta mahsulot yuklandi.
✅ Sahifa 13 — 100 ta mahsulot yuklandi.
✅ Sahifa 14 — 100 ta mahsulot yuklandi.
✅ Sahifa 15 — 100 ta mahsulot yuklandi.
✅ Sahifa 16 — 100 ta mahsulot yuklandi.
✅ Sahifa 17 — 100 ta mahsulot yuklandi.
✅ Sahifa 18 — 100 ta mahsulot yuklandi.
✅ Sahifa 19 — 100 ta mahsulot yuklandi.
✅ Sahifa 20 — 100 ta mahsulot yuklandi.
✅ Sahifa 21 — 100 ta mahsulot yuklandi.
✅ Sahifa 22 — 100 ta mahsulot yuklandi.
✅ Sahifa 23 — 100 ta mahsulot yuklandi.
✅ Sahifa 24 — 100 ta mahsulot yuklandi.
✅ S

In [2]:
import pandas as pd
df = pd.read_csv(r"C:\Users\Rasulbek907\Desktop\Project_MP\Data\Web_Scrapping\openfoodfacts_fooddata.csv")

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 40 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   code                5000 non-null   int64  
 1   product_name        4761 non-null   object 
 2   brands              4730 non-null   object 
 3   categories          4914 non-null   object 
 4   countries           4998 non-null   object 
 5   quantity            4439 non-null   object 
 6   packaging           3424 non-null   object 
 7   energy_kcal         0 non-null      float64
 8   fat                 0 non-null      float64
 9   saturated_fat       0 non-null      float64
 10  trans_fat           0 non-null      float64
 11  cholesterol         0 non-null      float64
 12  carbohydrates       0 non-null      float64
 13  fiber               0 non-null      float64
 14  proteins            0 non-null      float64
 15  sugars              0 non-null      float64
 16  salt  

In [4]:
df.head()

Unnamed: 0,code,product_name,brands,categories,countries,quantity,packaging,energy_kcal,fat,saturated_fat,...,additives,nova_group,labels,nutriscore,ecoscore,carbon_footprint,environment_impact,created_t,last_modified_t,languages
0,6111035000430,Sidi Ali,Sidi Ali,"Beverages and beverages preparations,Beverages...",Morocco,33 cl,"Plastic, Bottle",,,,...,,,,,,,,1439924914,1757170583,
1,6111242100992,perly,perly,Snacks,"Morocco,United States",100 g,Plastique,,,,...,,3.0,,,,,,1474037086,1761428815,
2,6111035002175,Sidi Ali,sidi ali,"Beverages and beverages preparations,Beverages...",Morocco,2 L,,,,,...,,1.0,,,,,,1537111522,1760896730,
3,6111035000058,Eau minérale naturelle,"Les Eaux Minérales d'oulmès,Sidi Ali","Beverages and beverages preparations,Beverages...",Morocco,"1,5 L","Plastique,Bouteille ou Flacon,Bouteille",,,,...,,1.0,,,,,,1409671459,1761218197,
4,6111252421568,اكوافينا,AQUAFINA,"Boissons et préparations de boissons,Boissons,...",المغرب,33cl,"en:Plastic, en:Bottle",,,,...,,,,,,,,1553344271,1757702458,
