In [39]:
from mercapy import Product as WarehouseProduct, WAREHOUSES
from product import Product
import os, json, csv

CATALOGS_PATH = "catalogs"

In [12]:
warehouse_paths = [f.split("_")[0] for f in os.listdir(CATALOGS_PATH)]

In [14]:
def load_catalog(warehouse, language="en"):
    catalog_path = os.path.join(CATALOGS_PATH, warehouse) + "_catalog.json"
    with open(catalog_path, "r") as file:
        catalog = json.load(file)

    return [WarehouseProduct(p, warehouse, language) for p in catalog]


catalogs = {}
for warehouse in warehouse_paths:
    catalogs[warehouse] = load_catalog(warehouse)

In [28]:
unique_products = []

def is_unique(product: WarehouseProduct):
    ids = [p.id for p in unique_products]
    return product.id not in ids

def find_identical(target_product: WarehouseProduct):
    identical_products = []

    for catalog in catalogs.values():
        for product in catalog:
            if target_product.id == product.id:
                identical_products.append(product)

    return identical_products

def add_unique_products():
    for catalog in catalogs.values():
        for product in catalog:
            if is_unique(product):
                identical_products = find_identical(product) 
                warehouses = [p.warehouse for p in identical_products]

                unique_product = Product(product.id, warehouses, product.language)
                unique_products.append(unique_product)

add_unique_products()
print(f"There are {len(unique_products)} products in Mercadona's catalog.")

There are 5904 products in Mercadona's catalog.


In [None]:
def populate_product_data(products: list[Product]):
    for i,product in enumerate(products):
        product._fetch_data()
        
        if (i+1) % 500 == 0:
            print(f"{round((i+1)/len(products))*100}% | {i+1} out of {len(products)} done...")

populate_product_data(unique_products)

In [47]:
def make_csv(products: list[Product], path: str):
    # Define the header based on the fields of the Product class
    fieldnames = [
        "id",
        "ean",
        "name",
        "legal_name",
        "unit_price",
        "is_discounted",
        "previous_price",
        "origin",
        "supplier",
        "weight",
        "category",
        "age_check",
        "alcohol_by_volume",
        "is_new",
        "is_pack",
        "pack_size",
    ]

    # Create a list of dictionaries
    data = []
    for p in products:
        product_data = {
                "id": p.id,
                "ean": p.ean,
                "name": p.name,
                "legal_name": p.legal_name,
                "unit_price": p.unit_price,
                "is_discounted": p.is_discounted,
                "previous_price": p.previous_price,
                "origin": p.origin,
                "weight": p.weight,
                "category": p.categories[0],
                "age_check": p.age_check,
                "alcohol_by_volume": p.alcohol_by_volume,
                "is_new": p.is_new,
                "is_pack": p.is_pack,
                "pack_size": p.pack_size,
        }
        if p.suppliers:
            product_data["supplier"] = p.suppliers[0]
        if p.categories:
            product_data["category"] = p.categories[0]
        data.append(product_data)

    # Write to the CSV file using DictWriter
    with open(path, "w", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
    
        for row in data:
            try:
                writer.writerow(row)
            except Exception as e:
                print(f"Skipping row {row} due to error: {e}")


make_csv(unique_products, "catalog.csv")

Skipping row {'id': '22332', 'ean': '8437010848230', 'name': 'Sliced \u200b\u200bgrilled chicken breast La Carloteña', 'legal_name': 'PECHUGUITA DE POLLO ASADA AL HORNO', 'unit_price': 2.6, 'is_discounted': False, 'previous_price': None, 'origin': 'España', 'weight': 0.125, 'category': 'Deli & cheese', 'age_check': False, 'alcohol_by_volume': None, 'is_new': False, 'is_pack': False, 'pack_size': None, 'supplier': 'Carloteña de Asados S.L.'} due to error: 'charmap' codec can't encode characters in position 27-28: character maps to <undefined>
Skipping row {'id': '23359', 'ean': '8480000233592', 'name': 'Rustic natural white sliced \u200b\u200bbread 0% added sugar Hacendado', 'legal_name': 'Pan de Molde', 'unit_price': 1.6, 'is_discounted': False, 'previous_price': None, 'origin': 'España', 'weight': 0.55, 'category': 'Bread & bakery', 'age_check': False, 'alcohol_by_volume': None, 'is_new': False, 'is_pack': False, 'pack_size': None, 'supplier': 'Mercadona'} due to error: 'charmap' code