In [1]:
import pandas as pd
import json
import time
from datetime import datetime

# Wall-clock timestamp taken before any work starts; the script subtracts it
# from a second time.time() reading at the end to report the elapsed seconds.
start_time = time.time()

def log(msg):
    """Print *msg* to stdout, prefixed with the current local time.

    The output line has the form ``[YYYY-MM-DD HH:MM:SS] <msg>``.
    """
    stamp = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
    print(f"[{stamp}] {msg}")

# Load every worksheet of the workbook at once; with sheet_name=None the
# result is a mapping of sheet name -> DataFrame.
df_sheets = pd.read_excel("./ElafProducts.xlsx", sheet_name=None)

# Every remaining sheet name is treated as a main category; the sheets
# listed here are unwanted and are left out.
mainCats = [name for name in df_sheets if name not in {"Countries Code", "طعام طازجold"}]

# Output records, in insertion order
items = list()
# (category, parent) pairs already emitted, used to keep records unique
added_categories = set()

def clean_sheet(df):
    """Normalise a raw worksheet so that its columns carry usable header names.

    Steps:
      1. Drop columns that contain no data at all.
      2. If every column label is a placeholder (contains "Unnamed", or is
         "nan"), promote the first data row to be the header and drop it
         from the data.
      3. Cast all labels to ``str`` and drop columns whose label still
         starts with "Unnamed".

    Args:
        df: DataFrame holding one sheet's data.

    Returns:
        A DataFrame with string column labels and no "Unnamed..." columns.
        An empty input yields an empty result instead of raising.
    """
    df = df.dropna(axis=1, how="all")

    # Test the labels as strings: the .str accessor raises on non-string
    # labels (e.g. numeric headers or the RangeIndex of an empty frame).
    labels = df.columns.astype(str)
    only_placeholders = bool(labels.str.contains("Unnamed").all()) or bool(
        (labels == "nan").all()
    )

    # Both checks above are vacuously true for a frame without columns, so a
    # header row is only promoted when there is actually a row to promote.
    if only_placeholders and len(df) > 0:
        df.columns = df.iloc[0]
        df = df[1:]

    df.columns = df.columns.astype(str)
    return df.loc[:, ~df.columns.str.contains("^Unnamed")]

def add_category_if_new(category, parent):
    """Record a (category, parent) pair in ``items`` unless it was seen before.

    Both names are stripped of surrounding whitespace; a falsy ``parent``
    (``None`` or an empty string) is stored as ``None``. Uniqueness is
    tracked through the module-level ``added_categories`` set.
    """
    name = category.strip()
    parent_name = parent.strip() if parent else None
    pair = (name, parent_name)

    if pair in added_categories:
        return

    added_categories.add(pair)
    items.append({"category": name, "parentCategory": parent_name})

def add_categories_dynamic(mainName, df_clean):
    """Register the category hierarchy found in one cleaned sheet.

    The sheet name itself is registered as a root category (no parent).
    Two column layouts are recognised:

    * two-level: columns "الصنف الاول" and "الصنف الثاني" are both present;
    * multi-level: column "التصنيف الفرعي الأول" is present, optionally
      followed by the second and third sub-category columns.

    Sheets matching neither layout contribute only the root category.
    """
    add_category_if_new(mainName, None)

    available = df_clean.columns
    two_level = ("الصنف الاول", "الصنف الثاني")

    if all(name in available for name in two_level):
        top_col, sub_col = two_level
        for top_value, rows in df_clean.groupby(top_col):
            if pd.isna(top_value):
                continue
            top_name = str(top_value).strip()
            add_category_if_new(top_name, mainName)
            # Distinct non-missing second-level values within this group
            for sub_value in rows[sub_col].dropna().unique().tolist():
                add_category_if_new(str(sub_value).strip(), top_name)
        return

    if "التصنيف الفرعي الأول" not in available:
        return

    level_cols = [
        name
        for name in ("التصنيف الفرعي الأول", "التصنيف الفرعي الثاني", "التصنيف الفرعي الثالث")
        if name in available
    ]
    for _, record in df_clean.iterrows():
        # Walk the levels left to right, chaining each value to the one
        # before it; the chain stops at the first missing cell.
        ancestor = mainName
        for level in level_cols:
            cell = record[level]
            if pd.isna(cell):
                break
            current = str(cell).strip()
            add_category_if_new(current, ancestor)
            ancestor = current

# Clean each retained sheet and collect its categories
for mainName in mainCats:
    clean_df = clean_sheet(df_sheets[mainName])
    log(f"Processing sheet: {mainName} with {len(clean_df)} rows")
    add_categories_dynamic(mainName, clean_df)

# Serialise the collected records; ensure_ascii=False keeps non-ASCII
# characters as-is in the output instead of \u escapes
json_output = json.dumps(items, ensure_ascii=False, indent=2)
with open("categories.json", mode="w", encoding="utf-8") as out_file:
    out_file.write(json_output)

# Report how many records were written and how long the whole run took
end_time = time.time()
elapsed_seconds = end_time - start_time
log(f"Saved {len(items)} categories to categories.json")
log(f"Processing time: {elapsed_seconds:.2f} seconds")


[2025-12-09 00:31:08] Processing sheet:  طعام طازج with 1639 rows
[2025-12-09 00:31:08] Processing sheet: أكسسوارات السيارات with 2446 rows
[2025-12-09 00:31:08] Processing sheet: خزانة الطعام with 5960 rows
[2025-12-09 00:31:08] Processing sheet: المشروبات with 1642 rows
[2025-12-09 00:31:08] Processing sheet: خضار و فواكه with 425 rows
[2025-12-09 00:31:08] Processing sheet: العاب و مستلزمات النشاطات الخار with 79 rows
[2025-12-09 00:31:08] Processing sheet: أطعمة ومستلزمات الحيوانات الألي with 1045 rows
[2025-12-09 00:31:08] Processing sheet: منتجات الرياضة واللياقة البدنية with 74 rows
[2025-12-09 00:31:08] Processing sheet: القرطاسية ومستلزمات المدرسة with 279 rows
[2025-12-09 00:31:08] Processing sheet: أغذية عضوية وحيوية with 521 rows
[2025-12-09 00:31:08] Processing sheet: مخبوزات with 410 rows
[2025-12-09 00:31:08] Processing sheet: منتجات ورقيه, صحف وغيرها with 17 rows
[2025-12-09 00:31:08] Processing sheet: ملابس, اكسسوارات وحقائب with 816 rows
[2025-12-09 00:31:08] Processi