# Notebook 15 - Generate Store Inventory Export per Meal Box

This notebook prepares store-specific inventory plans by mapping ingredients in each store’s meal box recipes to real available products.

Steps:
- Match ingredient concepts with product concepts (exact + fuzzy)
- Add waste and markdown prioritization
- Output one CSV per store: article-level instructions per recipe

Inputs:
- `store_mealbox_ranked.csv` - Ranked store–recipe matches
- `products_with_priority.csv` - Store-specific products with concept tags
- `recipes_with_ontology.csv` - Ingredient-to-concept mappings

Outputs:
- One CSV per store in `store_inventory_exports/`
- Debug log of unmatched concepts (if any)


In [27]:
import pandas as pd
import os
import ast
from rapidfuzz import process, fuzz

# Paths
input_folder = "cleaned_data"
output_folder = "store_inventory_exports"
debug_folder = "debug_logs"
os.makedirs(output_folder, exist_ok=True)
os.makedirs(debug_folder, exist_ok=True)

# Load inputs
df_ranked = pd.read_csv(os.path.join(input_folder, "store_mealbox_ranked.csv"))
df_products = pd.read_csv(os.path.join(input_folder, "products_with_priority.csv"))
df_recipes = pd.read_csv(os.path.join(input_folder, "recipes_with_ontology.csv"))

# Explode ingredients
df_ranked = df_ranked.explode("ingredients").dropna()

# Extract clean ingredient concepts
def extract_concept(value):
    try:
        items = ast.literal_eval(value)
        return items[0] if isinstance(items, list) and items else None
    except Exception:
        return None

df_ranked["ingredient_concept"] = df_ranked["ingredients"].apply(extract_concept).str.strip().str.lower()
df_products["product_concept"] = df_products["product_concept"].astype(str).str.strip().str.lower()

print("Loaded:")
print("  Ranked recipes:", df_ranked.shape)
print("  Products:", df_products.shape)


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Loaded:
  Ranked recipes: (9, 5)
  Products: (126919, 35)


In [29]:
# Try exact match on store + concept
df_exact = df_ranked.merge(
    df_products,
    left_on=["store", "ingredient_concept"],
    right_on=["store", "product_concept"],
    how="left"
)

# Split into matched and unmatched
df_matched_exact = df_exact.dropna(subset=["article"])
df_unmatched = df_exact[df_exact["article"].isna()].copy().reset_index(drop=True)

print("Exact matches found:", df_matched_exact.shape[0])
print("Remaining unmatched rows to try fuzzy match:", df_unmatched.shape[0])


Exact matches found: 10
Remaining unmatched rows to try fuzzy match: 0


In [30]:
# Fuzzy matching fallback (only if unmatched rows exist)
if not df_unmatched.empty:
    # Store-wise concept dictionary
    store_product_concepts = (
        df_products.groupby("store")["product_concept"]
        .apply(set)
        .to_dict()
    )

    # Define fuzzy matcher
    def fuzzy_match(row, threshold=90):
        store = row["store"]
        ingredient = row["ingredient_concept"]
        options = store_product_concepts.get(store, [])
        if not options:
            return None
        match = process.extractOne(ingredient, options, scorer=fuzz.token_sort_ratio)
        return match[0] if match and match[1] >= threshold else None

    # Apply fuzzy match per row
    fuzzy_matches = []
    for _, row in df_unmatched.iterrows():
        fuzzy_matches.append(fuzzy_match(row))

    # Assign matches back (aligned by order)
    df_unmatched["fuzzy_product_concept"] = fuzzy_matches

    # Merge fuzzy matches back with products
    df_fuzzy_matched = df_unmatched.merge(
        df_products,
        left_on=["store", "fuzzy_product_concept"],
        right_on=["store", "product_concept"],
        how="left"
    ).dropna(subset=["article"])
else:
    # No fuzzy matching needed
    df_fuzzy_matched = pd.DataFrame(columns=df_matched_exact.columns)

print("Recovered fuzzy matches:", df_fuzzy_matched.shape[0])


Recovered fuzzy matches: 0


In [31]:
# Combine all matches (exact + fuzzy)
df_all_matched = pd.concat([df_matched_exact, df_fuzzy_matched], ignore_index=True)

# Keep relevant columns
columns_to_keep = [
    "store", "recipe", "ingredient_concept", "article",
    "product_name_clean", "priority_score", "waste_flag", "markdown_flag"
]
df_all_matched = df_all_matched[columns_to_keep].drop_duplicates()

print("Total matched ingredient–article pairs:", df_all_matched.shape[0])
print("Stores covered:", df_all_matched['store'].nunique())


Total matched ingredient–article pairs: 9
Stores covered: 9


In [32]:
# Save one file per store
for store_id, group in df_all_matched.groupby("store"):
    filename = f"inventory_{store_id}.csv"
    path = os.path.join(output_folder, filename)
    group.to_csv(path, index=False)

print(f"Exported {df_all_matched['store'].nunique()} store inventory files to: {output_folder}")


Exported 9 store inventory files to: store_inventory_exports


In [34]:
# Check if fuzzy_product_concept exists
if "fuzzy_product_concept" in df_unmatched.columns:
    unmatched_ingredients = df_unmatched[df_unmatched["fuzzy_product_concept"].isna()]
else:
    unmatched_ingredients = df_unmatched.copy()  # fallback if no fuzzy match attempted

unmatched_summary = unmatched_ingredients.groupby("store")["ingredient_concept"].unique()

# Save debug file per store if there are unmatched ingredients
for store_id, missing_concepts in unmatched_summary.items():
    if len(missing_concepts) > 0:
        debug_path = os.path.join(debug_folder, f"unmatched_{store_id}.txt")
        with open(debug_path, "w") as f:
            f.write("Unmatched ingredient concepts:\n")
            for concept in sorted(missing_concepts):
                f.write(f"- {concept}\n")

print(f"Saved {len(unmatched_summary)} debug logs to: {debug_folder}")


Saved 0 debug logs to: debug_logs


---

**Notebook Summary**

This notebook generates per-store inventory export plans by matching recipe ingredient concepts to available store-specific products using both exact and fuzzy matching. It integrates product-level waste and markdown prioritization and outputs article-level instructions for each meal box per store. If any concepts remain unmatched, the notebook logs them for further review.
