# Notebook 30 - Appendix Recipe Match Tables

This notebook exports detailed match tables per recipe to support transparency in the appendix. For each recipe, it lists:

- Ingredients and their candidate product matches
- Matching scores (semantic, fuzzy)
- Waste-aware priority flags
- Matched store and article information (if available)

These tables can be included in the appendix or published as supporting materials.

**Objectives**
- Load semantic and fuzzy match results
- Merge with ingredient context and product metadata
- Export per-recipe CSV files for transparency

**Inputs**
- matching_scored/matching_matrix_semantic.csv
- matching_scored/matching_matrix_fuzzy.csv
- variant_exports/recipes_with_variants.csv
- cleaned_data/products_with_priority.csv

**Outputs**
- `appendix_tables/semantic_matches/recipe_<name>.csv`
- `appendix_tables/fuzzy_matches/recipe_<name>.csv`


In [7]:
import os
import pandas as pd

# Input folders and output destinations (output folders are created if absent)
match_folder = "matching_scored"
variant_folder = "variant_exports"
priority_folder = "cleaned_data"
output_folder_semantic = os.path.join("appendix_tables", "semantic_matches")
output_folder_fuzzy = os.path.join("appendix_tables", "fuzzy_matches")
for out_dir in (output_folder_semantic, output_folder_fuzzy):
    os.makedirs(out_dir, exist_ok=True)

# Paths of the four input CSV files
semantic_file = os.path.join(match_folder, "matching_matrix_semantic.csv")
fuzzy_file = os.path.join(match_folder, "matching_matrix_fuzzy.csv")
recipes_file = os.path.join(variant_folder, "recipes_with_variants.csv")
products_file = os.path.join(priority_folder, "products_with_priority.csv")

# Read the match matrices, the recipe table and the product table
df_semantic = pd.read_csv(semantic_file)
df_fuzzy = pd.read_csv(fuzzy_file)
df_recipes = pd.read_csv(recipes_file)
df_products = pd.read_csv(products_file)

# When the recipe table has no row_id column, derive one from its index
if "row_id" not in df_recipes.columns:
    recipes_indexed = df_recipes.reset_index(drop=False)
    df_recipes = recipes_indexed.rename(columns={"index": "row_id"})

# Bring the join keys to a common form in all three tables:
# row_id as nullable integer, ingredient as trimmed lower-case text
for df in (df_semantic, df_fuzzy, df_recipes):
    row_id_numeric = pd.to_numeric(df["row_id"], errors="coerce")
    df["row_id"] = row_id_numeric.astype("Int64")
    ingredient_text = df["ingredient"].astype(str)
    df["ingredient"] = ingredient_text.str.strip().str.lower()

# Recipe names are only trimmed; their original casing is kept
recipe_text = df_recipes["recipe"].astype(str)
df_recipes["recipe"] = recipe_text.str.strip()

print("All files loaded and normalized.")


All files loaded and normalized.


In [9]:
# Merge recipe names and product metadata into the match tables.
#
# DataFrame.merge returns a NEW frame, so every merge result is assigned back
# to df_semantic / df_fuzzy; calling merge without assignment changes nothing.

# Product columns that carry the waste / markdown context for the appendix
PRODUCT_META_COLS = ["product_concept", "priority_score", "waste_flag", "markdown_flag"]


def add_lookup_columns(matches, lookup, keys, value_cols):
    """Left-join columns from ``lookup`` onto ``matches`` and return the result.

    Only those ``value_cols`` that ``matches`` does not already contain are
    added, so running the cell twice does not create ``_x`` / ``_y`` copies.

    Parameters
    ----------
    matches : pd.DataFrame
        Match table to enrich; it is not modified.
    lookup : pd.DataFrame
        Table that holds ``keys`` and the columns to add.
    keys : list of str
        Join columns present in both frames.
    value_cols : list of str
        Columns to copy from ``lookup``.

    Returns
    -------
    pd.DataFrame
        ``matches`` itself when nothing needs adding, otherwise a new frame
        with the missing columns attached (NaN where no lookup row matches).

    Raises
    ------
    KeyError
        If ``lookup`` lacks a join key or one of the columns to be added.
    """
    cols_to_add = [col for col in value_cols if col not in matches.columns]
    if not cols_to_add:
        return matches
    lookup_rows = (
        lookup[keys + cols_to_add]
        # pandas matches null keys against each other; without this, a match
        # row with an unparseable key would inherit data from an unrelated row
        .dropna(subset=keys)
        # exact duplicate lookup rows would only duplicate match rows
        .drop_duplicates()
    )
    return matches.merge(lookup_rows, on=keys, how="left")


# Align the product key name and dtype with the match tables
df_products = df_products.rename(columns={"article": "product_article"})
df_products["product_article"] = pd.to_numeric(df_products["product_article"], errors="coerce")

# Attach the recipe name to every (row_id, ingredient) match row
recipe_keys = ["row_id", "ingredient"]
df_semantic = add_lookup_columns(df_semantic, df_recipes, recipe_keys, ["recipe"])
df_fuzzy = add_lookup_columns(df_fuzzy, df_recipes, recipe_keys, ["recipe"])

# Attach product metadata (waste / markdown flags, priority score, concept)
df_semantic = df_semantic.assign(
    product_article=pd.to_numeric(df_semantic["product_article"], errors="coerce")
)
df_semantic = add_lookup_columns(df_semantic, df_products, ["product_article"], PRODUCT_META_COLS)

df_fuzzy = df_fuzzy.assign(
    product_article=pd.to_numeric(df_fuzzy["product_article"], errors="coerce")
)
df_fuzzy = add_lookup_columns(df_fuzzy, df_products, ["product_article"], PRODUCT_META_COLS)

print(" Merged recipe and product context into semantic and fuzzy match tables.")


 Merged recipe and product context into semantic and fuzzy match tables.


In [10]:
# Export detailed per-recipe match tables

# Characters that are invalid or path-significant in file names on common
# file systems; each is mapped to "_" so the export cannot fail on them or
# land in an unintended directory.
UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\:*?"<>|'})


def recipe_file_stem(recipe):
    """Return a file-name-safe stem for a recipe name.

    Applies the notebook's cleaning rules (lower-case, "&" -> "and",
    spaces and "/" -> "_", apostrophes and commas removed) and then replaces
    the remaining unsafe characters with "_". Names without such characters
    produce the same stem as before.

    Parameters
    ----------
    recipe : str
        Recipe name.

    Returns
    -------
    str
        Cleaned stem, used as ``recipe_<stem>.csv``.
    """
    stem = (
        recipe.lower()
        .replace("&", "and")
        .replace(" ", "_")
        .replace("/", "_")
        .replace("'", "")
        .replace(",", "")
    )
    return stem.translate(UNSAFE_FILENAME_CHARS)


used_stems = set()
for recipe in df_recipes["recipe"].unique():
    # Filter rows for this recipe
    df_sem = df_semantic[df_semantic["recipe"] == recipe]
    df_fuz = df_fuzzy[df_fuzzy["recipe"] == recipe]

    # Clean recipe name for file-safe export. Different recipe names can
    # clean to the same stem (e.g. "A & B" and "A and B"); add a numeric
    # suffix so a later recipe never overwrites an earlier one's file.
    base_stem = recipe_file_stem(recipe)
    file_safe = base_stem
    suffix = 1
    while file_safe in used_stems:
        suffix += 1
        file_safe = f"{base_stem}_{suffix}"
    used_stems.add(file_safe)

    # Save to folder
    df_sem.to_csv(os.path.join(output_folder_semantic, f"recipe_{file_safe}.csv"), index=False)
    df_fuz.to_csv(os.path.join(output_folder_fuzzy, f"recipe_{file_safe}.csv"), index=False)

print("Exported per-recipe match tables for semantic and fuzzy results.")


Exported per-recipe match tables for semantic and fuzzy results.
