# Notebook 25 - Generate Random Matching Baseline

This notebook generates a random product match for every store–ingredient pair: one product is drawn at random from the products available in that store, without looking at the ingredient itself. The result serves as a baseline against which the fuzzy and semantic matching strategies are compared in Notebook 24.

**Inputs**
- `recipes_with_variants.csv` - Cleaned recipe-ingredient metadata
- `products_with_priority.csv` - Store-specific product inventory
- `recipe_store_ranked.csv` - Store–recipe deployment plan

**Output**
- `matching_matrix_random.csv` - Random matches for each store–ingredient pair


In [1]:
import os
import pandas as pd
import numpy as np

# --- Folder layout (paths are relative to the notebook's working directory) ---
input_folder = "variant_exports"    # holds recipes_with_variants.csv
product_folder = "cleaned_data"     # holds products_with_priority.csv
ranking_folder = "recipe_ranking"   # holds recipe_store_ranked.csv
output_folder = "matching_scored"   # destination for the random match matrix

# Create the destination up front so the export step cannot fail on a missing folder.
os.makedirs(output_folder, exist_ok=True)

# --- Read the three input tables ---
recipes = pd.read_csv(os.path.join(input_folder, "recipes_with_variants.csv"))
products = pd.read_csv(os.path.join(product_folder, "products_with_priority.csv"))
store_recipes = pd.read_csv(os.path.join(ranking_folder, "recipe_store_ranked.csv"))

# --- Quick sanity report: (rows, columns) of each table ---
print(
    "Loaded:",
    f"- Recipes: {recipes.shape}",
    f"- Products: {products.shape}",
    f"- Store-Recipe Ranking: {store_recipes.shape}",
    sep="\n",
)


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Loaded:
- Recipes: (6, 8)
- Products: (126919, 35)
- Store-Recipe Ranking: (4, 6)


In [2]:
def _normalize_text(series):
    """Return the string column with surrounding whitespace removed and lowercased."""
    return series.str.strip().str.lower()


# Put both text keys into the same canonical form (trimmed, lowercase).
recipes["ingredient"] = _normalize_text(recipes["ingredient"])
products["product_concept"] = _normalize_text(products["product_concept"])

# Every recipe row needs a `row_id`; when the file did not provide one,
# derive it from the current index.
if "row_id" not in recipes:
    recipes = recipes.reset_index().rename(columns={"index": "row_id"})


In [3]:
# Expand the store–recipe plan to store–ingredient granularity:
# each store–recipe row is repeated once per ingredient row of that recipe.
# A left join keeps store–recipe rows even when the recipe has no ingredient rows.
recipe_ingredients = recipes[["row_id", "ingredient", "recipe"]]
store_ingredients = pd.merge(
    store_recipes,
    recipe_ingredients,
    how="left",
    on="recipe",
)
print("Store–Ingredient Pairs:", store_ingredients.shape)
store_ingredients.head()


Store–Ingredient Pairs: (4, 8)


Unnamed: 0,store,recipe,matched_ingredients,avg_score,n_ingredients,coverage,row_id,ingredient
0,1024.0,Greek Yogurt & Honey,1,43.369231,1,1.0,2,yogurt
1,1090.0,Greek Yogurt & Honey,1,43.369231,1,1.0,2,yogurt
2,3340.0,Greek Yogurt & Honey,1,43.369231,1,1.0,2,yogurt
3,4255.0,Greek Yogurt & Honey,1,43.369231,1,1.0,2,yogurt


In [8]:
# Build the random baseline: for every store–ingredient pair, draw one product
# at random from that store's inventory. The ingredient is deliberately ignored.
#
# One seeded generator is created ONCE, before the loop. Passing the integer
# seed to `sample` on every iteration would reseed each draw, so all rows for
# the same store would receive the identical product and the draws would not
# be independent. Sharing a single generator keeps the run reproducible while
# letting each draw advance the random stream.
rng = np.random.RandomState(42)

random_matches = []
skipped_pairs = 0  # store–ingredient pairs whose store has no rows in `products`

for _, row in store_ingredients.iterrows():
    store = row["store"]

    # Restrict the candidate pool to products stocked by this store.
    available = products[products["store"] == store]

    if available.empty:
        # Nothing to sample from; count it so the omission is visible below.
        skipped_pairs += 1
        continue

    # Draw a single product row from the store's inventory.
    sampled = available.sample(1, random_state=rng).iloc[0]

    random_matches.append({
        "row_id": row["row_id"],
        "ingredient": row["ingredient"],
        "recipe": row["recipe"],
        "store": store,
        "product_article": sampled["article"],
        "product_name": sampled["product_name_clean"],
        "product_concept": sampled["product_concept"],
        "match_source": "random"
    })

# Pin the column set so the frame (and the CSV header) has the expected schema
# even when no pair could be matched.
match_columns = [
    "row_id",
    "ingredient",
    "recipe",
    "store",
    "product_article",
    "product_name",
    "product_concept",
    "match_source",
]
df_random = pd.DataFrame(random_matches, columns=match_columns)
print("Random match matrix:", df_random.shape)
if skipped_pairs:
    print(f"Skipped {skipped_pairs} store–ingredient pair(s): store has no products")

# Save to CSV
output_path = os.path.join(output_folder, "matching_matrix_random.csv")
df_random.to_csv(output_path, index=False)
print("Saved random match matrix to:", output_path)


Random match matrix: (4, 8)
Saved random match matrix to: matching_scored\matching_matrix_random.csv


In [9]:
# Write the random match matrix to the output folder (row index omitted)
# and report where it was written.
output_path = os.path.join(output_folder, "matching_matrix_random.csv")
df_random.to_csv(path_or_buf=output_path, index=False)
print("Saved random match matrix to:", output_path)


Saved random match matrix to: matching_scored\matching_matrix_random.csv
