In [1]:
import sys
from pathlib import Path
import pandas as pd

# Lift pandas' cap on displayed columns so wide frames are shown in full.
pd.set_option("display.max_columns", None)

# Add src to path so the `matcher` import below can be resolved
# (presumably matcher.py lives in ../src — confirm against the repo layout).
# The path is relative, so it is interpreted against the kernel's working directory.
SRC_DIR = Path("..") / "src"
sys.path.append(str(SRC_DIR))

from matcher import (
    compute_recipe_ingredient_status,
    compute_recipe_match_metrics,
    get_missing_ingredients
)

In [2]:
# Directory holding the input CSVs, relative to the kernel's working directory.
DATA_DIR = Path("../data")

# One DataFrame per CSV file. Column schemas are not defined in this notebook;
# the cells below rely on at least `recipe_id` (recipes, recipe_feedback) and
# `rating` / `would_make_again` (recipe_feedback).
ingredients = pd.read_csv(DATA_DIR / "ingredients.csv")
recipes = pd.read_csv(DATA_DIR / "recipes.csv")
recipe_ingredients = pd.read_csv(DATA_DIR / "recipe_ingredients.csv")
pantry = pd.read_csv(DATA_DIR / "pantry.csv")
recipe_feedback = pd.read_csv(DATA_DIR / "recipe_feedback.csv")

In [23]:
# Hard constraints consumed by apply_constraints() to filter candidate recipes.
user_preferences = {
    # Compared for exact equality with each recipe's `dish_type`.
    # NOTE(review): the recipes data also contains dish_type "salad", which is
    # not in the list on the next line — confirm the full set of allowed values.
    "meal_type": "breakfast",          # breakfast | meal | snack | dessert | beverage
    # When False, only recipes whose `requires_airfryer` flag is False are kept.
    "allow_airfryer": False,
    # When False, only recipes whose `requires_soaking` flag is False are kept.
    "allow_soaking": False,
    # Minimum `pantry_match_pct` (a 0-100 percentage) a recipe needs to be kept.
    "min_pantry_match_pct": 10
}

In [24]:
# Pantry Matching
# All three helpers come from the project-local `matcher` module; their
# internals are not visible in this notebook, so the notes below are hedged.

# Compares recipe ingredients with the pantry (presumably one status row per
# recipe/ingredient pair — confirm in matcher).
ingredient_status = compute_recipe_ingredient_status(
    recipe_ingredients_df=recipe_ingredients,
    pantry_df=pantry
)

# Per-recipe summary derived from the statuses; merged on `recipe_id` in the
# next cell, where it appears to supply the count columns and `pantry_match_pct`.
recipe_metrics = compute_recipe_match_metrics(ingredient_status)

# Per-recipe missing ingredients; merged on `recipe_id` in the next cell, where
# its `missing_ingredients` column is used.
missing_ingredients = get_missing_ingredients(
    ingredient_status_df=ingredient_status,
    ingredients_df=ingredients
)

In [25]:
# One row per recipe in `recipe_metrics`, enriched with the recipe attributes
# and the list of missing ingredients. Both merges are left joins, so every
# recipe in `recipe_metrics` is kept even without a match on the right side.
base_df = (
    recipe_metrics
    .merge(recipes, on="recipe_id", how="left")
    .merge(missing_ingredients, on="recipe_id", how="left")
)

# Recipes without a row in `missing_ingredients` come out of the left merge as
# NaN. Replace those with real empty lists so the column holds lists only.
# (The string '[]' looks the same when displayed but is a 2-character string,
# which breaks len()/iteration downstream; fillna() cannot take a list value.)
base_df["missing_ingredients"] = [
    items if isinstance(items, list) else []
    for items in base_df["missing_ingredients"]
]

display(base_df)

Unnamed: 0,recipe_id,total_ingredients,available_count,missing_count,partial_count,pantry_match_pct,name,cuisine,dish_type,requires_airfryer,requires_soaking,meal_prep_type,video_url,missing_ingredients
0,1,5,2,3,0,40.0,Vegetable Upma,Indian,breakfast,False,False,good_for_2_days,https://youtube.com/xxx,"[tomato, cumin seeds, salt]"
1,2,1,1,0,0,100.0,Chana Salad,Indian,salad,False,True,prep_components,https://youtube.com/yyy,[]
2,4,2,2,0,0,100.0,Overnight Oats,Global,breakfast,False,True,good_for_3_days,https://youtube.com/aaa,[]


In [26]:
def apply_constraints(df, prefs):
    """Return the rows of ``df`` that satisfy the user's hard constraints.

    A recipe is kept when its ``pantry_match_pct`` is at least
    ``prefs["min_pantry_match_pct"]`` and its ``dish_type`` equals
    ``prefs["meal_type"]``. Unless ``prefs["allow_airfryer"]`` /
    ``prefs["allow_soaking"]`` is truthy, recipes whose ``requires_airfryer`` /
    ``requires_soaking`` flag is not False are dropped as well.

    The input frame is not modified; surviving rows keep their index labels.
    """
    keep = df["pantry_match_pct"] >= prefs["min_pantry_match_pct"]
    keep &= df["dish_type"] == prefs["meal_type"]

    # (preference key, recipe flag column): the flag must be False when the
    # corresponding preference is switched off.
    equipment_rules = (
        ("allow_airfryer", "requires_airfryer"),
        ("allow_soaking", "requires_soaking"),
    )
    for permission, requirement in equipment_rules:
        if not prefs[permission]:
            keep &= df[requirement].eq(False)

    return df.loc[keep].copy()

In [27]:
# Keep only the recipes that satisfy `user_preferences`. `base_df` itself is
# left untouched because apply_constraints returns a filtered copy.
filtered_df = apply_constraints(base_df, user_preferences)
display(filtered_df)

Unnamed: 0,recipe_id,total_ingredients,available_count,missing_count,partial_count,pantry_match_pct,name,cuisine,dish_type,requires_airfryer,requires_soaking,meal_prep_type,video_url,missing_ingredients
0,1,5,2,3,0,40.0,Vegetable Upma,Indian,breakfast,False,False,good_for_2_days,https://youtube.com/xxx,"[tomato, cumin seeds, salt]"


In [28]:
# Aggregate Feedback
# One row per recipe_id: mean rating, number of non-null ratings, and the mean
# of `would_make_again`. `as_index=False` keeps recipe_id as a regular column.
feedback_agg = recipe_feedback.groupby("recipe_id", as_index=False).agg(
    avg_rating=("rating", "mean"),
    times_cooked=("rating", "count"),
    would_make_again_rate=("would_make_again", "mean"),
)

In [29]:
# Merge Feedback & Fill Defaults
# Left join: recipes without any feedback stay in the frame with NaN in the
# feedback columns, which are then replaced by neutral defaults.

scoring_df = filtered_df.merge(
    feedback_agg, on="recipe_id", how="left"
)

# Never-rated recipes get a mid-scale rating (3.0; the score below divides by 5).
scoring_df["avg_rating"] = scoring_df["avg_rating"].fillna(3.0)
# The NaN introduced by the merge turns this count into float64; once the gaps
# are filled with 0, cast it back so it stays an integer count (1, not 1.0).
scoring_df["times_cooked"] = scoring_df["times_cooked"].fillna(0).astype("int64")
# Unknown repeat rate defaults to 0.5.
scoring_df["would_make_again_rate"] = scoring_df["would_make_again_rate"].fillna(0.5)

In [30]:
# Compute Final Scores
# Each component is scaled so the weighted sum is comparable across recipes:
# pantry match is a percentage (/100) and the rating is divided by 5.
pantry_component = scoring_df["pantry_match_pct"] / 100
rating_component = scoring_df["avg_rating"] / 5
repeat_component = scoring_df["would_make_again_rate"]

scoring_df["pantry_score"] = pantry_component
scoring_df["rating_score"] = rating_component
scoring_df["repeat_score"] = repeat_component

# Weighted blend: 60% pantry match, 30% rating, 10% repeat rate.
scoring_df["final_score"] = (
    0.6 * pantry_component
    + 0.3 * rating_component
    + 0.1 * repeat_component
)

# Penalize bad experiences. The two penalties are independent, so a recipe that
# triggers both is scaled by 0.3 and then by 0.5.
poorly_rated = scoring_df["avg_rating"] < 2.5
rarely_repeated = scoring_df["would_make_again_rate"] < 0.3
scoring_df.loc[poorly_rated, "final_score"] *= 0.3
scoring_df.loc[rarely_repeated, "final_score"] *= 0.5

In [31]:
# Rank & Select Top N

TOP_N = 5

# Highest final_score first, then keep at most the first TOP_N rows.
ranked = scoring_df.sort_values("final_score", ascending=False)
recommendations = ranked.iloc[:TOP_N]

In [32]:
# Columns presented to the user, in display order.
output_columns = [
    "recipe_id",
    "name",
    "dish_type",
    "pantry_match_pct",
    "missing_ingredients",
    "avg_rating",
    "times_cooked",
    "final_score",
    "video_url",
]
final_output = recommendations.loc[:, output_columns]

display(final_output)

Unnamed: 0,recipe_id,name,dish_type,pantry_match_pct,missing_ingredients,avg_rating,times_cooked,final_score,video_url
0,1,Vegetable Upma,breakfast,40.0,"[tomato, cumin seeds, salt]",4.5,1,0.61,https://youtube.com/xxx


In [34]:
# Explain Recommendation (Debug / UI Feature)

def explain_recommendation(recipe_id, recommendations_df=None):
    """Summarise why a single recipe was recommended.

    Args:
        recipe_id: Value to look up in the ``recipe_id`` column.
        recommendations_df: Frame to search. It must contain the columns
            ``recipe_id``, ``name``, ``pantry_match_pct``,
            ``missing_ingredients``, ``avg_rating`` and ``final_score``.
            Defaults to the notebook-level ``final_output`` so existing
            one-argument calls keep working.

    Returns:
        dict with the keys ``"Recipe"``, ``"Pantry Match %"``,
        ``"Missing Ingredients"``, ``"Avg Rating"`` and ``"Final Score"``,
        taken from the first row whose ``recipe_id`` matches.

    Raises:
        IndexError: if no row has the given ``recipe_id``.
    """
    table = final_output if recommendations_df is None else recommendations_df

    matches = table.loc[table["recipe_id"] == recipe_id]
    if matches.empty:
        # Same exception type a bare `.iloc[0]` would raise, but with a message
        # that names the id that could not be found.
        raise IndexError(
            f"recipe_id {recipe_id!r} is not among the current recommendations"
        )
    row = matches.iloc[0]

    return {
        "Recipe": row["name"],
        "Pantry Match %": row["pantry_match_pct"],
        "Missing Ingredients": row["missing_ingredients"],
        "Avg Rating": row["avg_rating"],
        "Final Score": row["final_score"]
    }

# Explain the top-ranked recommendation. When no recipe survived the filters,
# `final_output` is empty and `.iloc[0]` would raise, so show nothing instead.
explain_recommendation(final_output.iloc[0]["recipe_id"]) if not final_output.empty else None

{'Recipe': 'Vegetable Upma',
 'Pantry Match %': 40.0,
 'Missing Ingredients': ['tomato', 'cumin seeds', 'salt'],
 'Avg Rating': 4.5,
 'Final Score': 0.61}