In [1]:
from utils.db import (
    load_recipes_from_db,
    load_ingredients_from_db,
)
from barcart import (
    build_ingredient_tree,
    build_ingredient_distance_matrix,
    build_recipe_volume_matrix,
    compute_emd,
    emd_matrix,
    knn_matrix,
    neighbor_weight_matrix,
    m_step_blosum,
    expected_ingredient_match_matrix,
    build_index_to_id,
    report_ingredient_neighbors,
)
import re
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load both tables; recipe rows without a volume fraction are discarded.
recipes = load_recipes_from_db().dropna(subset=["volume_fraction"])
ingredients = load_ingredients_from_db()

# Parent nodes are the rows whose path is exactly "/<digits>/".
# Their substitution level is forced to 0.
single_level_pattern = r"^/\d+/$"
ingredients_single_level = ingredients[
    ingredients["path"].str.match(single_level_pattern)
]
ingredients.loc[ingredients_single_level.index, "substitution_level"] = 0

# Any substitution level still missing is treated as 0, then stored as int.
substitution_level = ingredients["substitution_level"].fillna(0)
ingredients["substitution_level"] = substitution_level.astype(int)

# Weight is 10 for substitution level 0 and 1 for every other level.
ingredients["weight"] = ingredients["substitution_level"].map(
    lambda level: 10 if level == 0 else 1
)


In [3]:
# Step 1: Build the ingredient tree and its parent map from the patched table,
# using the per-row "weight" column computed earlier.
tree, parent_map = build_ingredient_tree(
    ingredients,
    id_col="id",
    name_col="name",
    path_col="path",
    weight_col="weight",
)

# Step 2: Map each ingredient id to its display name.
id_to_name = {
    ingredient_id: ingredient_name
    for ingredient_id, ingredient_name in zip(ingredients["id"], ingredients["name"])
}

# Step 3: Get the cost matrix and its registry from a single call so the two
# are produced together.
cost_matrix, registry = build_ingredient_distance_matrix(parent_map, id_to_name)

# Step 4: Report the 5 nearest neighbors of every ingredient under cost_matrix,
# resolving ids/names through the registry.
neighbors_df = report_ingredient_neighbors(cost_matrix, registry, k=5)

In [4]:
# Build the recipe volume matrix together with the recipe-id -> index mapping,
# reading recipe id, ingredient id and volume fraction from `recipes`.
volume_matrix, recipe_id_to_idx = build_recipe_volume_matrix(
    recipes,
    registry,
    recipe_id_col="recipe_id",
    ingredient_id_col="ingredient_id",
    volume_col="volume_fraction",
)

# Inverse lookup: index -> recipe id.
recipe_idx_to_id = {v: k for k, v in recipe_id_to_idx.items()}

# Recipe id (stringified) -> recipe name.
# `recipes` has one row per (recipe, ingredient), so ids repeat; the last row
# seen for an id wins, exactly as with the previous row-by-row loop.
# Zipping the two columns avoids DataFrame.iterrows(), which materialises a
# Series for every row.
recipe_id_to_name = {
    str(recipe_id): recipe_name
    for recipe_id, recipe_name in zip(recipes["recipe_id"], recipes["recipe_name"])
}


In [5]:
# Build ingredient-level counts without recomputing plans
T_sum, _ = expected_ingredient_match_matrix(
    volume_matrix,
    cost_matrix,
    k=10,
    beta=1.0,
    plan_topk=3,
    plan_minfrac=0.05,
    symmetrize=True,
)


Computing EMD matrix: 100%|██████████| 783/783 [00:47<00:00, 16.44it/s]


In [8]:
# Derive a new matrix from the accumulated match counts in T_sum.
# The result is later passed to report_ingredient_neighbors in place of
# cost_matrix.
# NOTE(review): presumably a BLOSUM-style update controlled by blosum_alpha;
# confirm the exact semantics against the barcart implementation.
C_new = m_step_blosum(T_sum, blosum_alpha=1.0)

In [9]:
# Neighbor report under the updated matrix C_new, using the same registry as
# the baseline report (which was built from cost_matrix with k=5).
# NOTE(review): the saved 'Lime Juice' output further down lists only 10
# neighbors although k=20 here, so that output may predate this value.
# Re-run to confirm.
new_neighbors_df = report_ingredient_neighbors(C_new, registry, k=20)

In [12]:
# Baseline neighbors (from cost_matrix) for the ingredient named 'Bourbon'.
bourbon_filter = "ingredient_name == 'Bourbon'"
neighbors_df.query(bourbon_filter)

Unnamed: 0,ingredient_id,ingredient_name,neighbor_id,neighbor_name,cost
50,10,Bourbon,447,Four Roses Bourbon,1.0
51,10,Bourbon,431,Maker's Mark Bourbon,1.0
52,10,Bourbon,388,Russell's Reserve 10-year old,1.0
53,10,Bourbon,436,Old Grand-dad 100 proof,1.0
54,10,Bourbon,1,Whiskey,10.0


In [17]:
# Neighbors under the updated matrix (C_new) for the ingredient named 'Lime Juice'.
lime_juice_filter = "ingredient_name == 'Lime Juice'"
new_neighbors_df.query(lime_juice_filter)

Unnamed: 0,ingredient_id,ingredient_name,neighbor_id,neighbor_name,cost
320,15,Lime Juice,32,Lemon Juice,0.68135
321,15,Lime Juice,45,Grapefruit Juice,0.877774
322,15,Lime Juice,130,Column Still Lightly Aged Rum,0.966324
323,15,Lime Juice,48,Pineapple Juice,0.966371
324,15,Lime Juice,96,Orange Juice,0.978425
325,15,Lime Juice,404,Key Lime Juice,0.978968
326,15,Lime Juice,387,Rhum JM Blanc 100 proof,0.990471
327,15,Lime Juice,61,Dry Vermouth,0.99253
328,15,Lime Juice,30,Crème De Cassis,0.998927
329,15,Lime Juice,248,Verjus,1.000465


In [6]:
# Names of the nearest-neighbor recipes for recipe id "113"
# (listed as "Alex Day’s Daiquiri" in the daiquiri table below).
# NOTE(review): `knn_idx` is not assigned in any cell visible in this notebook,
# so this cell raises NameError after Restart & Run All. It presumably came from
# a since-removed cell (e.g. a knn_matrix call); restore that cell before
# relying on this output.
q = knn_idx[recipe_id_to_idx["113"]]
neighbor_recipe_ids = (recipe_idx_to_id[int(position)] for position in q)
[recipe_id_to_name[neighbor_id] for neighbor_id in neighbor_recipe_ids]

['Regal Daiquiri',
 'John De Piper’s\xa0Mojito',
 'La Bomba Daiquiri',
 "I Don't Mind You Shooting Me, Frank, But Take It Easy on the\xa0Rum",
 "Erick Castro's Hemingway Daiquiri",
 'Daiquiri #4',
 'Jelani Johnson’s\xa0Mojito',
 "Dan Sabo's Mojito",
 'Hole in the\xa0Fence',
 'Hemingway Daiquiri']

In [None]:
# List the distinct (recipe_id, recipe_name) pairs whose name contains
# "daiquiri", ignoring case.
is_daiquiri = recipes["recipe_name"].str.lower().str.contains("daiquiri")
recipes.loc[is_daiquiri, ["recipe_id", "recipe_name"]].drop_duplicates()

Unnamed: 0,recipe_id,recipe_name
26,8,Brucato Nuclear Daiquiri
88,21,Daiquiri
460,113,Alex Day’s Daiquiri
1067,254,Daiquiri #2
1072,255,Daiquiri #4
1076,256,Daiquiri Clasico
1217,286,Dry Daiquiri
1297,301,Corduroy Daiquiri
1345,310,Don's Special Daiquiri
1351,311,Dry Pornstar Daiquiri


In [7]:
# Select the recipe rows whose name contains any of these keywords as a
# substring, ignoring case: 'daiquiri', 'margarita', 'sour', 'bird', 'collins'.
# na=False makes rows with a missing recipe name count as "no match"; without
# it the mask would contain NaN and the boolean indexing below would raise.
mask = (
    recipes["recipe_name"]
    .str.lower()
    .str.contains("daiquiri|margarita|sour|bird|collins", regex=True, na=False)
)
# Independent copy so later edits to `sours` do not touch `recipes`.
sours = recipes[mask].copy()


In [8]:
# Report how many distinct recipes and distinct ingredients the filtered
# subset contains (missing ids, if any, count as one value, as with unique()).
n_sour_recipes = sours["recipe_id"].nunique(dropna=False)
n_sour_ingredients = sours["ingredient_id"].nunique(dropna=False)
print(f"n_recipes: {n_sour_recipes}; n_ingredients: {n_sour_ingredients}")

n_recipes: 64; n_ingredients: 89
