In [1]:
from cocktaildb import (
    load_recipes_from_db,
    recipe_distance,
    recipe_distance_emd,
)
import re


In [2]:
recipes = load_recipes_from_db()


In [3]:
# Clean up the substitution levels and build the substitution_levels dict
recipes_single_level = recipes[recipes["ingredient_path"].str.match(r"^/\d+/$")]
recipes.loc[recipes_single_level.index, "substitution_level"] = 0
substitution_levels = dict(
    recipes.drop_duplicates("ingredient_id")[["ingredient_id", "substitution_level"]]
    .set_index("ingredient_id")["substitution_level"].fillna(0).astype(int)
)

In [8]:
daiquiri_recipes = recipes.query("recipe_name.str.contains('Daiquiri')")[
    ["recipe_name", "recipe_id"]
].drop_duplicates()

In [4]:
recipes = recipes.dropna(subset=["volume_fraction"])
for ix, row in recipes.iterrows():
    distance, matches = recipe_distance(
        recipes,
        21,
        row["recipe_id"],
        substitution_levels,
        lambda x: 1 * x,
        lambda x: 10 * x,
        lambda x: 20 * x,
    )
    recipes.at[ix, "distance"] = distance
    distance_emd, plan = recipe_distance_emd(
        recipes,
        21,
        row["recipe_id"],
        substitution_levels,
        lambda x: 0 * x,
        lambda x: 1 * x,
    )
    recipes.at[ix, "distance_emd"] = distance_emd
    recipes.at[ix, "distance"] = distance


In [5]:
recipe_distance_emd(
        recipes,
        21,
        286,
        substitution_levels,
        lambda x: 0 * x,
        lambda x: 1 * x,
    )

(0.48878395496950716,
 [(0, 0, 0.14285714285714285, 0.0),
  (1, 0, 0.055489963472291716, 0.16646989041687515),
  (1, 1, 0.19834710632943467, 0.0),
  (1, 2, 0.023612747559732652, 0.07083824267919796),
  (1, 4, 0.008264468352826662, 0.02479340505847999),
  (2, 2, 0.07556080560498468, 0.22668241681495405),
  (2, 3, 0.4958677658235867, 0.0)])

In [5]:
recipes.query("recipe_id in [286, 21]")

Unnamed: 0,recipe_id,recipe_name,ingredient_id,ingredient_name,ingredient_path,substitution_level,amount,volume_ml,volume_fraction,distance,distance_emd
88,21,Daiquiri,14,Simple Syrup,/113/14/,0.0,0.5,14.78675,0.142857,0.0,0.0
89,21,Daiquiri,15,Lime Juice,/7/15/,0.0,1.0,29.5735,0.285714,0.0,0.0
90,21,Daiquiri,198,Blended Lightly Aged Rum,/2/198/,0.0,2.0,59.147,0.571429,0.0,0.0
1217,286,Dry Daiquiri,14,Simple Syrup,/113/14/,0.0,0.5,14.78675,0.198347,2.367178,0.651712
1218,286,Dry Daiquiri,15,Lime Juice,/7/15/,0.0,0.5,14.78675,0.198347,2.367178,0.651712
1219,286,Dry Daiquiri,53,Campari,/63/303/53/,1.0,0.25,7.393375,0.099174,2.367178,0.651712
1220,286,Dry Daiquiri,198,Blended Lightly Aged Rum,/2/198/,0.0,1.25,36.966875,0.495868,2.367178,0.651712
1221,286,Dry Daiquiri,227,Passionfruit Syrup,/113/227/,0.0,1.0,0.616115,0.008264,2.367178,0.651712


In [6]:
recipes.sort_values(by="distance_emd", ascending=True).drop_duplicates(
    subset=["recipe_id"]
)[["recipe_name", "recipe_id", "distance", "distance_emd"]].head(20)


Unnamed: 0,recipe_name,recipe_id,distance,distance_emd
88,Daiquiri,21,0.0,0.0
1217,Dry Daiquiri,286,2.367178,0.651712
440,Airmail,107,23.428571,0.714286
2748,Near Martinique Swizzle,612,23.752614,0.843206
1299,Corduroy Daiquiri,301,25.991071,0.892857
2663,Lost Lake,594,26.31746,1.047619
1124,Davy Jones's Locker,267,26.919643,1.053571
852,Canchánchara,203,33.071429,1.214286
460,Alex Day’s Daiquiri,113,23.609524,1.295238
2187,John De Piper’s Mojito,494,21.914286,1.295238


In [None]:
for ix, row in daiquiri_recipes.iterrows():
    distance_h, matches = recipe_distance_emd(
        recipes.dropna(subset=["volume_fraction"]),
        465,
        row["recipe_id"],
        substitution_levels,
        lambda x: 1 * x,
        lambda x: 10 * x,
    )
    distance_d, matches = recipe_distance_emd(
        recipes.dropna(subset=["volume_fraction"]),
        21,
        row["recipe_id"],
        substitution_levels,
        lambda x: 1 * x,
        lambda x: 10 * x,

    )
    daiquiri_recipes.at[ix, "distance_daiquiri"] = distance_d
    daiquiri_recipes.at[ix, "distance_hemingway"] = distance_h
daiquiri_recipes.sort_values(by="distance_daiquiri", ascending=True).drop_duplicates(
    subset=["recipe_id"]
)[["recipe_name", "recipe_id", "distance", "distance_daiquiri", "distance_hemingway"]].head(20)


Unnamed: 0,recipe_name,recipe_id,distance,distance_daiquiri,distance_hemingway
88,Daiquiri,21,63.942857,0.0,63.942857
1217,Dry Daiquiri,286,106.053995,2.367178,106.053995
2729,Regal Daiquiri,608,45.466667,20.87619,45.466667
2556,La Bomba Daiquiri,572,84.071543,21.337979,84.071543
460,Alex Day’s Daiquiri,113,85.6,23.609524,85.6
1297,Corduroy Daiquiri,301,64.083333,25.991071,64.083333
3354,Parisian Daiquiri,753,98.684209,26.928572,98.684209
1495,Erick Castro's Hemingway Daiquiri,342,6.408333,31.6875,6.408333
1345,Don's Special Daiquiri,310,114.6,35.914286,114.6
2450,Jocelyn Morin’s Daiquiri,551,107.739106,41.455649,107.739106


In [14]:
recipes.query("recipe_id == 21")

Unnamed: 0,recipe_id,recipe_name,ingredient_id,ingredient_name,ingredient_path,substitution_level,amount,volume_ml,volume_fraction,distance,distance_emd
88,21,Daiquiri,14,Simple Syrup,/113/14/,0.0,0.5,14.78675,0.142857,0.0,0.0
89,21,Daiquiri,15,Lime Juice,/7/15/,0.0,1.0,29.5735,0.285714,0.0,0.0
90,21,Daiquiri,198,Blended Lightly Aged Rum,/2/198/,0.0,2.0,59.147,0.571429,0.0,0.0


In [15]:
recipes.query("recipe_id == 256")

Unnamed: 0,recipe_id,recipe_name,ingredient_id,ingredient_name,ingredient_path,substitution_level,amount,volume_ml,volume_fraction,distance,distance_emd
1076,256,Daiquiri Clasico,15,Lime Juice,/7/15/,0.0,0.75,22.180125,0.25,64.476176,1.857143
1077,256,Daiquiri Clasico,130,Column Still Lightly Aged Rum,/2/130/,0.0,2.0,59.147,0.666666,64.476176,1.857143
1078,256,Daiquiri Clasico,194,Sugar,/156/194/,0.0,0.5,7.3934,0.083334,64.476176,1.857143


In [16]:
recipe_distance(
    recipes.dropna(subset=["volume_fraction"]),
    21,
    256,
    substitution_levels,
    lambda x: 1 * x,
    lambda x: 10 * x,
    lambda x: 20 * x,
)

(np.float64(64.47617601129261), [(0, 2), (1, 0), (2, 1)])