Load 4-part (equal-parts) cocktails from a CocktailDB backup, then build a graph connecting recipes by ingredient edit distance.

In [None]:
import pandas as pd
import numpy as np
import networkx as nx

In [11]:
# Load the exported recipe table; later cells read its recipe_name,
# ingredient_id and ingredient_name columns.
recipes = pd.read_csv("4_equal_parts-2025-08-02_91819.csv")

In [None]:
# Lookup table of distinct (ingredient_id, ingredient_name) pairs
ingredients = recipes[['ingredient_id', 'ingredient_name']].drop_duplicates()

# recipe name -> set of the ingredient ids it uses
recipe_ingredients = {
    name: set(ids)
    for name, ids in recipes.groupby('recipe_name')['ingredient_id']
}

recipe_names = list(recipe_ingredients)
n = len(recipe_names)
distance_matrix = np.zeros((n, n), dtype=int)

# Distance between two recipes = size of the symmetric difference of their
# ingredient sets. Only the upper triangle (diagonal included) is computed;
# each value is mirrored into the lower triangle.
for row in range(n):
    row_ingredients = recipe_ingredients[recipe_names[row]]
    for col in range(row, n):
        distance = len(row_ingredients ^ recipe_ingredients[recipe_names[col]])
        distance_matrix[row, col] = distance_matrix[col, row] = distance

# Labelled view of the matrix for easier inspection
distance_df = pd.DataFrame(distance_matrix, index=recipe_names, columns=recipe_names)

# How many other recipes sit at distance exactly 2 from each recipe
distance_2_counts = distance_df.eq(2).sum(axis=1)

# Recipe names ordered from most to fewest distance-2 neighbours
recipe_names_by_distance2 = list(distance_2_counts.sort_values(ascending=False).index)

# Apply that ordering to both axes, then refresh the raw matrix to match
distance_df = distance_df.loc[recipe_names_by_distance2, recipe_names_by_distance2]
distance_matrix = distance_df.values

In [26]:
# Show the pairwise distance table; rows/columns are ordered by how many
# distance-2 neighbours each recipe has (most connected first).
distance_df

Unnamed: 0,Last Word,Wordsmith,Fir Geddaboudit,Dirty Word,Bitter Last Words,La Ultima Palabra,Pete's Word,Ultimatum,Latest Word,Final Ward,...,Penultimate Word,Lush Life,Shaddock,Safe Word,Quemada's Idol,Ragnarök,Shrunken Skull,Sideways In Reverse,Third Ward,Walking Dead
Last Word,0,2,2,2,2,2,2,2,2,4,...,8,8,8,4,6,8,6,6,6,6
Wordsmith,2,0,2,2,2,2,2,2,2,4,...,8,8,8,4,4,8,6,6,6,6
Fir Geddaboudit,2,2,0,2,2,2,2,2,2,4,...,8,8,8,4,6,8,6,6,6,6
Dirty Word,2,2,2,0,2,2,2,2,2,4,...,8,8,8,4,6,8,6,6,6,6
Bitter Last Words,2,2,2,2,0,2,2,2,2,4,...,8,8,8,4,6,8,6,6,6,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ragnarök,8,8,8,8,8,8,8,8,8,6,...,6,6,4,8,8,0,8,8,8,8
Shrunken Skull,6,6,6,6,6,6,6,6,6,8,...,8,8,8,6,6,8,0,6,8,6
Sideways In Reverse,6,6,6,6,6,6,6,6,6,8,...,8,8,6,6,6,8,6,0,8,6
Third Ward,6,6,6,6,6,6,6,6,6,6,...,8,8,8,4,8,8,8,8,0,8


In [30]:
# Build an undirected graph: one node per recipe, one edge per pair of recipes
# whose ingredient sets differ by exactly 2 ids (distance == 2). Each edge is
# labelled with the two differing ingredient names, and the same pairs are
# collected into a table. The graph is written to GML and the table to CSV.
G = nx.Graph()

# Add every recipe as a node so recipes with no distance-2 neighbour still appear
G.add_nodes_from(recipe_names_by_distance2)

# One dict per edge; turned into a DataFrame and written to CSV below
distance2_table = []

for i, name1 in enumerate(recipe_names_by_distance2):
    # Pair each recipe only with the ones after it so every unordered pair
    # is visited exactly once.
    for name2 in recipe_names_by_distance2[i + 1:]:
        if distance_df.loc[name1, name2] != 2:
            continue

        ingredient_ids = recipe_ingredients[name1] ^ recipe_ingredients[name2]
        # NOTE: the names come back in `ingredients` table order, not in
        # (recipe 1, recipe 2) order.
        ingredient_names = ingredients[ingredients['ingredient_id'].isin(ingredient_ids)]['ingredient_name'].tolist()

        if len(ingredient_names) != 2:
            # The id -> name lookup did not yield exactly two names, so the edge
            # cannot be labelled. Report the pair actually being compared
            # (name1/name2), not positions in the unsorted recipe_names list.
            print(f"Skipping recipe pair {name1} and {name2} because they have {len(ingredient_names)} ingredients")
            continue

        # Log only once the edge is really going to be added
        print(f"Adding edge between {name1} and {name2}")
        G.add_edge(name1, name2, label=f"{ingredient_names[0]} <-> {ingredient_names[1]}")
        distance2_table.append({
            "recipe_name_1": name1,
            "recipe_name_2": name2,
            "ingredient_name_1": ingredient_names[0],
            "ingredient_name_2": ingredient_names[1]
        })

print(f"Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")
# Count recipes (nodes) that have at least one distance-2 edge; the edge count
# itself is already reported on the line above.
connected_recipes = sum(1 for _, degree in G.degree() if degree > 0)
print(f"Recipes with distance 2 connections: {connected_recipes}")

# Write to GML file
nx.write_gml(G, "recipe_distance_2_graph.gml")
print("Graph saved to recipe_distance_2_graph.gml")

# Write out the table as a CSV
distance2_df = pd.DataFrame(distance2_table)
distance2_df.to_csv("distance2_recipe_pairs.csv", index=False)
print("Table of recipe pairs with distance 2 saved to distance2_recipe_pairs.csv")

Adding edge between Last Word and Wordsmith
Adding edge between Last Word and Fir Geddaboudit
Adding edge between Last Word and Dirty Word
Adding edge between Last Word and Bitter Last Words
Adding edge between Last Word and La Ultima Palabra
Adding edge between Last Word and Pete's Word
Adding edge between Last Word and Ultimatum
Adding edge between Last Word and Latest Word
Adding edge between Last Word and The Last Monk
Adding edge between Last Word and Last Ditch
Adding edge between Last Word and Bad Word
Adding edge between Wordsmith and Fir Geddaboudit
Adding edge between Wordsmith and Dirty Word
Adding edge between Wordsmith and Bitter Last Words
Adding edge between Wordsmith and La Ultima Palabra
Adding edge between Wordsmith and Pete's Word
Adding edge between Wordsmith and Ultimatum
Adding edge between Wordsmith and Latest Word
Adding edge between Wordsmith and Final Voyage
Adding edge between Fir Geddaboudit and Dirty Word
Adding edge between Fir Geddaboudit and Bitter Last 

In [59]:
# Lift the row-display limit so the full result is shown
pd.set_option('display.max_rows', None)
# Recipes whose distance from the Last Word is exactly 4, in ascending order
distance_df.loc["Last Word"].loc[lambda distances: distances == 4].sort_values()

Final Ward                 4
Julien Sorel               4
The Asterisk               4
Frozen In A Starry Void    4
Hipsters Last Word         4
Swan Song                  4
That's My Word             4
Right Word                 4
Adam's Words               4
Definitive Source          4
Final Voyage               4
Benedict's Word            4
Corpse Defiler #2          4
Industry Sour              4
Safe Word                  4
Name: Last Word, dtype: int64

In [50]:
# Ingredient names of the Last Word (first matching name for each ingredient id)
[
    ingredients.loc[ingredients["ingredient_id"] == ingredient_id, "ingredient_name"].iloc[0]
    for ingredient_id in recipe_ingredients["Last Word"]
]

['Maraschino Liqueur', 'Green Chartreuse', 'Gin', 'Lime Juice']

In [65]:
# For each recipe at distance 4 from the Last Word, pair the recipe name with
# the names of its ingredients (first matching name for each ingredient id).
[
    (
        recipe_name,
        [
            ingredients.loc[ingredients["ingredient_id"] == ingredient_id, "ingredient_name"].iloc[0]
            for ingredient_id in recipe_ingredients[recipe_name]
        ],
    )
    for recipe_name in distance_df.loc["Last Word"].loc[lambda distances: distances == 4].sort_values().index
]

[('Final Ward',
  ['Lemon Juice', 'Green Chartreuse', 'Rye', 'Maraschino Liqueur']),
 ('Julien Sorel',
  ['Lemon Juice', 'Green Chartreuse', 'Cognac', 'Maraschino Liqueur']),
 ('The Asterisk',
  ['Lemon Juice', 'Green Chartreuse', 'Cognac', 'Maraschino Liqueur']),
 ('Frozen In A Starry Void',
  ['Allspice Dram', 'Maraschino Liqueur', 'Rye', 'Lime Juice']),
 ('Hipsters Last Word',
  ['Fernet-Branca', 'Maraschino Liqueur', 'Rye', 'Lime Juice']),
 ('Swan Song',
  ['Green Chartreuse', 'Smith and Cross', 'Dry Curaçao', 'Lime Juice']),
 ("That's My Word", ['Yellow Chartreuse', 'St. Germain', 'Gin', 'Lime Juice']),
 ('Right Word', ['St. Germain', 'Gin', 'Lillet Blanc', 'Lime Juice']),
 ("Adam's Words",
  ['Green Chartreuse', 'Ginger Liqueur', 'Smith and Cross', 'Lime Juice']),
 ('Definitive Source',
  ['Allspice Dram', 'Maraschino Liqueur', 'Smith and Cross', 'Lime Juice']),
 ('Final Voyage',
  ['Green Chartreuse', 'Apricot Liqueur', 'Black Rum', 'Lime Juice']),
 ("Benedict's Word",
  ['Maras

In [57]:
# Ingredient names of the Julien Sorel (first matching name for each ingredient id)
[
    ingredients.loc[ingredients["ingredient_id"] == ingredient_id, "ingredient_name"].iloc[0]
    for ingredient_id in recipe_ingredients["Julien Sorel"]
]

['Lemon Juice', 'Green Chartreuse', 'Cognac', 'Maraschino Liqueur']

In [46]:
# Show the de-duplicated ingredient_id / ingredient_name lookup table
ingredients

Unnamed: 0,ingredient_id,ingredient_name
0,26,Appleton Estate 12 Year Old
1,39,Dry Curaçao
2,34,Amaro Nonino Quintessentia
3,32,Lemon Juice
4,22,Rye
5,41,Benedictine
6,40,Yellow Chartreuse
8,4,Gin
12,10,Bourbon
14,42,Aperol
