In [None]:
import pandas as pd
from ortools.sat.python import cp_model


In [5]:
df = pd.read_csv("4_equal_parts_cocktails.csv")

df.head()



Unnamed: 0,recipe_id,ingredient_id,name,name.1,path
0,12,26,Paper Boat,Rum,/2/26/
1,12,39,Paper Boat,Dry Curaçao,/39/
2,12,34,Paper Boat,Amaro Nonino Quintessentia,/63/34/
3,12,32,Paper Boat,Lemon Juice,/7/32/
4,13,22,Monte Cassino,Rye,/1/22/


In [8]:
# Convert df to list of lists: each inner list contains ingredients for each recipe
recipes_list = df.groupby('name')['name.1'].apply(list).tolist()



In [None]:
def categorize_ingredients(recipes):
    """
    Partition ingredients into four disjoint categories so that every
    recipe contains exactly one ingredient from each category.

    Parameters
    ----------
    recipes : Iterable[Iterable[str]]
        Each recipe is an iterable of its four ingredient names.

    Returns
    -------
    list[list[str]]
        Four lists of ingredient names ordered by ascending size.

    Raises
    ------
    ValueError
        If no disjoint 4-category assignment exists.
    """
    k = 4                                   # number of categories
    ingredients = sorted({ing for r in recipes for ing in r})
    n = len(ingredients)
    idx = {ing: i for i, ing in enumerate(ingredients)}

    model = cp_model.CpModel()
    x = {(i, c): model.NewBoolVar(f"x_{i}_{c}")      # 1 ⇔ ingredient i in category c
         for i in range(n) for c in range(k)}

    # each ingredient in exactly one category
    for i in range(n):
        # ingredient appears in 1 or 2 categories
        cnt = sum(x[i, c] for c in range(k))   # how many categories this ingredient appears in
        model.Add(cnt >= 1)                    # must appear in at least one
        model.Add(cnt <= 2)   

    # every recipe is a “rainbow”: one ingredient per category
    for r in recipes:
        ids = [idx[ing] for ing in r]
        for c in range(k):
            model.Add(sum(x[i, c] for i in ids) == 1)

    # mild symmetry-breaking (optional but speeds up search):
    model.Add(x[0, 0] == 1)

    overlap_penalty = []
    for i in range(n):
        overlap_penalty.append(sum(x[i, c] for c in range(k)) - 1)  # 0 if unique, >0 if duplicated
    model.Minimize(sum(overlap_penalty))

    dup = [sum(x[i, c] for c in range(k)) - 1 for i in range(n)]
    model.Minimize(sum(dup))

    solver = cp_model.CpSolver()
    solver.parameters.maximize = False
    solver.parameters.num_search_workers = 8
    _ = solver.Solve(model)

    best_overlap = int(solver.ObjectiveValue())      # keep this value fixed
    model.Add(sum(dup) == best_overlap)

    # Stage 2  – with duplicates frozen, minimise total assignments
    tot = sum(x[i, c] for i in range(n) for c in range(k))
    model.Minimize(tot)

    _ = solver.Solve(model)

    solver = cp_model.CpSolver()
    status = solver.Solve(model)
    if status != cp_model.OPTIMAL:
        print("No disjoint four-category cover is possible.")

    cats = [[] for _ in range(k)]
    for ing, i in idx.items():
        for c in range(k):
            if solver.Value(x[i, c]):
                cats[c].append(ing)

    cats.sort(key=len)                      # smallest category first
    return cats

In [49]:
cats = categorize_ingredients(recipes_list)

AttributeError: 'ortools.sat.python.cp_model_helper.SumArray' object has no attribute 'Between'

In [43]:
len(cats)

4

In [44]:
cats

[['Aged Rum',
  'Amaro Montenegro',
  'Aperol',
  'Averna',
  'Batavia Arrack',
  'Bigallet China China',
  'Blended Scotch',
  'Campari',
  'Cocchi Americano',
  'Cointreau',
  'Cynar',
  'Dark Jamaican Rum',
  'Dolin Génépy',
  'Douglas Fir Eau De Vie',
  'Dry Sherry',
  'Genever',
  'Ginger Liqueur',
  'Grapefruit Juice',
  'Grenadine',
  'Jaegermeister',
  'Lillet Blanc',
  'Maraschino Liqueur',
  'Mezcal',
  'Peach Liqueur',
  'Pineapple Rum',
  'Pisco',
  'Rhubarb Syrup',
  'Rum',
  'Simple Syrup',
  'St. Germain',
  'Sweet Vermouth',
  'Velvet Falernum',
  'Zwack Unicum'],
 ['Aged Rum',
  'Amaro Sibilla',
  'Aperol',
  'Aromas De Montserrat',
  'Averna',
  'Barenjager',
  'Batavia Arrack',
  'Benedictine',
  'Bigallet China China',
  'Bison Grass Vodka',
  'Blended Scotch',
  'Bourbon',
  'Campari',
  'Cherry Heering',
  'Cocchi Americano',
  'Cognac',
  'Cointreau',
  'Crème De Pamplemousse',
  'Cynar',
  'Damiana Liqueur',
  'Dark Jamaican Rum',
  'Demerara Rum',
  'Dolin Géné

In [45]:
[len(cat) for cat in cats]

[33, 63, 63, 64]