In [1]:
# pip install pulp pandas
import re, json, math
import pandas as pd
import pulp as pl

# --- Load and prep (for goods.csv) ---
# Expected columns: Name, Level, Time (e.g. "1 h 30 min"), XP, Needs (e.g. "Corn (1), Wheat (2)")
df = pd.read_csv("/Users/madelinec/Downloads/Hay Day Project/goods.csv")

# Normalize column names we need
df["Name"]  = df["Name"].astype(str).str.strip()
df["Level"] = df["Level"].astype(int)
df["XP"]    = df["XP"].astype(float)
df["Time"]  = df["Time"].astype(str).str.strip()
if "Needs" not in df.columns:
    df["Needs"] = ""

def parse_time_to_minutes(s: str) -> float:
    """Parse '2 min', '1 h 30 min', '6 h' -> minutes (float)."""
    s = (s or "").strip().lower().replace("\u00a0", " ")  # NBSP
    s = s.replace("hours", "h").replace("hour", "h").replace("hrs", "h").replace("hr", "h")
    s = s.replace("minutes", "min").replace("minute", "min")
    h = 0; m = 0
    mh = re.search(r'(\d+)\s*h', s)
    mm = re.search(r'(\d+)\s*min', s)
    if mh: h = int(mh.group(1))
    if mm: m = int(mm.group(1))
    if not mh and not mm:
        # bare number? assume minutes
        try: return float(s)
        except: return 0.0
    return 60.0*h + float(m)

def parse_needs_any(cell: str) -> dict:
    """
    Parse Needs either as JSON or as 'Item (q), Other (2)'.
    Returns dict[str, float].
    """
    if cell is None or (isinstance(cell, float) and pd.isna(cell)):
        return {}
    s = str(cell).strip().replace("\u00a0", " ")
    if s == "" or s == "{}":
        return {}
    # Try JSON first
    try:
        d = json.loads(s)
        out = {}
        for k, v in d.items():
            k2 = str(k).strip()
            if k2:
                out[k2] = float(v)
        return out
    except Exception:
        pass
    # Fallback: "Item (q), Other (2)"
    out = {}
    for part in re.split(r",\s*", s):
        m = re.match(r"(.+?)\s*\(\s*([0-9]+(?:\.[0-9]+)?)\s*\)\s*$", part)
        if not m:
            continue
        item = m.group(1).strip()
        qty  = float(m.group(2))
        if item:
            out[item] = out.get(item, 0.0) + qty
    return out

# Parse time/needs
df["time_min"] = df["Time"].apply(parse_time_to_minutes)
df["needs"]    = df["Needs"].apply(parse_needs_any)

# Optional: treat base crops that list themselves as "Name (1)" as having no real ingredients
# (common in your CSV where field crops show 'Wheat (1)').
def clear_self_need_for_crops(row):
    name = str(row["Name"]).strip()
    needs = dict(row["needs"])
    if needs.get(name) == 1 and len(needs) == 1:
        return {}
    return needs
df["needs"] = df.apply(clear_self_need_for_crops, axis=1)


def optimize_plan(player_level: int,
                  T_hours: float = 1.0,
                  allow_buy: bool = False):
    """
    Global-time optimizer (no per-machine/source caps).
    - Maximizes total XP in a fixed time horizon T_hours (minutes = T_hours*60).
    - Enforces dependency production: ingredients must be produced too.
    - If allow_buy=True, ingredients can be 'bought' (no XP) to satisfy needs.
    """
    # Filter by level
    avail = df[df["Level"] <= player_level].copy()

    # Global time capacity (minutes)
    total_cap_min = max(0.0, T_hours * 60.0)

    # Index products
    items = avail["Name"].tolist()

    xp       = {r["Name"]: float(r["XP"])       for _, r in avail.iterrows()}
    time_min = {r["Name"]: float(r["time_min"]) for _, r in avail.iterrows()}
    needs    = {r["Name"]: dict(r["needs"])     for _, r in avail.iterrows()}

    # Ingredient set limited to unlocked/craftable items
    craftable = set(items)
    ingredients = set()
    for d in needs.values():
        for k in d.keys():
            if k in craftable:
                ingredients.add(k)

    # --- Build LP ---
    model = pl.LpProblem("HayDay_XP_Max_GlobalTime", pl.LpMaximize)

    # Production variables
    # (sanitize names for variable ids)
    def varname(prefix, name):
        return f"{prefix}_{re.sub(r'[^A-Za-z0-9_]+','_', name)}"

    x = {i: pl.LpVariable(varname("x", i), lowBound=0) for i in items}

    # Optional buy variables (no XP)
    b = {}
    if allow_buy:
        for k in ingredients:
            b[k] = pl.LpVariable(varname("b", k), lowBound=0)

    # Objective: maximize total XP
    model += pl.lpSum(xp[i] * x[i] for i in items)

    # Global time capacity (sum of all item minutes)
    model += pl.lpSum(time_min[i] * x[i] for i in items) <= total_cap_min, "global_time_cap"

    # Ingredient balance: produce (and/or buy) at least what you consume
    for k in ingredients:
        consume = pl.lpSum(needs[i].get(k, 0.0) * x[i] for i in items)
        if allow_buy:
            model += x[k] + b[k] >= consume, f"balance_{re.sub(r'[^A-Za-z0-9_]+','_',k)}"
        else:
            model += x[k] >= consume, f"balance_{re.sub(r'[^A-Za-z0-9_]+','_',k)}"

    # Solve
    model.solve(pl.PULP_CBC_CMD(msg=False))

    # Extract plan
    plan = []
    total_xp = 0.0
    total_time_used = 0.0

    for i in items:
        qty = x[i].value() or 0.0
        if qty > 1e-6:
            plan.append({
                "item": i,
                "qty": qty,
                "xp_each": xp[i],
                "xp_total": xp[i] * qty,
                "time_min_each": time_min[i],
                "time_min_total": time_min[i] * qty
            })
            total_xp += xp[i] * qty
            total_time_used += time_min[i] * qty

    plan_df = pd.DataFrame(plan).sort_values(["time_min_each", "xp_each"], ascending=[False, False]).reset_index(drop=True)
    xp_per_hour = total_xp / T_hours if T_hours > 0 else float("nan")

    return {
        "XP_total": total_xp,
        "XP_per_hour": xp_per_hour,
        "total_time_used_min": total_time_used,
        "time_capacity_min": total_cap_min,
        "plan": plan_df
    }

# Example:
results = optimize_plan(player_level=2000, T_hours=48.0, allow_buy=False)  # 1h horizon ⇒ XP/hour
print(results["XP_total"], results["XP_per_hour"])
print(results["plan"].head(20).to_string(index=False))

ValueError: invalid literal for int() with base 10: '34 (Rep. 3)'

In [3]:
# hayday_variety_with_ingredients.py
# Optimize XP with ingredient production + variety across finals, using goods.csv

import re, json
from functools import lru_cache
from typing import Iterable, Optional
import pandas as pd
import pulp as pl

CSV_PATH = "/Users/madelinec/Downloads/Hay Day Project/goods.csv"  # change if needed

# ----------------------------
# Normalization & parsing
# ----------------------------

def norm(s: str) -> str:
    """Normalize a name/key for matching."""
    if pd.isna(s): return ""
    s = str(s)
    s = s.replace("\u00a0"," ")                    # NBSP -> space
    s = s.replace("–","-").replace("—","-")        # en/em dash -> hyphen
    s = re.sub(r"\s+", " ", s)                     # collapse spaces
    s = s.strip().lower()
    return s

def parse_first_int(x, default=0) -> int:
    """Extract first integer from a cell like '34 (Rep. 3)'."""
    if pd.isna(x): return default
    m = re.search(r"\d+", str(x))
    return int(m.group()) if m else default

def parse_time_to_minutes(s: str) -> float:
    """Parse '2 min', '1 h 30 min', '6 h' into minutes."""
    s = (s or "").replace("\u00a0"," ").lower().strip()
    s = (s.replace("hours","h").replace("hour","h")
           .replace("hrs","h").replace("hr","h")
           .replace("minutes","min").replace("minute","min"))
    h = re.search(r"(\d+)\s*h", s)
    m = re.search(r"(\d+)\s*min", s)
    if not h and not m:
        try: return float(s)
        except: return 0.0
    return (int(h.group(1))*60 if h else 0) + (int(m.group(1)) if m else 0)

def parse_needs(cell: str) -> dict:
    """Parse Needs either as JSON or 'Item (q), Other (2)' -> {norm(item): qty}."""
    if cell is None or (isinstance(cell, float) and pd.isna(cell)): return {}
    raw = str(cell).replace("\u00a0"," ").strip()
    if raw == "" or raw == "{}": return {}
    # Try JSON
    try:
        d = json.loads(raw)
        return {norm(k): float(v) for k, v in d.items()}
    except Exception:
        pass
    # Fallback "Item (q)" list
    out = {}
    for part in re.split(r",\s*", raw):
        m = re.match(r"(.+?)\s*\(\s*([0-9]+(?:\.[0-9]+)?)\s*\)\s*$", part)
        if m:
            out[norm(m.group(1))] = out.get(norm(m.group(1)), 0.0) + float(m.group(2))
    return out

# ----------------------------
# Load & build normalized dataframe
# ----------------------------

df = pd.read_csv(CSV_PATH)

# Preserve original display name
df["Name"] = df["Name"].astype(str)

# Normalized fields
df["name_norm"] = df["Name"].apply(norm)
df["Level_num"] = df["Level"].apply(parse_first_int)
df["time_min"]  = df["Time"].apply(parse_time_to_minutes)
df["xp"]        = pd.to_numeric(df["XP"], errors="coerce").fillna(0.0)
df["needs_raw"] = df.get("Needs", "").apply(parse_needs)

# Clear trivial self-need for base crops like "wheat (1)"
def clear_self_need(row):
    n = row["name_norm"]
    nd = dict(row["needs_raw"])
    if nd.get(n, 0) == 1 and len(nd) == 1:
        return {}
    return nd

df["needs_norm"] = df.apply(clear_self_need, axis=1)

# Map normalized name -> original display name (for pretty output)
canon_name = df.groupby("name_norm")["Name"].first().to_dict()

# ----------------------------
# Helper: finals set (normalized)
# ----------------------------

def finals_in_unlocked(av: pd.DataFrame) -> set:
    """Finals = items NOT used as an ingredient in any other unlocked recipe."""
    items = set(av["name_norm"])
    used  = set()
    for _, r in av.iterrows():
        for ing in r["needs_norm"].keys():
            if ing in items:
                used.add(ing)
    return items - used

# ----------------------------
# Core optimizer (accounts ingredients + variety on finals)
# ----------------------------

def optimize_variety_with_ingredients(
    player_level: int,
    total_finals: int,
    finals_subset: str | Iterable[str] = "finals",  # "finals" | "finals_zero_needs" | iterable of display names
    min_distinct: Optional[int] = None,             # require at least this many different finals
    max_per_final: Optional[int] = None,            # cap any single final to this many
    time_cap_hours: Optional[float] = None          # optional global time cap across all crafts
) -> dict:
    """
    Maximize XP across ALL produced items (finals + exactly-required ingredients),
    subject to:
      - Ingredient EXACTNESS (no extra ingredient farming):
          for k used as ingredient & not a final: x_k == Σ_i needs[i][k] * x_i
          for k used as ingredient & also a final: x_k >= Σ_i needs[i][k] * x_i
      - EXACTLY total_finals units among finals (integers)
      - Variety (min_distinct, max_per_final) over finals
      - Optional global time cap across all crafts
    """
    # Unlocked
    avail = df[df["Level_num"] <= player_level].copy()
    if avail.empty:
        return {"status": "NoItems", "reason": "No unlocked items at this level.", "plan": pd.DataFrame()}

    items_set = set(avail["name_norm"])
    xp        = {r["name_norm"]: float(r["xp"])        for _, r in avail.iterrows()}
    tmin      = {r["name_norm"]: float(r["time_min"])  for _, r in avail.iterrows()}
    needs_map = {r["name_norm"]: dict(r["needs_norm"]) for _, r in avail.iterrows()}

    # Finals universe
    finals_all = finals_in_unlocked(avail)
    if isinstance(finals_subset, str):
        if finals_subset == "finals":
            finals_raw = set(finals_all)
        elif finals_subset == "finals_zero_needs":
            finals_raw = {i for i in finals_all if len(needs_map.get(i, {})) == 0}
        else:
            return {"status": "BadArg", "reason": "finals_subset must be 'finals', 'finals_zero_needs', or an iterable.", "plan": pd.DataFrame()}
    else:
        # Map user display names to normalized
        user_norm = {norm(n) for n in finals_subset}
        finals_raw = user_norm & items_set

    if not finals_raw:
        return {"status": "NoFinals", "reason": "Finals set empty at this level.", "plan": pd.DataFrame()}

    # Feasibility precheck: ingredient closure must be unlocked (normalized)
    @lru_cache(None)
    def closure_ok(u: str) -> bool:
        for ing, q in needs_map.get(u, {}).items():
            if ing not in items_set:
                return False
            if not closure_ok(ing):
                return False
        return True

    finals = {i for i in finals_raw if closure_ok(i)}
    blocked = sorted(list(finals_raw - finals))
    if not finals:
        return {
            "status": "Infeasible",
            "reason": "All candidate finals need non-matching/locked ingredients.",
            "blocked_finals": [canon_name.get(b, b) for b in blocked],
            "plan": pd.DataFrame()
        }

    # Ingredient (normalized) items
    ingredient_items = set()
    for nd in needs_map.values():
        for k in nd.keys():
            if k in items_set:
                ingredient_items.add(k)

    # Optional quick time lower-bound check
    @lru_cache(None)
    def eff_minutes(u: str) -> float:
        total = tmin.get(u, 0.0)
        for ing, q in needs_map.get(u, {}).items():
            total += q * eff_minutes(ing)
        return total

    if time_cap_hours is not None:
        cap_min = max(0.0, 60.0 * float(time_cap_hours))
        # crude LB: use fastest finals to fill total_finals honoring max_per_final and min_distinct
        em_sorted = sorted([(f, eff_minutes(f)) for f in finals], key=lambda kv: kv[1])
        qty_needed = int(total_finals)
        used = {f: 0 for f, _ in em_sorted}
        time_lb = 0.0

        # satisfy min_distinct with 1 each of the fastest ones
        if min_distinct:
            take = min(min_distinct, len(em_sorted), qty_needed)
            for f, em in em_sorted[:take]:
                time_lb += em
                qty_needed -= 1
                used[f] = 1

        idx = 0
        while qty_needed > 0 and idx < len(em_sorted):
            f, em = em_sorted[idx]
            cap_left = (max_per_final - used[f]) if max_per_final else qty_needed
            if cap_left <= 0:
                idx += 1
                continue
            k = min(qty_needed, cap_left)
            time_lb += k * em
            used[f] += k
            qty_needed -= k
            if used[f] == (max_per_final or used[f]):  # move on if capped
                idx += 1

        if time_lb > cap_min + 1e-9:
            return {
                "status": "Infeasible",
                "reason": f"Time cap too tight. Lower bound {time_lb:.1f} min > cap {cap_min:.1f} min.",
                "blocked_finals": [canon_name.get(b, b) for b in blocked],
                "plan": pd.DataFrame()
            }

    # ----------------------------
    # Build MILP
    # ----------------------------
    model = pl.LpProblem("HayDay_Max_XP_WithIngredients_Variety_Norm", pl.LpMaximize)
    v = lambda p, s: f"{p}_{re.sub(r'[^A-Za-z0-9_]+','_', s)}"

    # Integer vars for ALL craftable items (finals + ingredients)
    x = {i: pl.LpVariable(v("x", i), lowBound=0, cat="Integer") for i in items_set}

    # Variety binaries for finals (if min_distinct)
    y = {}
    if min_distinct is not None:
        M = max_per_final if (max_per_final and max_per_final > 0) else int(total_finals)
        y = {i: pl.LpVariable(v("y", i), lowBound=0, upBound=1, cat="Binary") for i in finals}
        for i in finals:
            model += x[i] <= M * y[i], v("link", i)
        model += pl.lpSum(y[i] for i in finals) >= int(min_distinct), "min_distinct_finals"

    # Objective: XP across all crafts
    model += pl.lpSum(xp[i] * x[i] for i in items_set)

    # Exactly N finals
    model += pl.lpSum(x[i] for i in finals) == int(total_finals), "exact_total_finals"

    # Per-final cap
    if max_per_final is not None:
        for i in finals:
            model += x[i] <= int(max_per_final), v("cap_final", i)

    # Ingredient EXACTNESS (no extra ingredient XP farming)
    for k in (ingredient_items - finals):
        consume = pl.lpSum(needs_map[i].get(k, 0.0) * x[i] for i in items_set)
        model += x[k] == consume, v("balance_eq", k)
    for k in (ingredient_items & finals):
        consume = pl.lpSum(needs_map[i].get(k, 0.0) * x[i] for i in items_set)
        model += x[k] >= consume, v("balance_ge", k)

    # Optional global time cap (across finals + ingredients)
    if time_cap_hours is not None:
        total_cap_min = max(0.0, 60.0 * float(time_cap_hours))
        model += pl.lpSum(tmin[i] * x[i] for i in items_set) <= total_cap_min, "global_time_cap"

    # Solve
    model.solve(pl.PULP_CBC_CMD(msg=False))
    status = pl.LpStatus[model.status]

    # Extract
    cols = ["item","is_final","qty","xp_each","xp_total","time_min_each","time_min_total"]
    rows = []
    total_xp = total_time = finals_cnt = 0

    if status == "Optimal":
        for i in items_set:
            q = int(round((x[i].value() or 0)))
            if q > 0:
                is_final = i in finals
                rows.append({
                    "item": canon_name.get(i, i),  # pretty display
                    "is_final": is_final,
                    "qty": q,
                    "xp_each": xp[i],
                    "xp_total": xp[i]*q,
                    "time_min_each": tmin[i],
                    "time_min_total": tmin[i]*q
                })
                total_xp += xp[i]*q
                total_time += tmin[i]*q
                if is_final: finals_cnt += q

    plan = pd.DataFrame(rows, columns=cols)
    if not plan.empty:
        plan = plan.sort_values(["is_final","xp_each","time_min_each"],
                                ascending=[False, False, True]).reset_index(drop=True)

    return {
        "status": status,
        "blocked_finals": [canon_name.get(b, b) for b in blocked],
        "XP_total": total_xp,
        "finals_made": finals_cnt,
        "finals_target": int(total_finals),
        "distinct_finals": int(plan.loc[plan["is_final"], "item"].nunique()) if not plan.empty else 0,
        "total_time_used_min": total_time,
        "plan": plan
    }

# ----------------------------
# Example usage
# ----------------------------

# Example: level 25, exactly 12 finals, at least 4 distinct finals, max 5 of any one final.
res = optimize_variety_with_ingredients(
    player_level=25,
    total_finals=12,
    finals_subset="finals",   # or "finals_zero_needs" or {"Cookie","Bread","Popcorn"}
    min_distinct=4,
    max_per_final=5,
    time_cap_hours=None       # add a number (e.g., 3.0) to limit total craft time
)
print("Status:", res["status"])
print("Blocked finals (mismatch/locked):", res["blocked_finals"])
print("XP_total:", res["XP_total"])
print("Finals made / target:", res["finals_made"], "/", res["finals_target"])
print("Distinct finals:", res["distinct_finals"])
print("Total time used (min):", res["total_time_used_min"])
print("\nPlan:")
if res["plan"].empty:
    print("<empty>")
else:
    print(res["plan"].to_string(index=False))

Status: Optimal
Blocked finals (mismatch/locked): []
XP_total: 1354.0
Finals made / target: 12 / 12
Distinct finals: 3
Total time used (min): 19246.0

Plan:
         item  is_final  qty  xp_each  xp_total  time_min_each  time_min_total
 Violet dress      True    5     39.0     195.0          135.0           675.0
  Wooly chaps      True    2     37.0      74.0           90.0           180.0
   Cheesecake      True    5     34.0     170.0          240.0          1200.0
       Cheese     False    5     15.0      75.0           60.0           300.0
Cotton fabric     False   12     13.0     156.0           30.0           360.0
       Cookie     False    5     13.0      65.0           60.0           300.0
    Raspberry     False    5      9.0      45.0         1080.0          5400.0
       Cotton     False   36      6.0     216.0          150.0          5400.0
       Indigo     False    5      5.0      25.0          120.0           600.0
         Wool     False    6      5.0      30.0      

In [4]:
# hayday_variety_with_ingredients_flexible.py
# Optimize XP with ingredient production + variety across finals, using goods.csv
import re, json
from functools import lru_cache
from typing import Iterable, Optional
import pandas as pd
import pulp as pl
CSV_PATH = "/Users/madelinec/Downloads/Hay Day Project/goods.csv"  # change if needed
# ----------------------------
# Normalization & parsing
# ----------------------------
def norm(s: str) -> str:
    """Normalize a name/key for matching."""
    if pd.isna(s): return ""
    s = str(s)
    s = s.replace("\u00a0"," ")              # NBSP -> space
    s = s.replace("–","-").replace("—","-")      # en/em dash -> hyphen
    s = re.sub(r"\s+", " ", s)                 # collapse spaces
    s = s.strip().lower()
    return s
def parse_first_int(x, default=0) -> int:
    """Extract first integer from a cell like '34 (Rep. 3)'."""
    if pd.isna(x): return default
    m = re.search(r"\d+", str(x))
    return int(m.group()) if m else default
def parse_time_to_minutes(s: str) -> float:
    """Parse '2 min', '1 h 30 min', '6 h' into minutes."""
    s = (s or "").replace("\u00a0"," ").lower().strip()
    s = (s.replace("hours","h").replace("hour","h")
               .replace("hrs","h").replace("hr","h")
               .replace("minutes","min").replace("minute","min"))
    h = re.search(r"(\d+)\s*h", s)
    m = re.search(r"(\d+)\s*min", s)
    if not h and not m:
        try: return float(s)
        except: return 0.0
    return (int(h.group(1))*60 if h else 0) + (int(m.group(1)) if m else 0)
def parse_needs(cell: str) -> dict:
    """Parse Needs either as JSON or 'Item (q), Other (2)' -> {norm(item): qty}."""
    if cell is None or (isinstance(cell, float) and pd.isna(cell)): return {}
    raw = str(cell).replace("\u00a0"," ").strip()
    if raw == "" or raw == "{}": return {}
    # Try JSON
    try:
        d = json.loads(raw)
        return {norm(k): float(v) for k, v in d.items()}
    except Exception:
        pass
    # Fallback "Item (q)" list
    out = {}
    for part in re.split(r",\s*", raw):
        m = re.match(r"(.+?)\s*\(\s*([0-9]+(?:\.[0-9]+)?)\s*\)\s*$", part)
        if m:
            out[norm(m.group(1))] = out.get(norm(m.group(1)), 0.0) + float(m.group(2))
    return out
# ----------------------------
# Load & build normalized dataframe
# ----------------------------
df = pd.read_csv(CSV_PATH)

# === NEW YIELD_QTY CALCULATION ===
# Default yield is 1.0
df['Yield_qty'] = 1.0
# Set yield to 2.0 for items sourced from Field, Tree, or Bush (case-insensitive)
df.loc[df['Source'].astype(str).str.contains(r'Field|Tree|Bush', case=False, na=False), 'Yield_qty'] = 2.0
# =================================

# Preserve original display name
df["Name"] = df["Name"].astype(str)
# Normalized fields
df["name_norm"] = df["Name"].apply(norm)
df["Level_num"] = df["Level"].apply(parse_first_int)
df["time_min"]  = df["Time"].apply(parse_time_to_minutes)
df["xp"]        = pd.to_numeric(df["XP"], errors="coerce").fillna(0.0)
df["needs_raw"] = df.get("Needs", "").apply(parse_needs)
# Clear trivial self-need for base crops like "wheat (1)"
def clear_self_need(row):
    n = row["name_norm"]
    nd = dict(row["needs_raw"])
    if nd.get(n, 0) == 1 and len(nd) == 1:
        return {}
    return nd
df["needs_norm"] = df.apply(clear_self_need, axis=1)
# Map normalized name -> original display name (for pretty output)
canon_name = df.groupby("name_norm")["Name"].first().to_dict()
# ----------------------------
# Helper: finals set (normalized)
# ----------------------------
def finals_in_unlocked(av: pd.DataFrame) -> set:
    """Finals = items NOT used as an ingredient in any other unlocked recipe."""
    items = set(av["name_norm"])
    used  = set()
    for _, r in av.iterrows():
        for ing in r["needs_norm"].keys():
            if ing in items:
                used.add(ing)
    return items - used
# ----------------------------
# Core optimizer (accounts ingredients + variety on finals)
# ----------------------------
def optimize_variety_with_ingredients(
    player_level: int,
    total_finals: Optional[int] = None,
    finals_subset: str | Iterable[str] = "finals",  # "finals" | "finals_zero_needs" | iterable of display names
    min_distinct: Optional[int] = None,             # require at least this many different finals
    max_per_final: Optional[int] = None,            # cap any single final to this many
    time_cap_hours: Optional[float] = None,         # optional global time cap across all crafts
    default_time_if_unbounded: float = 24.0          # used ONLY when total_finals and cap are both None
) -> dict:
    """
    Maximize XP across ALL produced items (finals + exactly-required ingredients),
    while allowing total_finals / min_distinct / max_per_final to be omitted.
    If total_finals is None and time_cap_hours is None, we apply a default time cap
    (default_time_if_unbounded = 1.0 hour) so the MILP stays bounded.
    """
    # ---------- Unlocked ----------
    avail = df[df["Level_num"] <= player_level].copy()
    if avail.empty:
        return {"status": "NoItems", "reason": "No unlocked items at this level.", "plan": pd.DataFrame()}
    items_set = set(avail["name_norm"])
    xp        = {r["name_norm"]: float(r["xp"])      for _, r in avail.iterrows()}
    tmin      = {r["name_norm"]: float(r["time_min"])  for _, r in avail.iterrows()}
    needs_map = {r["name_norm"]: dict(r["needs_norm"]) for _, r in avail.iterrows()}
    # New: Yield map for balance constraints
    yield_map = {r["name_norm"]: float(r["Yield_qty"]) for _, r in avail.iterrows()}
    # ---------- Finals universe ----------
    if isinstance(finals_subset, str):
        finals_all = finals_in_unlocked(avail)
        if finals_subset == "finals":
            # FIXED: Use finals_all directly (all non-ingredients)
            finals_raw = finals_all 
        elif finals_subset == "finals_zero_needs":
            # This option filters to only finals that require 0 ingredients
            finals_raw = {i for i in finals_all if len(needs_map.get(i, {})) == 0}
        else:
            return {"status": "BadArg", "reason": "finals_subset must be 'finals', 'finals_zero_needs', or an iterable.", "plan": pd.DataFrame()}
    else:
        user_norm = {norm(n) for n in finals_subset}
        finals_raw = user_norm & items_set
    if not finals_raw:
        return {"status": "NoFinals", "reason": "Finals set empty at this level.", "plan": pd.DataFrame()}
    # ---------- Feasibility: ingredient closure must be unlocked ----------
    @lru_cache(None)
    def closure_ok(u: str) -> bool:
        for ing, q in needs_map.get(u, {}).items():
            if ing not in items_set:
                return False
            if not closure_ok(ing):
                return False
        return True
    finals = {i for i in finals_raw if closure_ok(i)}
    blocked = sorted(list(finals_raw - finals))
    if not finals:
        return {
            "status": "Infeasible",
            "reason": "All candidate finals need non-matching/locked ingredients.",
            "blocked_finals": [canon_name.get(b, b) for b in blocked],
            "plan": pd.DataFrame()
        }
    # Ingredient items
    ingredient_items = set()
    for nd in needs_map.values():
        for k in nd.keys():
            if k in items_set:
                ingredient_items.add(k)
    # ---------- Effective time cap logic ----------
    effective_cap_hours = time_cap_hours
    used_default_cap = False
    if total_finals is None and time_cap_hours is None:
        effective_cap_hours = float(default_time_if_unbounded)  # keep problem bounded
        used_default_cap = True
    # ---------- Build MILP ----------
    model = pl.LpProblem("HayDay_Max_XP_WithIngredients_Variety_Flexible", pl.LpMaximize)
    v = lambda p, s: f"{p}{re.sub(r'[^A-Za-z0-9]+','_', s)}"
    # Integer vars for ALL craftable items
    x = {i: pl.LpVariable(v("x", i), lowBound=0, cat="Integer") for i in items_set}
    # Variety binaries over finals only if min_distinct is requested
    y = {}
    if min_distinct is not None:
        # Choose M for link constraint x[i] <= M*y[i]
        if max_per_final and max_per_final > 0:
            M = int(max_per_final)
        else:
            if effective_cap_hours is not None:
                fastest = min(max(tmin.get(f, 1.0), 1.0) for f in finals)  # avoid zero
                M = max(1, int((effective_cap_hours * 60.0) // fastest))
            else:
                # Fallback generous M; not tight but safe
                M = 9999
        y = {i: pl.LpVariable(v("y", i), lowBound=0, upBound=1, cat="Binary") for i in finals}
        for i in finals:
            model += x[i] <= M * y[i], v("link", i)
        model += pl.lpSum(y[i] for i in finals) >= int(min_distinct), "min_distinct_finals"
    # Objective: XP across ALL crafts
    model += pl.lpSum(xp[i] * x[i] for i in items_set)
    # Exactly N finals (only if provided)
    if total_finals is not None:
        model += pl.lpSum(x[i] for i in finals) == int(total_finals), "exact_total_finals"
    # Per-final cap (if provided)
    if max_per_final is not None:
        for i in finals:
            model += x[i] <= int(max_per_final), v("cap_final", i)
            
    # === MODIFIED INGREDIENT CONSTRAINTS: Account for YIELD ===
    for k in (ingredient_items - finals):
        consume = pl.lpSum(needs_map[i].get(k, 0.0) * x[i] for i in items_set)
        # Production * Yield MUST EXACTLY match consumption
        model += yield_map.get(k, 1.0) * x[k] == consume, v("balance_eq", k)
        
    for k in (ingredient_items & finals):
        consume = pl.lpSum(needs_map[i].get(k, 0.0) * x[i] for i in items_set)
        # Production * Yield MUST be GREATER THAN OR EQUAL to consumption
        model += yield_map.get(k, 1.0) * x[k] >= consume, v("balance_ge", k)
    # =========================================================

    # Global time cap if provided or defaulted
    if effective_cap_hours is not None:
        total_cap_min = max(0.0, 60.0 * float(effective_cap_hours))
        model += pl.lpSum(tmin[i] * x[i] for i in items_set) <= total_cap_min, "global_time_cap"
    # ---------- Solve ----------
    model.solve(pl.PULP_CBC_CMD(msg=False))
    status = pl.LpStatus[model.status]
    # ---------- Extract ----------
    cols = ["item","is_final","qty","xp_each","xp_total","time_min_each","time_min_total"]
    rows = []
    total_xp = total_time = finals_cnt = 0
    if status == "Optimal":
        for i in items_set:
            q = int(round((x[i].value() or 0)))
            if q > 0:
                is_final = i in finals
                rows.append({
                    "item": canon_name.get(i, i),
                    "is_final": is_final,
                    "qty": q,
                    "xp_each": xp[i],
                    "xp_total": xp[i]*q,
                    "time_min_each": tmin[i],
                    "time_min_total": tmin[i]*q
                })
                total_xp += xp[i]*q
                total_time += tmin[i]*q
                if is_final: finals_cnt += q
    plan = pd.DataFrame(rows, columns=cols)
    if not plan.empty:
        plan = plan.sort_values(["is_final","xp_each","time_min_each"],
                                 ascending=[False, False, True]).reset_index(drop=True)
    return {
        "status": status,
        "used_default_time_cap": used_default_cap,
        "time_cap_hours": effective_cap_hours,
        "blocked_finals": [canon_name.get(b, b) for b in blocked],
        "XP_total": total_xp,
        "finals_made": finals_cnt,
        "finals_target": (None if total_finals is None else int(total_finals)),
        "distinct_finals": (int(plan.loc[plan["is_final"], "item"].nunique()) if not plan.empty else 0),
        "total_time_used_min": total_time,
        "plan": plan
    }

AttributeError: 'str' object has no attribute 'apply'

In [2]:
# ----------------------------
# Example usage
# ----------------------------
# 1) Run with just level (defaults to 24-hour cap)
res = optimize_variety_with_ingredients(player_level=72)
print("\n--- Example 1: level-only (default 24h cap) ---")
print("Status:", res["status"], "| Used default time cap:", res["used_default_time_cap"])
print("Blocked finals:", res["blocked_finals"])
print("XP_total:", res["XP_total"])
print("Finals made:", res["finals_made"])
print("Distinct finals:", res["distinct_finals"])
print("Total time used (min):", res["total_time_used_min"])
print(res["plan"].head(20).to_string(index=False) if not res["plan"].empty else "<empty>")
# 2) Max XP within 3 hours, no exact item count
res2 = optimize_variety_with_ingredients(player_level=72, time_cap_hours=1)
print(f"\n--- Example 2: {res2['time_cap_hours']}-hour cap, no exact N ---")
print("Status:", res2["status"])
print("XP_total:", res2["XP_total"])
print("Finals made:", res2["finals_made"])
print("Distinct finals:", res2["distinct_finals"])
print("Total time used (min):", res2["total_time_used_min"])
print(res2["plan"].head(20).to_string(index=False) if not res2["plan"].empty else "<empty>")
# # 3) Exact 12 finals, at least 4 distinct, max 5 each, no time cap
# res3 = optimize_variety_with_ingredients(player_level=25, total_finals=12, min_distinct=4, max_per_final=5)
# print("\n--- Example 3: exact 12 finals, min 4 distinct, max 5 each ---")
# print("Status:", res3["status"])
# print("Blocked finals:", res3["blocked_finals"])
# print("XP_total:", res3["XP_total"])
# print("Finals made:", res3["finals_made"])
# print("Distinct finals:", res3["distinct_finals"])
# print(res3["plan"].head(20).to_string(index=False) if not res3["plan"].empty else "<empty>")


--- Example 1: level-only (default 24h cap) ---
Status: Optimal | Used default time cap: True
Blocked finals: ['Blueberry Muffin']
XP_total: 686.0
Finals made: 7
Distinct finals: 1
Total time used (min): 1435.0
         item  is_final  qty  xp_each  xp_total  time_min_each  time_min_total
Honey popcorn      True    7     43.0     301.0           90.0           630.0
        Honey     False   14     19.0     266.0           20.0           280.0
    Honeycomb     False   14      8.0     112.0           35.0           490.0
         Corn     False    7      1.0       7.0            5.0            35.0

--- Example 2: 1-hour cap, no exact N ---
Status: Optimal
XP_total: 5.0
Finals made: 1
Distinct finals: 1
Total time used (min): 35.0
   item  is_final  qty  xp_each  xp_total  time_min_each  time_min_total
Popcorn      True    1      4.0       4.0           30.0            30.0
   Corn     False    1      1.0       1.0            5.0             5.0
