# In [None]:
import math
import itertools
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, Tuple, List

# ---------------------Configuration----------------------------

# Directory containing the input CSV files read by load_data(); adjust if needed.
DATA_DIR = Path("xxxxxx")

# Size options for NEW facility builds, keyed by size name. Per size:
#   "total"   - total capacity the new facility adds
#   "max_0_5" - cap on the capacity usable for ages 0-5
#               (equal to half of "total" for every size listed here)
#   "cost"    - cost of building that size (presumably dollars -- TODO confirm units)
NEW_FACILITY_SIZES = {
    "small":  {"total": 100, "max_0_5": 50,  "cost": 65000},
    "medium": {"total": 200, "max_0_5": 100, "cost": 95000},
    "large":  {"total": 400, "max_0_5": 200, "cost": 115000},
}

# Special equipment cost per new slot for ages 0–5
NEW_EQUIP_COST_PER_0_5 = 100.0

# Distance threshold in miles (Realistic model).
# build_conflicts() uses this as its default: any pair of sites strictly
# closer than this distance is recorded as a conflict.
MIN_DISTANCE_MILES = 0.06

# Employment & income thresholds for "high-demand" ZIP codes.
# The realistic model treats a ZIP as high-demand when its employment rate is
# >= EMPLOYMENT_RATE_CUTOFF OR its average income is <= AVG_INCOME_CUTOFF.
EMPLOYMENT_RATE_CUTOFF = 0.60
AVG_INCOME_CUTOFF = 60000

# ---------------------Budgeting (By Karthik)----------------------------

def compute_populations(pop_df):
    """Estimate per-ZIP child populations for the 0-5, 5-12 and 0-12 age groups.

    The input is copied, so the caller's frame is never modified.

    Parameters
    ----------
    pop_df : pandas.DataFrame
        Population table with one row per ZIP code. The ZIP column may be
        named "zipcode", "ZIP code" or "zip_code". The age-bracket columns
        "0-4", "5-9" and "10-14" are read through ``safe_series``.

    Returns
    -------
    pandas.DataFrame
        Columns "zipcode", "pop_0_5", "pop_5_12" and "pop_0_12".

    Raises
    ------
    KeyError
        If none of the accepted ZIP column names is present.
    """
    df = pop_df.copy()
    zip_aliases = ("zipcode", "ZIP code", "zip_code")

    # Coerce every non-ZIP column to numeric; unparseable cells become 0.
    for col in df.columns:
        if col not in zip_aliases:
            df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0.0)

    # Normalise the ZIP column name to "zipcode" ("ZIP code" wins over "zip_code").
    if "zipcode" not in df.columns:
        alias = next((a for a in zip_aliases[1:] if a in df.columns), None)
        if alias is None:
            raise KeyError(
                "population table has no ZIP column; expected one of "
                f"{list(zip_aliases)}"
            )
        df = df.rename(columns={alias: "zipcode"})

    # Population bins we need.
    # safe_series is a helper defined outside this block; presumably it returns
    # the named column (or zeros when absent) -- confirm against its definition.
    pop_0_4 = safe_series(df, "0-4")
    pop_5_9 = safe_series(df, "5-9")
    pop_10_14 = safe_series(df, "10-14")

    # Ages are assumed uniformly distributed inside each 5-year bracket:
    #   age 5       -> 1/5 of the 5-9 bracket
    #   ages 10-12  -> 3/5 of the 10-14 bracket
    pop_0_5 = pop_0_4 + (1.0 / 5.0) * pop_5_9
    pop_5_12 = pop_5_9 + (3.0 / 5.0) * pop_10_14

    # NOTE(review): age 5 is included in both pop_0_5 and pop_5_12, so this sum
    # counts that 1/5 share of the 5-9 bracket twice -- confirm this is intended.
    pop_0_12 = pop_0_5 + pop_5_12

    return df[["zipcode"]].assign(
        pop_0_5=pop_0_5, pop_5_12=pop_5_12, pop_0_12=pop_0_12
    )

'''
Budgeting Part: xxxxxxx
'''


# -------------------Loading and Cleaning Data----------------------
def load_data():
    """Read the input CSV files from DATA_DIR and return cleaned tables.

    Returns
    -------
    tuple
        ``(ccare, locs, zip_df)`` where

        * ``ccare`` is the existing-facility table with numeric capacity and
          coordinate columns, the derived columns "cap_0_5", "cap_5_12" and
          "cap_total", and a string "facility_id";
        * ``locs`` is the potential-location table exactly as read;
        * ``zip_df`` is the per-ZIP population table left-joined with
          "avg_income" and "employment_rate".
    """
    capacity_columns = [
        "infant_capacity",
        "toddler_capacity",
        "preschool_capacity",
        "school_age_capacity",
        "children_capacity",
        "total_capacity",
    ]

    # ---- Existing child care facilities ----
    ccare = pd.read_csv(DATA_DIR / "child_care_regulated.csv")
    ccare.rename(columns={"zip_code": "zipcode"}, inplace=True)

    # Every capacity column ends up numeric and NaN-free; absent ones are zero.
    for name in capacity_columns:
        if name not in ccare.columns:
            ccare[name] = 0.0
            continue
        ccare[name] = pd.to_numeric(ccare[name], errors="coerce").fillna(0.0)

    # Coordinates keep NaN for unparseable values.
    for coord in ("latitude", "longitude"):
        ccare[coord] = pd.to_numeric(ccare[coord], errors="coerce")

    # Capacity by age group:
    #   0-5  = infant + toddler + preschool capacity
    #   5-12 = school-age capacity
    under_five_parts = ["infant_capacity", "toddler_capacity", "preschool_capacity"]
    ccare["cap_0_5"] = ccare[under_five_parts].sum(axis=1)
    ccare["cap_5_12"] = ccare["school_age_capacity"]

    # Total capacity, falling back to the sum of the two age groups when the
    # reported total is zero or negative. The capacity columns were NaN-filled
    # above, so no further NaN handling is required here.
    ccare["cap_total"] = ccare["total_capacity"].copy()
    needs_fallback = ccare["cap_total"] <= 0
    ccare.loc[needs_fallback, "cap_total"] = ccare["cap_0_5"] + ccare["cap_5_12"]

    # Facility identifiers: use the row position when the file has none, and
    # always store them as strings.
    if "facility_id" in ccare.columns:
        raw_ids = ccare["facility_id"]
    else:
        raw_ids = pd.Series(np.arange(len(ccare)), index=ccare.index)
    ccare["facility_id"] = raw_ids.astype(str)

    # ---- Potential new locations ----
    locs = pd.read_csv(DATA_DIR / "potential_locations.csv")

    # ---- Population ----
    pop = compute_populations(pd.read_csv(DATA_DIR / "population.csv"))

    # ---- Income ----
    inc = pd.read_csv(DATA_DIR / "avg_individual_income.csv")
    inc.rename(
        columns={"ZIP code": "zipcode", "average income": "avg_income"},
        inplace=True,
    )

    # ---- Employment rate ----
    emp = pd.read_csv(DATA_DIR / "employment_rate.csv")
    emp.rename(columns={"employment rate": "employment_rate"}, inplace=True)

    # ---- ZIP-level table: population + income + employment ----
    zip_df = pop.merge(inc[["zipcode", "avg_income"]], on="zipcode", how="left")
    zip_df = zip_df.merge(
        emp[["zipcode", "employment_rate"]], on="zipcode", how="left"
    )
    for col in ("avg_income", "employment_rate"):
        zip_df[col] = pd.to_numeric(zip_df[col], errors="coerce")
    # Missing income becomes +inf and missing employment rate becomes 0.
    zip_df = zip_df.fillna({"avg_income": np.inf, "employment_rate": 0.0})

    return ccare, locs, zip_df


# --------------------------Modeling - Idealistic Part (By Weiwen)----------------------










# --------------------------Modeling - Realistic Part--------------------------------------

def build_conflicts(ccare: pd.DataFrame, locs: pd.DataFrame, min_miles=MIN_DISTANCE_MILES):
    """Find pairs of sites in the same ZIP that are closer than ``min_miles``.

    Rows without a latitude or longitude are dropped from both tables first.
    Distances come from ``haversine_miles`` and a pair conflicts only when
    the distance is strictly less than ``min_miles``. Only sites sharing a
    "zipcode" value are compared.

    Parameters
    ----------
    ccare : pandas.DataFrame
        Existing facilities; reads "zipcode", "latitude", "longitude" and
        "facility_id".
    locs : pandas.DataFrame
        Potential new locations; reads "zipcode", "latitude" and "longitude".
    min_miles : float
        Distance threshold in miles.

    Returns
    -------
    tuple
        ``(locs_clean, new_new_conflicts, new_exist_conflicts)``:

        * ``locs_clean`` - copy of ``locs`` without coordinate-less rows, plus
          a "loc_id" column holding the row index cast to int;
        * ``new_new_conflicts`` - ``{zipcode: [(loc_id, loc_id), ...]}``;
        * ``new_exist_conflicts`` - ``{zipcode: [(loc_id, facility_id), ...]}``
          with ``facility_id`` as a string.

        Both dicts have one entry per ZIP present in ``locs_clean``.
    """
    coord_cols = ["latitude", "longitude"]

    # Clean coordinates and tag each potential location with an integer id.
    locs_clean = locs.dropna(subset=coord_cols)
    ccare_clean = ccare.dropna(subset=coord_cols)
    locs_clean = locs_clean.copy()
    locs_clean["loc_id"] = locs_clean.index.astype(int)

    new_new_conflicts = {}
    new_exist_conflicts = {}

    for zipcode, candidates in locs_clean.groupby("zipcode"):
        # new-new: every unordered pair of candidate locations in this ZIP.
        points = candidates[["loc_id", "latitude", "longitude"]].values.tolist()
        new_new_conflicts[zipcode] = [
            (int(id_a), int(id_b))
            for (id_a, lat_a, lon_a), (id_b, lat_b, lon_b)
            in itertools.combinations(points, 2)
            if haversine_miles(lat_a, lon_a, lat_b, lon_b) < min_miles
        ]

        # new-existing: each candidate against each existing facility in this ZIP.
        existing = ccare_clean[ccare_clean["zipcode"] == zipcode]
        new_exist_conflicts[zipcode] = [
            (int(cand["loc_id"]), str(fac["facility_id"]))
            for _, cand in candidates.iterrows()
            for _, fac in existing.iterrows()
            if haversine_miles(
                cand["latitude"], cand["longitude"],
                fac["latitude"], fac["longitude"],
            ) < min_miles
        ]

    return locs_clean, new_new_conflicts, new_exist_conflicts




from gurobipy import Model, GRB, quicksum

def build_and_solve_model_2(ccare, locs, zip_df, new_new_conflicts, new_exist_conflicts):
    m = Model("Model_2_Realistic")

    # Indexing sets
    F = list(ccare["facility_id"])
    facility_nf = dict(zip(F, ccare["cap_total"]))
    facility_zip = dict(zip(F, ccare["zipcode"]))

    L = list(locs.index)
    loc_zip = dict(zip(L, locs["zipcode"]))
    Z = list(zip_df["zipcode"])

    # Decision variables
    # Expansion segments per facility
    x1 = {f: m.addVar(lb=0.0, ub=0.10*facility_nf[f], vtype=GRB.CONTINUOUS, name=f"x1[{f}]") for f in F}
    x2 = {f: m.addVar(lb=0.0, ub=0.05*facility_nf[f], vtype=GRB.CONTINUOUS, name=f"x2[{f}]") for f in F}
    x3 = {f: m.addVar(lb=0.0, ub=0.05*facility_nf[f], vtype=GRB.CONTINUOUS, name=f"x3[{f}]") for f in F}

    # New build binary selection
    y = {(l, s): m.addVar(vtype=GRB.BINARY, name=f"y_build[{l},{s}]") for l in L for s in NEW_FACILITY_SIZES}

    # Zip-level allocation variables
    s0_5 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"s0_5[{z}]") for z in Z}
    s5_12 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"s5_12[{z}]") for z in Z}
    a_new_0_5 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"a_new0_5[{z}]") for z in Z}
    b_new_5_12 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"b_new5_12[{z}]") for z in Z}

    m.update()

    # Helpers
    cap0_5_zip = ccare.groupby("zipcode")["cap_0_5"].sum().to_dict()
    cap5_12_zip = ccare.groupby("zipcode")["cap_5_12"].sum().to_dict()
    cap_total_zip = ccare.groupby("zipcode")["cap_total"].sum().to_dict()

    pop_0_5 = dict(zip(zip_df["zipcode"], zip_df["pop_0_5"]))
    pop_5_12 = dict(zip(zip_df["zipcode"], zip_df["pop_5_12"]))
    pop_0_12 = dict(zip(zip_df["zipcode"], zip_df["pop_0_12"]))
    avg_income = dict(zip(zip_df["zipcode"], zip_df["avg_income"]))
    emp_rate = dict(zip(zip_df["zipcode"], zip_df["employment_rate"]))

    # Constraints

    # Expansion total per facility ≤ 20% nf
    for f in F:
        nf = facility_nf[f]
        m.addConstr(x1[f] + x2[f] + x3[f] <= 0.20 * nf, name=f"x_total_cap[{f}]")

    # Allocation & policy per zip
    for z in Z:
        exp_in_z = quicksum(x1[f] + x2[f] + x3[f] for f in F if facility_zip[f] == z)
        new_tot_in_z = quicksum(NEW_FACILITY_SIZES[s]["total"] * y[l, s] for l in L if loc_zip[l] == z for s in NEW_FACILITY_SIZES)

        m.addConstr(s0_5[z] + s5_12[z] == cap_total_zip.get(z, 0.0) + exp_in_z + new_tot_in_z, name=f"alloc_balance[{z}]")
        m.addConstr(s0_5[z] <= cap0_5_zip.get(z, 0.0) + exp_in_z + a_new_0_5[z], name=f"max_0_5_bound[{z}]")
        m.addConstr(a_new_0_5[z] <= quicksum(NEW_FACILITY_SIZES[s]["max_0_5"] * y[l, s] for l in L if loc_zip[l] == z for s in NEW_FACILITY_SIZES), name=f"new_0_5_cap[{z}]")
        m.addConstr(a_new_0_5[z] + b_new_5_12[z] <= new_tot_in_z, name=f"new_split_tot[{z}]")

        high_demand = (emp_rate.get(z, 0.0) >= EMPLOYMENT_RATE_CUTOFF) or (avg_income.get(z, np.inf) <= AVG_INCOME_CUTOFF)
        threshold = 0.5 if high_demand else (1.0/3.0)
        m.addConstr(s0_5[z] + s5_12[z] >= threshold * pop_0_12.get(z, 0.0), name=f"desert_off[{z}]")
        m.addConstr(s0_5[z] >= (2.0/3.0)*pop_0_5.get(z, 0.0), name=f"policy_0_5[{z}]")

    # Distance conflicts within each ZIP
    # new-new
    for z, pairs in new_new_conflicts.items():
        for (i, j) in pairs:
            # If any size is built at location i and any size at j, they cannot both be 1 in total.
            # Sum across sizes at a location (at most one size per location). We enforce one facility max per location.
            m.addConstr(quicksum(y[i, s] for s in NEW_FACILITY_SIZES) + quicksum(y[j, s] for s in NEW_FACILITY_SIZES) <= 1, name=f"dist_new_new[{z},{i},{j}]")

    # ensure at most one facility per potential location
    for l in L:
        m.addConstr(quicksum(y[l, s] for s in NEW_FACILITY_SIZES) <= 1, name=f"one_per_loc[{l}]")

    # new-existing
    for z, pairs in new_exist_conflicts.items():
        for (i, f) in pairs:
            # If location i is too close to existing facility f, then we forbid building at i (within this model).
            # This is a conservative modeling choice (you could also allow but add penalty).
            m.addConstr(quicksum(y[i, s] for s in NEW_FACILITY_SIZES) <= 0, name=f"dist_new_exist_block[{z},{i},{f}]")
