
# IEOR E4004 — Project I (Model 2 Only): Realistic Expansion & Location (Gurobi)

This notebook loads the provided CSVs, prepares populations and spacing conflicts, and **builds & solves the Model 2** (realistic expansion with piecewise expansion cost and distance constraints).  
It will export a CSV summary named `model_2_summary.csv`.


In [7]:

# %% [markdown]
# ## 1) Imports & Global Config

import math
import pandas as pd
import numpy as np
from pathlib import Path
from gurobipy import Model, GRB, quicksum

DATA_DIR = Path("C:/Users/evere/Desktop/研究生课程/4004_Opt/ChildCareDeserts_Data")  # adjust if needed

# New facility sizes and costs
NEW_FACILITY_SIZES = {
    "small":  {"total": 100, "max_0_5": 50,  "cost": 65000},
    "medium": {"total": 200, "max_0_5": 100, "cost": 95000},
    "large":  {"total": 400, "max_0_5": 200, "cost": 115000},
}

# Distance threshold in miles
MIN_DISTANCE_MILES = 0.06

# Employment/Income thresholds for high-demand ZIPs
EMPLOYMENT_RATE_CUTOFF = 0.60
AVG_INCOME_CUTOFF = 60000.0


In [8]:

# %% [markdown]
# ## 2) Utility Functions

def haversine_miles(lat1, lon1, lat2, lon2):
    """Haversine distance in miles between two lat/lon points (degrees)."""
    R = 3958.8  # Earth radius in miles
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi/2.0)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2.0)**2
    return 2 * R * math.asin(math.sqrt(a))

def safe_series(df, col):
    """Return a Series if column exists, else zeros."""
    return df[col] if col in df.columns else 0

def compute_populations(pop_df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute population aggregates by zipcode for:
    - pop_0_5 ≈ pop['0-4'] + (1/5)*pop['5-9']
    - pop_5_12 ≈ pop['5-9'] + (3/5)*pop['10-14']
    - pop_0_12 = pop_0_5 + pop_5_12
    """
    df = pop_df.copy()
    # Ensure numeric for all non-ZIP columns
    for c in df.columns:
        if c not in ["zipcode", "ZIP code", "zip_code"]:
            df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0.0)
    # Normalize ZIP column name
    if "zipcode" not in df.columns:
        if "ZIP code" in df.columns:
            df.rename(columns={"ZIP code": "zipcode"}, inplace=True)
        elif "zip_code" in df.columns:
            df.rename(columns={"zip_code": "zipcode"}, inplace=True)

    pop_0_4 = safe_series(df, "0-4")
    pop_5_9 = safe_series(df, "5-9")
    pop_10_14 = safe_series(df, "10-14")
    pop_0_5 = pop_0_4 + (1.0/5.0)*pop_5_9
    pop_5_12 = pop_5_9 + (3.0/5.0)*pop_10_14
    pop_0_12 = pop_0_5 + pop_5_12
    return df[["zipcode"]].assign(pop_0_5=pop_0_5, pop_5_12=pop_5_12, pop_0_12=pop_0_12)


In [9]:

# %% [markdown]
# ## 3) Load & Prepare Data

DATA_DIR = Path("/mnt/data")

# Existing facilities
ccare = pd.read_csv(DATA_DIR / "child_care_regulated.csv")

# Normalize zip column name
if "zip_code" in ccare.columns:
    ccare["zipcode"] = ccare["zip_code"]
elif "ZIP code" in ccare.columns:
    ccare["zipcode"] = ccare["ZIP code"]

# Ensure numeric capacities
for col in ["infant_capacity", "toddler_capacity", "preschool_capacity",
            "school_age_capacity", "children_capacity", "total_capacity"]:
    if col in ccare.columns:
        ccare[col] = pd.to_numeric(ccare[col], errors="coerce").fillna(0.0)
    else:
        ccare[col] = 0.0
ccare["latitude"] = pd.to_numeric(ccare["latitude"], errors="coerce")
ccare["longitude"] = pd.to_numeric(ccare["longitude"], errors="coerce")

# Potential new locations
locs = pd.read_csv(DATA_DIR / "potential_locations.csv")
if "zipcode" not in locs.columns and "ZIP code" in locs.columns:
    locs.rename(columns={"ZIP code": "zipcode"}, inplace=True)

# Population
pop_raw = pd.read_csv(DATA_DIR / "population.csv")
pop = compute_populations(pop_raw)

# Income
inc = pd.read_csv(DATA_DIR / "avg_individual_income.csv")
if "ZIP code" in inc.columns:
    inc.rename(columns={"ZIP code": "zipcode", "average income": "avg_income"}, inplace=True)
if "average income" in inc.columns and "avg_income" not in inc.columns:
    inc.rename(columns={"average income": "avg_income"}, inplace=True)

# Employment rate
emp = pd.read_csv(DATA_DIR / "employment_rate.csv")
if "employment rate" in emp.columns and "employment_rate" not in emp.columns:
    emp.rename(columns={"employment rate": "employment_rate"}, inplace=True)

# ZIP-level merged table
zip_df = pop.merge(inc[["zipcode", "avg_income"]], on="zipcode", how="left")
zip_df["avg_income"] = pd.to_numeric(zip_df["avg_income"], errors="coerce")
zip_df["employment_rate"] = pd.to_numeric(zip_df["employment_rate"], errors="coerce")
zip_df.fillna({"avg_income": np.inf, "employment_rate": 0.0}, inplace=True)

# Aggregate existing capacity per facility and by zip
ccare["cap_0_5"] = ccare[["infant_capacity", "toddler_capacity", "preschool_capacity"]].sum(axis=1)
ccare["cap_5_12"] = ccare["school_age_capacity"]
ccare["cap_total"] = ccare["total_capacity"].replace({np.nan: 0.0})
mask_total_zero = (ccare["cap_total"].isna()) | (ccare["cap_total"] <= 0)
ccare.loc[mask_total_zero, "cap_total"] = (ccare["cap_0_5"].fillna(0) + ccare["cap_5_12"].fillna(0))

# Ensure an ID per facility (string)
if "facility_id" not in ccare.columns:
    ccare["facility_id"] = np.arange(len(ccare))
ccare["facility_id"] = ccare["facility_id"].astype(str)

print(f"Facilities: {len(ccare)} | Potential locations: {len(locs)} | ZIPs: {len(zip_df)}")
ccare.head(2), locs.head(2), zip_df.head(2)


FileNotFoundError: [Errno 2] No such file or directory: '\\mnt\\data\\child_care_regulated.csv'

In [None]:

# %% [markdown]
# ## 4) Build Spacing Conflicts within ZIPs (0.06 miles)

def build_conflicts(ccare: pd.DataFrame, locs: pd.DataFrame, min_miles=MIN_DISTANCE_MILES):
    locs_clean = locs.dropna(subset=["latitude", "longitude"]).copy()
    ccare_clean = ccare.dropna(subset=["latitude", "longitude"]).copy()
    locs_clean["loc_id"] = locs_clean.index.astype(int)

    new_new_conflicts = {}
    new_exist_conflicts = {}

    for z, group in locs_clean.groupby("zipcode"):
        gps = group[["loc_id", "latitude", "longitude"]].values.tolist()
        pairs = []
        for i in range(len(gps)):
            for j in range(i+1, len(gps)):
                id_i, la_i, lo_i = gps[i]
                id_j, la_j, lo_j = gps[j]
                d = haversine_miles(la_i, lo_i, la_j, lo_j)
                if d < min_miles:
                    pairs.append((int(id_i), int(id_j)))
        new_new_conflicts[z] = pairs

        exist_pairs = []
        ex = ccare_clean[ccare_clean["zipcode"] == z]
        for _, r in group.iterrows():
            for _, e in ex.iterrows():
                d = haversine_miles(r["latitude"], r["longitude"], e["latitude"], e["longitude"])
                if d < min_miles:
                    exist_pairs.append((int(r["loc_id"]), str(e["facility_id"])))
        new_exist_conflicts[z] = exist_pairs

    return locs_clean, new_new_conflicts, new_exist_conflicts

locs_clean, new_new_conflicts, new_exist_conflicts = build_conflicts(ccare, locs, MIN_DISTANCE_MILES)
print("Conflicts built.")
print("Example new-new conflicts count (first 3 zips):",
      [(z, len(pairs)) for z, pairs in list(new_new_conflicts.items())[:3]])
print("Example new-existing conflicts count (first 3 zips):",
      [(z, len(pairs)) for z, pairs in list(new_exist_conflicts.items())[:3]])


In [None]:

# %% [markdown]
# ## 5) Build & Solve Model 2 (Realistic Expansion & Location)

def solve_model_2(ccare, locs, zip_df, new_new_conflicts, new_exist_conflicts):
    m = Model("Model_2_Realistic")

    F = list(ccare["facility_id"])
    facility_nf = dict(zip(F, ccare["cap_total"]))
    facility_zip = dict(zip(F, ccare["zipcode"]))

    L = list(locs.index)
    loc_zip = dict(zip(L, locs["zipcode"]))
    Z = list(zip_df["zipcode"])

    # Decision variables
    x1 = {f: m.addVar(lb=0.0, ub=0.10*facility_nf[f], vtype=GRB.CONTINUOUS, name=f"x1[{f}]") for f in F}
    x2 = {f: m.addVar(lb=0.0, ub=0.05*facility_nf[f], vtype=GRB.CONTINUOUS, name=f"x2[{f}]") for f in F}
    x3 = {f: m.addVar(lb=0.0, ub=0.05*facility_nf[f], vtype=GRB.CONTINUOUS, name=f"x3[{f}]") for f in F}

    y = {(l, s): m.addVar(vtype=GRB.BINARY, name=f"y_build[{l},{s}]") for l in L for s in NEW_FACILITY_SIZES}

    s0_5 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"s0_5[{z}]") for z in Z}
    s5_12 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"s5_12[{z}]") for z in Z}
    a_new_0_5 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"a_new0_5[{z}]") for z in Z}
    b_new_5_12 = {z: m.addVar(lb=0.0, vtype=GRB.CONTINUOUS, name=f"b_new5_12[{z}]") for z in Z}

    m.update()

    # Helpers
    cap0_5_zip = ccare.groupby("zipcode")["cap_0_5"].sum().to_dict()
    cap5_12_zip = ccare.groupby("zipcode")["cap_5_12"].sum().to_dict()
    cap_total_zip = ccare.groupby("zipcode")["cap_total"].sum().to_dict()

    pop_0_5 = dict(zip(zip_df["zipcode"], zip_df["pop_0_5"]))
    pop_5_12 = dict(zip(zip_df["zipcode"], zip_df["pop_5_12"]))
    pop_0_12 = dict(zip(zip_df["zipcode"], zip_df["pop_0_12"]))
    avg_income = dict(zip(zip_df["zipcode"], zip_df["avg_income"]))
    emp_rate = dict(zip(zip_df["zipcode"], zip_df["employment_rate"]))

    # Constraints
    for f in F:
        nf = facility_nf[f]
        m.addConstr(x1[f] + x2[f] + x3[f] <= 0.20 * nf, name=f"x_total_cap[{f}]")

    for z in Z:
        exp_in_z = quicksum(x1[f] + x2[f] + x3[f] for f in F if facility_zip[f] == z)
        new_tot_in_z = quicksum(NEW_FACILITY_SIZES[s]["total"] * y[l, s] for l in L if loc_zip[l] == z for s in NEW_FACILITY_SIZES)

        m.addConstr(s0_5[z] + s5_12[z] == cap_total_zip.get(z, 0.0) + exp_in_z + new_tot_in_z, name=f"alloc_balance[{z}]")
        m.addConstr(s0_5[z] <= cap0_5_zip.get(z, 0.0) + exp_in_z + a_new_0_5[z], name=f"max_0_5_bound[{z}]")
        m.addConstr(a_new_0_5[z] <= quicksum(NEW_FACILITY_SIZES[s]["max_0_5"] * y[l, s] for l in L if loc_zip[l] == z for s in NEW_FACILITY_SIZES), name=f"new_0_5_cap[{z}]")
        m.addConstr(a_new_0_5[z] + b_new_5_12[z] <= new_tot_in_z, name=f"new_split_tot[{z}]")

        high_demand = (emp_rate.get(z, 0.0) >= EMPLOYMENT_RATE_CUTOFF) or (avg_income.get(z, np.inf) <= AVG_INCOME_CUTOFF)
        threshold = 0.5 if high_demand else (1.0/3.0)
        m.addConstr(s0_5[z] + s5_12[z] >= threshold * pop_0_12.get(z, 0.0), name=f"desert_off[{z}]")
        m.addConstr(s0_5[z] >= (2.0/3.0) * pop_0_5.get(z, 0.0), name=f"policy_0_5[{z}]")

    # Distance conflicts
    for z, pairs in new_new_conflicts.items():
        for (i, j) in pairs:
            m.addConstr(quicksum(y[i, s] for s in NEW_FACILITY_SIZES) + quicksum(y[j, s] for s in NEW_FACILITY_SIZES) <= 1, name=f"dist_new_new[{z},{i},{j}]")
    for l in L:
        m.addConstr(quicksum(y[l, s] for s in NEW_FACILITY_SIZES) <= 1, name=f"one_per_loc[{l}]")
    for z, pairs in new_exist_conflicts.items():
        for (i, f) in pairs:
            m.addConstr(quicksum(y[i, s] for s in NEW_FACILITY_SIZES) <= 0, name=f"dist_new_exist_block[{z},{i},{f}]")

    # Objective
    expand_cost = quicksum( ((20000.0 / facility_nf[f]) + 200.0) * x1[f] +
                            ((20000.0 / facility_nf[f]) + 400.0) * x2[f] +
                            ((20000.0 / facility_nf[f]) + 1000.0) * x3[f]
                           for f in F if facility_nf[f] > 0 )
    new_build_cost = quicksum( NEW_FACILITY_SIZES[s]["cost"] * y[l, s] for l in L for s in NEW_FACILITY_SIZES )
    m.setObjective(expand_cost + new_build_cost, GRB.MINIMIZE)

    m.Params.OutputFlag = 1
    m.optimize()

    status = m.Status
    obj = m.ObjVal if status in (GRB.OPTIMAL, GRB.SUBOPTIMAL) else None

    # Export summary
    rows = []
    for z in Z:
        rows.append({
            "zipcode": z,
            "s0_5": s0_5[z].X if s0_5[z].X is not None else 0.0,
            "s5_12": s5_12[z].X if s5_12[z].X is not None else 0.0,
        })
    summary = pd.DataFrame(rows)
    summary["model_obj_total_cost"] = obj
    summary.to_csv("model_2_summary.csv", index=False)

    return {"status": status, "objective": obj, "summary": summary, "model": m}

res2 = solve_model_2(ccare, locs_clean, zip_df, new_new_conflicts, new_exist_conflicts)
print(f"Model 2 status: {res2['status']} | Min total funding: {res2['objective']}")


In [None]:

# %% [markdown]
# ## 6) Inspect Results

import pandas as pd
m2 = pd.read_csv("model_2_summary.csv")
m2.head(10)
