In [1]:
using CSV
using DataFrames

using JuMP
using Gurobi

using LinearAlgebra
using Statistics
using Random
Random.seed!(42)  # For reproducibility

TaskLocalRNG()

## Import data

In [2]:
# Input CSV locations (relative to the notebook's directory)
restaurant_path    = "../clean_data/restaurant_data_expanded.csv"
scrap_path         = "../clean_data/food_scrap_locations.csv"
neighborhood_path  = "../clean_data/neighborhood_supply.csv"

# Read CSVs into DataFrames
food_scrap_locations  = CSV.read(scrap_path, DataFrame)
neighborhood_supply   = CSV.read(neighborhood_path, DataFrame)
restaurant_data       = CSV.read(restaurant_path, DataFrame)

# Work with a random subsample of at most 500 restaurants.
# The cap prevents a BoundsError when the file holds fewer than 500 rows;
# the shuffle call is unchanged, so the seeded sample is the same as before
# whenever at least 500 rows are available.
n_sample = min(500, nrow(restaurant_data))
restaurant_data = restaurant_data[shuffle(1:nrow(restaurant_data))[1:n_sample], :]

# Verify
println("Sampled restaurants: ", nrow(restaurant_data))
println("Total supply: ", sum(restaurant_data.waste) / 1e6, " million lbs")

Sampled restaurants: 500
Total supply: 37.292250259216736 million lbs


# Clean the data:

## Strip thousands separators from numbers, cast each field to the correct data type, drop unnecessary columns, and remove negative supply values produced by the synthetic data

In [3]:
# ================================
# 3a. CLEAN RESTAURANT DATA
# ================================
# Expected column order: latitude | longitude | waste.
# The third column is renamed to :supply (positional rename).
rename!(restaurant_data, names(restaurant_data)[3] => :supply)

# Cast the numeric columns to Float64, replacing each column in place
for col in (:supply, :latitude, :longitude)
    restaurant_data[!, col] = Float64.(restaurant_data[!, col])
end

# Drop restaurants whose supply is negative
filter!(:supply => s -> s >= 0, restaurant_data)


# ================================
# 3b. CLEAN FOOD SCRAP CENTER DATA
# ================================
# Columns 15 and 16 hold the coordinates; give them the same lowercase
# names used by the other tables. Renames are applied one after the other.
for (position, new_name) in ((15, :latitude), (16, :longitude))
    rename!(food_scrap_locations, names(food_scrap_locations)[position] => new_name)
end

# Cast coordinates to Float64
for col in (:latitude, :longitude)
    food_scrap_locations[!, col] = Float64.(food_scrap_locations[!, col])
end

# Only the coordinate columns are needed downstream
select!(food_scrap_locations, [:latitude, :longitude])

# ================================
# 4. CLEAN NEIGHBORHOOD SUPPLY DATA
# ================================
# The fourth column is renamed to :supply_gap (positional rename)
rename!(neighborhood_supply, names(neighborhood_supply)[4] => :supply_gap)

# The column may have been read as text (e.g. "1,234.5") or as numbers;
# strip commas and parse only in the text case.
if eltype(neighborhood_supply.supply_gap) <: AbstractString
    neighborhood_supply.supply_gap = parse.(
        Float64,
        replace.(neighborhood_supply.supply_gap, "," => ""),
    )
end

# Demand is the negated gap clipped at zero, so a non-negative gap gives 0 demand
neighborhood_supply.demand = max.(0.0, .-neighborhood_supply.supply_gap)

for col in (:latitude, :longitude)
    neighborhood_supply[!, col] = Float64.(neighborhood_supply[!, col])
end

# Keep only the columns used by the model
select!(neighborhood_supply, [:latitude, :longitude, :demand, :Year])

# ================================
# 5. SHOW CLEANED HEADS
# ================================
for (title, table) in (
    ("=== Restaurants (cleaned) ===", restaurant_data),
    ("\n=== Food Scrap Locations (cleaned) ===", food_scrap_locations),
    ("\n=== Neighborhood Supply (cleaned) ===", neighborhood_supply),
)
    println(title)
    println(first(table, 5))
end

=== Restaurants (cleaned) ===
[1m5×3 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m[1m supply        [0m
     │[90m Float64  [0m[90m Float64   [0m[90m Float64       [0m
─────┼────────────────────────────────────
   1 │  40.7424   -73.9943      1.11117e5
   2 │  40.7323   -73.8722  71532.9
   3 │  40.7794   -73.9492  98561.5
   4 │  40.6102   -73.9208  19103.5
   5 │  40.7315   -73.861   94690.4

=== Food Scrap Locations (cleaned) ===
[1m5×2 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m
     │[90m Float64  [0m[90m Float64   [0m
─────┼─────────────────────
   1 │  40.6355   -74.0228
   2 │  40.7526   -73.969
   3 │  40.7635   -74.0002
   4 │  40.762    -73.9693
   5 │  40.7174   -74.0108

=== Neighborhood Supply (cleaned) ===
[1m5×4 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m[1m demand    [0m[1m Year  [0m
     │[90m Float64  [0m[90m Float64   [0m[90m Float64   [0m[90m Int64 [0m
─────┼─────────────────

## Get vectors for supply[i] for all restaurants, demand[k] for all neighborhoods, and cij[i,j] and cjk[j,k] to plug directly into JuMP model

In [27]:
# ================
# 6. EXTRACT VECTORS
# ================
R = nrow(restaurant_data)
D = nrow(food_scrap_locations)

# Unique neighborhood locations, taken from the 2023 rows only so that a
# location is not counted once per year.
neighborhood_locations = unique(
    neighborhood_supply[neighborhood_supply.Year .== 2023, :],
    [:latitude, :longitude]
)

N = nrow(neighborhood_locations)

println("Problem dimensions:")
println("  Restaurants (R): $R")
println("  Distribution Centers (D): $D")
println("  Neighborhoods (N): $N")

# Per-year demand vectors, in the row order of neighborhood_supply.
# NOTE(review): this assumes every year lists the neighborhoods in the same
# order as neighborhood_locations — confirm against the CSV.
demand_2023 = neighborhood_supply[neighborhood_supply.Year .== 2023, :].demand
demand_2024 = neighborhood_supply[neighborhood_supply.Year .== 2024, :].demand
demand_2025 = neighborhood_supply[neighborhood_supply.Year .== 2025, :].demand

# Every scenario must provide exactly one demand value per neighborhood.
# This replaces the former hard-coded `N == 197` assertion, which only held
# for one specific input file.
for (year, vec) in (("2023", demand_2023), ("2024", demand_2024), ("2025", demand_2025))
    length(vec) == N || error(
        "demand_$year has $(length(vec)) rows but there are $N unique neighborhood locations"
    )
end

demand_df = DataFrame(
    y2023 = demand_2023,
    y2024 = demand_2024,
    y2025 = demand_2025
)

# Total capacity (sum of all restaurant supply)
Q = sum(restaurant_data.supply)
# Fixed cost of opening a center; a Float64 so it can be passed as the
# `F::Float64` keyword of the solver functions.
F = 10000.0

# ================
# 7. MANHATTAN DISTANCE FUNCTION
# ================
# L1 distance computed directly on the coordinate values, so the result is in
# the same units as the latitude/longitude columns.
manhattan(lat1, lon1, lat2, lon2) = abs(lat1 - lat2) + abs(lon1 - lon2)

# ================
# 8. COST MATRICES
# ================
# c_ij: Restaurants (i) → Distribution centers (j)
c_ij = [manhattan(restaurant_data.latitude[i], restaurant_data.longitude[i],
                 food_scrap_locations.latitude[j], food_scrap_locations.longitude[j])
       for i in 1:R, j in 1:D]

# c_jk: Distribution centers (j) → Neighborhoods (k)
# Built from neighborhood_locations (one row per location), not from
# neighborhood_supply (one row per location and year).
c_jk = [manhattan(food_scrap_locations.latitude[j], food_scrap_locations.longitude[j],
                 neighborhood_locations.latitude[k], neighborhood_locations.longitude[k])
       for j in 1:D, k in 1:N]

println("\nCost matrix dimensions:")
println("  c_ij (R x D): ", size(c_ij))
println("  c_jk (D x N): ", size(c_jk))

# Verify everything matches (explicit checks; `@assert` may be disabled)
size(c_ij) == (R, D) || error("c_ij dimension mismatch!")
size(c_jk) == (D, N) || error("c_jk dimension mismatch!")
nrow(demand_df) == N || error("demand_df rows must equal N!")

println("\n✓ All dimensions verified!")
println("  N = $N")
println("  demand_df rows = $(nrow(demand_df))")
println("  c_jk columns = $(size(c_jk)[2])")

Problem dimensions:
  Restaurants (R): 500
  Distribution Centers (D): 201
  Neighborhoods (N): 197

Cost matrix dimensions:
  c_ij (R x D): (500, 201)
  c_jk (D x N): (201, 197)

✓ All dimensions verified!
  N = 197
  demand_df rows = 197
  c_jk columns = 197


# Sanity check data

In [28]:
# Sanity-check the model inputs for NaN / Inf values.
# Uses the names defined in this notebook: all three scenario columns of
# demand_df are checked together, and the cost matrices are c_ij and c_jk.
# `let` keeps the temporary matrix out of the notebook's global scope.
let all_demand = Matrix(demand_df)
    println("Demand: min = ", minimum(all_demand), ", max = ", maximum(all_demand))
    println("Any NaN in demand? ", any(isnan, all_demand))
    println("Any Inf in demand? ", any(isinf, all_demand))

    println("Any NaN in cij? ", any(isnan, c_ij))
    println("Any Inf in c_ij? ", any(isinf, c_ij))

    println("Any NaN in cjk? ", any(isnan, c_jk))
    println("Any Inf in cjk? ", any(isinf, c_jk))
end

Demand: min = 0.0, max = 4.27814874096381e6


Any NaN in demand? false
Any Inf in demand? false
Any NaN in cij? false
Any Inf in c_ij? false
Any NaN in cjk? false
Any Inf in cjk? false


## Stochastic Demand

### Adaptive Stochastic: Opening Centers Based On Historical Demand Distribution

We formulate the problem as a two-stage stochastic program with recourse. We solve the multi-objective problem using the weighted-sum method, systematically varying weights $w_c$ and $w_e$ to trace the Pareto frontier.

**Sets and Indices:**
- $R$: Set of restaurants, indexed by $i$
- $D$: Set of distribution centers, indexed by $j$
- $N$: Set of neighborhoods, indexed by $k$
- $S$: Set of demand scenarios, indexed by $s \in \{2023, 2024, 2025\}$

**Parameters:**
- $s_i$: Food supply available at restaurant $i$
- $d_k^s$: Food demand in neighborhood $k$ under scenario $s$
- $c_{ij}$: Transportation cost from restaurant $i$ to center $j$
- $c_{jk}$: Transportation cost from center $j$ to neighborhood $k$
- $F$: Fixed cost of opening a distribution center
- $Q$: Total system capacity (sum of all supply)
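- $p_s$: Probability of scenario $s$ occurring
- $w_c$, $w_e$: Non-negative weights on transportation cost and on worst-case unmet demand, respectively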

**Decision Variables:**
- $z_j \in \{0,1\}$: Binary variable indicating if center $j$ is opened (first-stage)
- $x_{ij}^s \geq 0$: Flow from restaurant $i$ to center $j$ in scenario $s$
- $y_{jk}^s \geq 0$: Flow from center $j$ to neighborhood $k$ in scenario $s$
- $u_k^s \geq 0$: Unmet demand at neighborhood $k$ in scenario $s$
- $t^s \geq 0$: Worst (maximum) unmet demand across all neighborhoods in scenario $s$

**Objective Function:**

$$\min \quad F \sum_{j \in D} z_j + \sum_{s \in S} p_s \left[ w_c \cdot \text{Cost}^s + w_e \cdot t^s \right]$$

where:

$$\text{Cost}^s = \sum_{i \in R, j \in D} c_{ij} x_{ij}^s + \sum_{j \in D, k \in N} c_{jk} y_{jk}^s$$

**Constraints:**

1. **Supply constraints:** Each restaurant must send all its supply
   $$\sum_{j \in D} x_{ij}^s = s_i \quad \forall i \in R, s \in S$$

2. **Flow conservation:** What flows into a center must flow out
   $$\sum_{k \in N} y_{jk}^s = \sum_{i \in R} x_{ij}^s \quad \forall j \in D, s \in S$$

3. **Demand satisfaction:** Deliveries plus unmet demand equal each neighborhood's demand
   $$\sum_{j \in D} y_{jk}^s + u_k^s = d_k^s \quad \forall k \in N, s \in S$$

4. **Center capacity:** Flow through a center only if it's open
   $$\sum_{i \in R} x_{ij}^s \leq Q \cdot z_j \quad \forall j \in D, s \in S$$

5. **Equity constraint (minimax):** Track worst unmet demand in each scenario
   $$t^s \geq u_k^s \quad \forall k \in N, s \in S$$

In [11]:
# Model dimensions and scalar parameters
R = nrow(restaurant_data)
D = nrow(food_scrap_locations)
Q = sum(restaurant_data.supply)   # total supply, used as center capacity bound

F = 10000.0     # Center opening cost (Float64 to match the `F::Float64` keyword)

S = 3 # 3 demand years

# Per-year demand vectors (one entry per neighborhood row of that year)
demand_2023 = neighborhood_supply[neighborhood_supply.Year .== 2023, :].demand
demand_2024 = neighborhood_supply[neighborhood_supply.Year .== 2024, :].demand
demand_2025 = neighborhood_supply[neighborhood_supply.Year .== 2025, :].demand

# Number of neighborhoods = number of 2023 rows
N = length(demand_2023)

# Upper bound on total demand. The original expression `sum(demand)` referred
# to a variable that is not defined in this notebook and ran before any demand
# vector existed.
# NOTE(review): the largest per-scenario total is used here — confirm this is
# the intended meaning of M.
M = maximum(sum, (demand_2023, demand_2024, demand_2025))

# Kept as the last expression so the cell still displays the table
demand_df = DataFrame(
    y2023 = demand_2023,
    y2024 = demand_2024,
    y2025 = demand_2025
)

Row,y2023,y2024,y2025
Unnamed: 0_level_1,Float64,Float64,Float64
1,0.0,0.0,1.02143e5
2,0.0,0.0,3.33493e5
3,0.0,0.0,0.0
4,0.0,0.0,1.13653e5
5,0.0,0.0,0.0
6,82217.3,13128.3,5.06273e5
7,2.25651e5,0.0,0.0
8,9.04474e5,0.0,1.13911e5
9,0.0,0.0,0.0
10,1.17086e6,2.4661e6,0.0


In [12]:
# Compare total supply with the demand total averaged over the scenario
# columns of demand_df. `restaurant_data.supply` replaces the undefined
# name `supply`; `let` keeps the temporaries out of global scope.
let total_supply = sum(restaurant_data.supply),
    avg_total_demand = mean(sum(col) for col in eachcol(demand_df))

    println("Total supply: ", total_supply)
    println("Total demand (avg): ", avg_total_demand)
    println("Supply/Demand ratio: ", total_supply / avg_total_demand)
end

Total supply: 3.729225025921673e7
Total demand (avg): 5.081311007183481e7
Supply/Demand ratio: 0.7339100127210566


In [18]:
using JuMP, Gurobi

"""
    solve_stochastic_food_rescue(restaurant_data, food_scrap_locations, demand_df,
                                 c_ij, c_jk, w_c, w_e;
                                 F = 10000.0, time_limit = 300, mip_gap = 0.01)

Build and solve the two-stage stochastic food rescue model with a minimax
unmet-demand (equity) term, using Gurobi.

All dimensions (R, D, N), the scenario count and the capacity bound `Q` are
derived from the arguments, so the model does not depend on notebook globals.

# Arguments
- `restaurant_data`: DataFrame with a `supply` column (one row per restaurant)
- `food_scrap_locations`: DataFrame with one row per candidate distribution center
- `demand_df`: DataFrame with columns `y2023`, `y2024`, `y2025` (one row per neighborhood)
- `c_ij`: Cost matrix from restaurants to distribution centers [R x D]
- `c_jk`: Cost matrix from distribution centers to neighborhoods [D x N]
- `w_c`: Weight for transportation cost
- `w_e`: Weight for equity (worst unmet demand)

# Keywords
- `F`: Fixed cost of opening a distribution center (default: 10000)
- `time_limit`: Solver time limit in seconds (default: 300)
- `mip_gap`: MIP optimality gap (default: 0.01)

# Returns
A `Dict` keyed by strings. When a solution is available it holds the status,
solve time, objective, cost components, weights, opened centers and
per-scenario metrics. Note that `"expected_transport_cost"` and
`"expected_equity_penalty"` are the weighted objective contributions
(`p * w_c * Σ cost` and `p * w_e * Σ t`). Otherwise only `"status"`,
`"objective"` (NaN), `"w_cost"` and `"w_equity"` are present.

# Throws
- `DimensionMismatch` if `c_ij` is not R x D or `c_jk` is not D x N.
"""
function solve_stochastic_food_rescue(
    restaurant_data::DataFrame,
    food_scrap_locations::DataFrame,
    demand_df::DataFrame,
    c_ij::Matrix,
    c_jk::Matrix,
    w_c::Float64,
    w_e::Float64;
    F::Float64 = 10000.0,
    time_limit::Int = 300,
    mip_gap::Float64 = 0.01
)

    # Dimensions come from the inputs, not from globals that may be stale
    R = nrow(restaurant_data)
    D = nrow(food_scrap_locations)
    N = nrow(demand_df)
    Q = sum(restaurant_data.supply)   # total supply bounds any center's inflow

    # One column per scenario, in the order 2023, 2024, 2025
    scenario_labels = ("2023", "2024", "2025")
    demand = hcat(demand_df.y2023, demand_df.y2024, demand_df.y2025)
    S = size(demand, 2)

    size(c_ij) == (R, D) || throw(DimensionMismatch(
        "c_ij has size $(size(c_ij)) but expected ($R, $D)"))
    size(c_jk) == (D, N) || throw(DimensionMismatch(
        "c_jk has size $(size(c_jk)) but expected ($D, $N)"))

    # Scenario probability (equal weight)
    p = 1/3

    # Initialize model
    model = Model(Gurobi.Optimizer)
    set_optimizer_attribute(model, "TimeLimit", time_limit)
    set_optimizer_attribute(model, "MIPGap", mip_gap)
    set_optimizer_attribute(model, "OutputFlag", 0)  # Suppress solver output

    # DECISION VARIABLES

    # First-stage: center opening decisions
    @variable(model, z[1:D], Bin)

    # Second-stage: flow variables for each scenario
    @variable(model, x[1:R, 1:D, 1:S] >= 0)  # Restaurant to center
    @variable(model, y[1:D, 1:N, 1:S] >= 0)  # Center to neighborhood
    @variable(model, u[1:N, 1:S] >= 0)       # Unmet demand
    @variable(model, t[1:S] >= 0)            # Worst unmet demand per scenario

    # OBJECTIVE FUNCTION
    @expression(model, fixed_costs, F * sum(z[j] for j in 1:D))

    @expression(model, transport_cost[s in 1:S],
        sum(c_ij[i,j] * x[i,j,s] for i in 1:R, j in 1:D) +
        sum(c_jk[j,k] * y[j,k,s] for j in 1:D, k in 1:N)
    )

    @objective(model, Min,
        fixed_costs +
        p * sum(w_c * transport_cost[s] + w_e * t[s] for s in 1:S)
    )

    # CONSTRAINTS

    # 1. Supply constraints: each restaurant sends all its supply
    @constraint(model, supply_constraint[i in 1:R, s in 1:S],
        sum(x[i,j,s] for j in 1:D) == restaurant_data.supply[i]
    )

    # 2. Flow conservation at distribution centers
    @constraint(model, flow_conservation[j in 1:D, s in 1:S],
        sum(y[j,k,s] for k in 1:N) == sum(x[i,j,s] for i in 1:R)
    )

    # 3. Demand satisfaction: deliveries plus unmet demand equal demand
    @constraint(model, demand_constraint[k in 1:N, s in 1:S],
        sum(y[j,k,s] for j in 1:D) + u[k,s] == demand[k,s]
    )

    # 4. Center capacity linking (only open centers can have flow)
    @constraint(model, center_capacity[j in 1:D, s in 1:S],
        sum(x[i,j,s] for i in 1:R) <= Q * z[j]
    )

    # 5. Equity constraint: track worst unmet demand in each scenario
    @constraint(model, equity_constraint[k in 1:N, s in 1:S],
        t[s] >= u[k,s]
    )

    # SOLVE
    optimize!(model)

    # EXTRACT RESULTS
    # A time-limited run may stop before any incumbent is found; querying
    # values would then throw, so also require that a solution exists.
    status = termination_status(model)
    if status in (MOI.OPTIMAL, MOI.TIME_LIMIT, MOI.ALMOST_OPTIMAL) && has_values(model)

        z_val = value.(z)
        u_val = value.(u)

        # Calculate component costs (weighted objective contributions)
        fixed_cost_value = F * sum(z_val)
        transport_costs = [value(transport_cost[s]) for s in 1:S]
        expected_transport = p * w_c * sum(transport_costs)
        worst_unmet = [value(t[s]) for s in 1:S]
        expected_equity = p * w_e * sum(worst_unmet)

        results = Dict(
            # Optimization status
            "status" => status,
            "solve_time" => solve_time(model),

            # Objective components
            "objective" => objective_value(model),
            "fixed_cost" => fixed_cost_value,
            "expected_transport_cost" => expected_transport,
            "expected_equity_penalty" => expected_equity,

            # Weights used
            "w_cost" => w_c,
            "w_equity" => w_e,

            # Centers opened
            "centers_opened" => findall(>(0.5), z_val),
            "num_centers" => sum(z_val .> 0.5),

            # Scenario-specific metrics
            "worst_unmet_by_scenario" => Dict(
                scenario_labels[s] => worst_unmet[s] for s in 1:S
            ),
            "total_unmet_by_scenario" => Dict(
                scenario_labels[s] => sum(u_val[k,s] for k in 1:N) for s in 1:S
            ),
            "transport_cost_by_scenario" => Dict(
                scenario_labels[s] => transport_costs[s] for s in 1:S
            ),

            # Detailed solution (optional - comment out if too large)
            "z_values" => z_val,
            "unmet_demand" => u_val
        )

        return results
    else
        @warn "Optimization failed with status: $(status)"
        return Dict(
            "status" => status,
            "objective" => NaN,
            "w_cost" => w_c,
            "w_equity" => w_e
        )
    end
end

# Trace the Pareto frontier by solving the model for a sweep of weight pairs.
#
# Weight pairs are (w, 1 - w) for `n_points` evenly spaced values of w in [0, 1];
# w is the cost weight and 1 - w the equity weight.
#
# Keywords:
# - n_points:   number of weight pairs (must be at least 2)
# - F:          fixed cost of opening a center, forwarded to the solver
# - time_limit: per-solve time limit in seconds, forwarded to the solver
#               (default 60, the value that was previously hard-coded)
#
# Returns a vector with one result Dict per weight pair, in sweep order.
function generate_pareto_frontier(
    restaurant_data::DataFrame,
    food_scrap_locations::DataFrame,
    demand_df::DataFrame,
    c_ij::Matrix,
    c_jk::Matrix;
    n_points::Int = 11,
    F::Float64 = 10000.0,
    time_limit::Int = 60
)

    n_points >= 2 || throw(ArgumentError("n_points must be at least 2, got $n_points"))

    # Generate weight pairs that sum to 1
    weight_pairs = [(w, 1-w) for w in range(0, 1, length=n_points)]

    # Store results (typed container instead of Vector{Any})
    pareto_results = Dict{String,Any}[]

    println("Generating Pareto frontier with $n_points points...")
    for (i, (w_c, w_e)) in enumerate(weight_pairs)
        println("\nPoint $i/$n_points: w_cost=$w_c, w_equity=$w_e")

        result = solve_stochastic_food_rescue(
            restaurant_data,
            food_scrap_locations,
            demand_df,
            c_ij,
            c_jk,
            w_c,
            w_e;
            F = F,
            time_limit = time_limit
        )

        push!(pareto_results, result)

        # Report every point that carries a solution, not only proven optima;
        # time-limited incumbents were previously left out of the log.
        if haskey(result, "num_centers")
            if result["status"] != MOI.OPTIMAL
                println("  Status: $(result["status"])")
            end
            println("  Objective: $(round(result["objective"], digits=2))")
            println("  Centers: $(result["num_centers"])")
            println("  Avg worst unmet: $(round(mean(values(result["worst_unmet_by_scenario"])), digits=2))")
        end
    end

    return pareto_results
end

generate_pareto_frontier (generic function with 1 method)

In [29]:
# Report the size of the optimization problem.
# Continuous variables: x (R*D), y (D*N), u (N) and t (1) for each of 3 scenarios.
let n_rest = nrow(restaurant_data),
    n_ctr = nrow(food_scrap_locations),
    n_nbhd = nrow(demand_df)

    n_continuous = n_rest * n_ctr * 3 + n_ctr * n_nbhd * 3 + n_nbhd * 3 + 3

    println("Problem dimensions:")
    println("  Restaurants (R): ", n_rest)
    println("  Distribution Centers (D): ", n_ctr)
    println("  Neighborhoods (N): ", n_nbhd)
    println("  Total binary variables: ", n_ctr)
    println("  Total continuous variables: ", n_continuous)
end

Problem dimensions:
  Restaurants (R): 500
  Distribution Centers (D): 201
  Neighborhoods (N): 197
  Total binary variables: 201
  Total continuous variables: 420885


In [None]:
# Solve once for a fixed weight pair, with a 60 second time limit.
# Positional arguments come first; keywords follow the semicolon.
result = solve_stochastic_food_rescue(
    restaurant_data, food_scrap_locations, demand_df,
    c_ij, c_jk,
    0.7,  # w_c
    0.3;  # w_e
    time_limit = 60,
)

Set parameter Username
Set parameter LicenseID to value 2702779
Academic license - for non-commercial use only - expires 2026-09-03
Set parameter TimeLimit to value 60
Set parameter MIPGap to value 0.01
Set parameter MIPGap to value 0.01
Set parameter TimeLimit to value 60


Dict{String, Any} with 15 entries:
  "expected_transport_cost"    => 2.63867e6
  "transport_cost_by_scenario" => Dict("2025"=>4.49664e6, "2023"=>3.77405e6, "2…
  "objective"                  => 4.15951e6
  "w_equity"                   => 0.3
  "centers_opened"             => [1, 2, 4, 5, 7, 8, 9, 10, 11, 12  …  186, 187…
  "fixed_cost"                 => 1.19e6
  "worst_unmet_by_scenario"    => Dict("2025"=>1.20888e6, "2023"=>4.03945e5, "2…
  "total_unmet_by_scenario"    => Dict("2025"=>1.3467e7, "2023"=>5.32262e6, "20…
  "unmet_demand"               => [0.0 0.0 0.0; 0.0 0.0 0.0; … ; 0.0 0.0 1.2088…
  "status"                     => TIME_LIMIT
  "z_values"                   => [1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0,…
  "expected_equity_penalty"    => 330832.0
  "w_cost"                     => 0.7
  "num_centers"                => 119
  "solve_time"                 => 60.262

In [19]:
# Generate Pareto frontier.
# The cost matrices are passed under the names defined in this notebook
# (c_ij, c_jk); `cij` / `cjk` are not defined here.
pareto_results = generate_pareto_frontier(
    restaurant_data,
    food_scrap_locations,
    demand_df,
    c_ij,
    c_jk,
    n_points = 5
)

# Convert results to DataFrame for analysis.
# A failed solve only returns "status", "objective", "w_cost" and "w_equity",
# so the remaining fields fall back to `missing` instead of raising KeyError.
pareto_df = DataFrame(
    w_cost = [r["w_cost"] for r in pareto_results],
    w_equity = [r["w_equity"] for r in pareto_results],
    objective = [r["objective"] for r in pareto_results],
    num_centers = [get(r, "num_centers", missing) for r in pareto_results],
    expected_transport = [get(r, "expected_transport_cost", missing) for r in pareto_results],
    expected_equity = [get(r, "expected_equity_penalty", missing) for r in pareto_results]
)

Generating Pareto frontier with 5 points...

Point 1/5: w_cost=0.0, w_equity=1.0
Set parameter Username
Set parameter LicenseID to value 2702779
Academic license - for non-commercial use only - expires 2026-09-03
Set parameter TimeLimit to value 60
Set parameter MIPGap to value 0.01
Set parameter MIPGap to value 0.01
Set parameter TimeLimit to value 60
  Objective: 327078.19
  Centers: 1
  Avg worst unmet: 317078.19

Point 2/5: w_cost=0.25, w_equity=0.75
Set parameter Username
Set parameter LicenseID to value 2702779
Academic license - for non-commercial use only - expires 2026-09-03
Set parameter TimeLimit to value 60
Set parameter MIPGap to value 0.01
Set parameter MIPGap to value 0.01
Set parameter TimeLimit to value 60

Point 3/5: w_cost=0.5, w_equity=0.5
Set parameter Username
Set parameter LicenseID to value 2702779
Academic license - for non-commercial use only - expires 2026-09-03
Set parameter TimeLimit to value 60
Set parameter MIPGap to value 0.01
Set parameter MIPGap to val

Row,w_cost,w_equity,objective,num_centers,expected_transport,expected_equity
Unnamed: 0_level_1,Float64,Float64,Float64,Int64,Float64,Float64
1,0.0,1.0,327078.0,1,0.0,317078.0
2,0.25,0.75,2476210.0,1,2175800.0,290416.0
3,0.5,0.5,9192940.0,200,5947000.0,1245940.0
4,0.75,0.25,4573280.0,148,2804480.0,288795.0
5,1.0,0.0,4964070.0,142,3544070.0,0.0


In [20]:
"""
Solve anchor points to get normalization factors
"""
function get_normalization_factors(
    restaurant_data::DataFrame,
    food_scrap_locations::DataFrame,
    demand_df::DataFrame,
    c_ij::Matrix,
    c_jk::Matrix;
    F::Float64 = 10000.0
)
    # Solves the two single-objective ("anchor") problems and returns
    # (min_cost, min_equity):
    #   - min_cost   is "expected_transport_cost" from the solve with w_c = 1, w_e = 0
    #   - min_equity is "expected_equity_penalty" from the solve with w_c = 0, w_e = 1
    # Both solves use the solver's default time limit and MIP gap.
    # An error is raised if either anchor solve returns no solution.

    println("Calculating normalization factors...")

    # Solve for minimum cost (w_c = 1, w_e = 0)
    println("  Solving for minimum cost...")
    result_min_cost = solve_stochastic_food_rescue(
        restaurant_data, food_scrap_locations, demand_df,
        c_ij, c_jk, 1.0, 0.0; F = F
    )
    # A failed solve returns a Dict without the cost entries; fail with a
    # clear message rather than an opaque KeyError.
    haskey(result_min_cost, "expected_transport_cost") || error(
        "Minimum-cost anchor solve returned no solution (status: $(result_min_cost["status"]))"
    )
    min_cost = result_min_cost["expected_transport_cost"]

    # Solve for minimum equity penalty (w_c = 0, w_e = 1)
    println("  Solving for minimum equity penalty...")
    result_min_equity = solve_stochastic_food_rescue(
        restaurant_data, food_scrap_locations, demand_df,
        c_ij, c_jk, 0.0, 1.0; F = F
    )
    haskey(result_min_equity, "expected_equity_penalty") || error(
        "Minimum-equity anchor solve returned no solution (status: $(result_min_equity["status"]))"
    )
    min_equity = result_min_equity["expected_equity_penalty"]

    # These values are later used as divisors, so flag unusable ones early.
    min_cost > 0 || @warn "Transport cost normalizer is not positive" min_cost
    min_equity > 0 || @warn "Equity penalty normalizer is not positive" min_equity

    println("Normalization factors:")
    println("  Min transport cost: $min_cost")
    println("  Min equity penalty: $min_equity")

    return min_cost, min_equity
end

"""
Solve with normalized objectives
"""
function solve_stochastic_normalized(
    restaurant_data::DataFrame,
    food_scrap_locations::DataFrame,
    demand_df::DataFrame,
    c_ij::Matrix,
    c_jk::Matrix,
    w_c::Float64,
    w_e::Float64,
    norm_cost::Float64,
    norm_equity::Float64;
    F::Float64 = 10000.0,
    time_limit::Int = 60,
    mip_gap::Float64 = 0.05
)
    # Same model as solve_stochastic_food_rescue, except that in the objective
    # the transport term is divided by `norm_cost` and the worst-unmet term by
    # `norm_equity`.
    #
    # Returns a Dict of results when a solution is available, otherwise
    # `nothing`. "expected_transport_cost" and "expected_equity_penalty" are
    # unweighted, unnormalized scenario averages; "objective" is the value of
    # the normalized objective that was optimized.
    #
    # Throws ArgumentError if a normalizer is not a positive finite number and
    # DimensionMismatch if a cost matrix does not match the data dimensions.
    #
    # NOTE(review): fixed_costs enters the objective unscaled while the other
    # two terms are divided by their normalizers — confirm this is intended.

    # The normalizers are divisors in the objective; reject unusable values.
    (isfinite(norm_cost) && norm_cost > 0) || throw(ArgumentError(
        "norm_cost must be a positive finite number, got $norm_cost"))
    (isfinite(norm_equity) && norm_equity > 0) || throw(ArgumentError(
        "norm_equity must be a positive finite number, got $norm_equity"))

    R = nrow(restaurant_data)
    D = nrow(food_scrap_locations)
    N = nrow(demand_df)
    Q = sum(restaurant_data.supply)
    p = 1/3

    # One column per scenario, in the order 2023, 2024, 2025
    scenario_labels = ("2023", "2024", "2025")
    demand = hcat(demand_df.y2023, demand_df.y2024, demand_df.y2025)
    S = size(demand, 2)

    size(c_ij) == (R, D) || throw(DimensionMismatch(
        "c_ij has size $(size(c_ij)) but expected ($R, $D)"))
    size(c_jk) == (D, N) || throw(DimensionMismatch(
        "c_jk has size $(size(c_jk)) but expected ($D, $N)"))

    model = Model(Gurobi.Optimizer)
    set_optimizer_attribute(model, "TimeLimit", time_limit)
    set_optimizer_attribute(model, "MIPGap", mip_gap)
    set_optimizer_attribute(model, "OutputFlag", 0)
    set_optimizer_attribute(model, "MIPFocus", 1)
    set_optimizer_attribute(model, "Presolve", 2)

    # Variables
    @variable(model, z[1:D], Bin)
    @variable(model, x[1:R, 1:D, 1:S] >= 0)
    @variable(model, y[1:D, 1:N, 1:S] >= 0)
    @variable(model, u[1:N, 1:S] >= 0)
    @variable(model, t[1:S] >= 0)

    # Objective with normalization
    @expression(model, fixed_costs, F * sum(z[j] for j in 1:D))

    @expression(model, transport_cost[s in 1:S],
        sum(c_ij[i,j] * x[i,j,s] for i in 1:R, j in 1:D) +
        sum(c_jk[j,k] * y[j,k,s] for j in 1:D, k in 1:N)
    )

    # NORMALIZED objective
    @objective(model, Min,
        fixed_costs +
        p * sum(
            w_c * transport_cost[s] / norm_cost +      # Normalize cost
            w_e * t[s] / norm_equity                    # Normalize equity
            for s in 1:S
        )
    )

    # Supply: each restaurant sends all its supply
    @constraint(model, supply_constraint[i in 1:R, s in 1:S],
        sum(x[i,j,s] for j in 1:D) == restaurant_data.supply[i]
    )

    # Flow conservation at distribution centers
    @constraint(model, flow_conservation[j in 1:D, s in 1:S],
        sum(y[j,k,s] for k in 1:N) == sum(x[i,j,s] for i in 1:R)
    )

    # Demand satisfaction: deliveries plus unmet demand equal demand
    @constraint(model, demand_constraint[k in 1:N, s in 1:S],
        sum(y[j,k,s] for j in 1:D) + u[k,s] == demand[k,s]
    )

    # Only open centers can receive flow
    @constraint(model, center_capacity[j in 1:D, s in 1:S],
        sum(x[i,j,s] for i in 1:R) <= Q * z[j]
    )

    # t[s] tracks the worst unmet demand in scenario s
    @constraint(model, equity_constraint[k in 1:N, s in 1:S],
        t[s] >= u[k,s]
    )

    optimize!(model)

    # Extract results with UNNORMALIZED values for interpretation.
    # A time-limited run may end without an incumbent, so also require that
    # a solution exists before querying values.
    status = termination_status(model)
    if status in (MOI.OPTIMAL, MOI.TIME_LIMIT, MOI.ALMOST_OPTIMAL) && has_values(model)

        z_val = value.(z)
        u_val = value.(u)

        fixed_cost_value = F * sum(z_val)
        transport_costs = [value(transport_cost[s]) for s in 1:S]
        expected_transport = p * sum(transport_costs)
        worst_unmet = [value(t[s]) for s in 1:S]
        expected_equity = p * sum(worst_unmet)

        results = Dict(
            "status" => status,
            "solve_time" => solve_time(model),

            # Value of the normalized objective that was optimized
            "objective" => objective_value(model),

            # UNNORMALIZED values for interpretation
            "fixed_cost" => fixed_cost_value,
            "expected_transport_cost" => expected_transport,
            "expected_equity_penalty" => expected_equity,

            # The same two averages divided by their normalizers
            "normalized_transport" => expected_transport / norm_cost,
            "normalized_equity" => expected_equity / norm_equity,

            "w_cost" => w_c,
            "w_equity" => w_e,

            "centers_opened" => findall(>(0.5), z_val),
            "num_centers" => sum(z_val .> 0.5),

            "worst_unmet_by_scenario" => Dict(
                scenario_labels[s] => worst_unmet[s] for s in 1:S
            ),
            "total_unmet_by_scenario" => Dict(
                scenario_labels[s] => sum(u_val[k,s] for k in 1:N) for s in 1:S
            )
        )

        return results
    else
        @warn "Optimization failed" status
        return nothing
    end
end

# Pareto frontier sweep using the normalized objective.
#
# Step 1 computes the two normalizers via get_normalization_factors; step 2
# solves solve_stochastic_normalized for `n_points` weight pairs (w, 1 - w)
# with w evenly spaced in [0, 1].
#
# Returns (pareto_results, norm_cost, norm_equity). Each entry of
# pareto_results is a result Dict, or `nothing` for a solve without solution.
function generate_pareto_frontier_normalized(
    restaurant_data::DataFrame,
    food_scrap_locations::DataFrame,
    demand_df::DataFrame,
    c_ij::Matrix,
    c_jk::Matrix;
    n_points::Int = 11,
    F::Float64 = 10000.0
)

    # Fail before the (expensive) anchor solves if the sweep cannot be built
    n_points >= 2 || throw(ArgumentError("n_points must be at least 2, got $n_points"))

    # Step 1: Get normalization factors
    norm_cost, norm_equity = get_normalization_factors(
        restaurant_data, food_scrap_locations, demand_df,
        c_ij, c_jk; F = F
    )

    # Step 2: Generate weight pairs
    weight_pairs = [(w, 1-w) for w in range(0, 1, length=n_points)]
    # Typed container; `nothing` marks a weight pair whose solve failed
    pareto_results = Union{Nothing,Dict{String,Any}}[]

    println("\nGenerating Pareto frontier with normalized objectives...")

    for (i, (w_c, w_e)) in enumerate(weight_pairs)
        println("\nPoint $i/$n_points: w_cost=$w_c, w_equity=$w_e")

        result = solve_stochastic_normalized(
            restaurant_data, food_scrap_locations, demand_df,
            c_ij, c_jk, w_c, w_e, norm_cost, norm_equity;
            F = F
        )

        push!(pareto_results, result)

        # Report every available solution; time-limited incumbents were
        # previously left out of the log.
        if !isnothing(result)
            if result["status"] != MOI.OPTIMAL
                println("  Status: $(result["status"])")
            end
            println("  Transport cost: $(round(result["expected_transport_cost"], digits=2))")
            println("  Avg worst unmet: $(round(mean(values(result["worst_unmet_by_scenario"])), digits=2))")
        end
    end

    return pareto_results, norm_cost, norm_equity
end

generate_pareto_frontier_normalized (generic function with 1 method)

In [None]:
# Step 1: compute the two normalizers (only needs to run once)
println("Step 1: Calculating normalization factors...")
norm_cost, norm_equity = get_normalization_factors(
    restaurant_data, food_scrap_locations, demand_df,
    c_ij, c_jk;
    F = 10000.0,
)

println("\nNormalization factors obtained:")
println("  Transport cost normalizer: $(round(norm_cost, digits=2))")
println("  Equity penalty normalizer: $(round(norm_equity, digits=2))")

# Step 2: solve with a weight pair that favors equity over cost
println("\n", repeat("=", 60))
println("Step 2: Solving with normalized objectives...")
println(repeat("=", 60))

result = solve_stochastic_normalized(
    restaurant_data, food_scrap_locations, demand_df,
    c_ij, c_jk,
    0.3,  # w_c - LOW weight on cost
    0.7,  # w_e - HIGH weight on equity
    norm_cost, norm_equity;
    F = 10000.0,
    time_limit = 60,
    mip_gap = 0.02,
)

# Step 3: print a report, or a failure notice when no result came back
if isnothing(result)
    println("Optimization failed!")
else
    println("\n", repeat("=", 60))
    println("RESULTS")
    println(repeat("=", 60))
    println("Status: ", result["status"])
    println("Solve time: $(round(result["solve_time"], digits=2)) seconds")
    println("\nObjective value: $(round(result["objective"], digits=2))")

    println("\nCost Breakdown:")
    println("  Fixed cost: \$$(round(result["fixed_cost"], digits=2))")
    println("  Expected transport cost: \$$(round(result["expected_transport_cost"], digits=2))")
    println("  Expected equity penalty: $(round(result["expected_equity_penalty"], digits=2))")

    println("\nNormalized values (0-1 scale):")
    println("  Normalized transport: $(round(result["normalized_transport"], digits=3))")
    println("  Normalized equity: $(round(result["normalized_equity"], digits=3))")

    println("\nNetwork Design:")
    println("  Number of centers opened: $(result["num_centers"])")
    println("  Center locations: $(result["centers_opened"])")

    # Per-scenario metrics, printed in year order
    println("\nEquity Metrics:")
    for yr in ("2023", "2024", "2025")
        println("  Worst unmet demand $yr: $(round(result["worst_unmet_by_scenario"][yr], digits=2))")
    end

    println("\nTotal Unmet Demand:")
    for yr in ("2023", "2024", "2025")
        println("  $yr: $(round(result["total_unmet_by_scenario"][yr], digits=2))")
    end
end

Step 1: Calculating normalization factors...
Calculating normalization factors...
  Solving for minimum cost...
Set parameter Username
Set parameter LicenseID to value 2702779
Academic license - for non-commercial use only - expires 2026-09-03
Set parameter TimeLimit to value 300
Set parameter MIPGap to value 0.01
Set parameter MIPGap to value 0.01
Set parameter TimeLimit to value 300
  Solving for minimum equity penalty...


In [26]:
# Print the sizes of the demand inputs next to N and the c_jk matrix so that
# mismatches are easy to spot.
for (label, val) in (
    ("Demand 2023 length: ", length(demand_2023)),
    ("Demand 2024 length: ", length(demand_2024)),
    ("Demand 2025 length: ", length(demand_2025)),
    ("Demand DataFrame rows: ", nrow(demand_df)),
    ("N (neighborhoods): ", N),
    ("c_jk dimensions: ", size(c_jk)),
)
    println(label, val)
end

Demand 2023 length: 197
Demand 2024 length: 197
Demand 2025 length: 197
Demand DataFrame rows: 197
N (neighborhoods): 591
c_jk dimensions: (201, 591)


# Extract the data needed for visualization

In [None]:
# ============================================================================
# 1. Generate Pareto Frontier (optimized - fewer redundant points)
# ============================================================================
results = DataFrame(
    w_cost = Float64[],
    w_eq = Float64[],
    obj_value = Float64[],
    num_centers = Int[],
    avg_transport_cost = Float64[],
    avg_equity_t = Float64[],
    avg_total_recv = Float64[],
    avg_total_unmet = Float64[],
    t_2023 = Float64[],
    t_2024 = Float64[],
    t_2025 = Float64[]
)

# Alternative, denser grid (~35 pairs instead of ~100), kept for reference.
# Swap it in for the reduced grid below when a finer frontier is wanted:
# weight_pairs = vcat(
#     [(1.0, w) for w in [0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 8.0, 15.0, 25.0]],  # 9 pts
#     [(w, 1.0) for w in [2.0, 3.0, 5.0, 8.0]],                              # 4 pts
#     [(1.0, w) for w in 4.0:1.0:7.0],                                       # 4 pts - target gap
#     [(2.0, w) for w in 2.0:1.0:6.0],                                       # 5 pts - target gap
#     [(3.0, w) for w in 2.0:1.0:5.0],                                       # 4 pts - target gap
#     [(w, w) for w in [1.5, 2.0, 2.5, 3.0, 4.0]],                          # 5 pts - diagonal
#     [(1.5, w) for w in [3.0, 4.0, 5.0, 6.0]]                              # 4 pts - target gap
# )

# Reduced grid of (w_cost, w_eq) pairs for quick test runs (10 points).
# Edit the equity weights here to explore other trade-offs. The order of the
# vector is significant: the allocation export numbers its files by position.
# `unique` drops any accidental repeats while keeping first-occurrence order.
weight_pairs = unique(vcat(
    # fixed cost weight, equity weight rising from cost-focused to equity-focused
    [(1.0, w_eq) for w_eq in (0.5, 1.0, 2.0, 5.0, 10.0, 20.0)],
    # fixed equity weight, heavier cost weight
    [(w_cost, 1.0) for w_cost in (2.0, 5.0)],
    # extra equity weights filling the gaps in the first sweep
    [(1.0, w_eq) for w_eq in (3.0, 7.0)],
))

# Solve the combined model once per weight pair and append one summary row to
# `results` for every solve that terminates with MOI.OPTIMAL.

# Create the output directory up front: the CSV is written only after every
# solve has finished, so a missing directory would otherwise throw away the
# whole sweep at the very last step.
mkpath("viz_data")

# NOTE(review): this loop reads cost matrices named `cij` / `cjk`, while the
# dimension-check cell prints `size(c_jk)` — confirm both names are defined.
for (w_cost, w_eq) in weight_pairs
    model, x, y, u, r, t, z, _, _ = build_combined_model(w_cost, w_eq)
    optimize!(model)

    # Only proven-optimal solutions are recorded; anything else (time limit,
    # infeasible, ...) is skipped, but loudly, so gaps in the frontier are
    # explainable.
    if termination_status(model) != MOI.OPTIMAL
        @warn "Skipping weight pair: solve did not terminate with OPTIMAL" w_cost w_eq status =
            termination_status(model)
        continue
    end

    # Number of opened centers (z values are rounded to absorb solver tolerance).
    num_centers = round(Int, sum(value.(z)))

    # Per-scenario transport cost: restaurant -> center plus center -> neighborhood.
    transport_costs = [
        sum(cij[i, j] * value(x[s, i, j]) for i in 1:R, j in 1:D) +
        sum(cjk[j, k] * value(y[s, j, k]) for j in 1:D, k in 1:N) for s in 1:S
    ]

    # Per-scenario totals and the per-scenario value of t.
    total_recvs = [sum(value.(r[s, :])) for s in 1:S]
    total_unmets = [sum(value.(u[s, :])) for s in 1:S]
    equity_ts = [value(t[s]) for s in 1:S]

    # Positional tuple: must follow the column order of `results`.
    # The last three entries map scenarios 1..3 onto t_2023..t_2025, so this
    # requires S >= 3.
    push!(results, (
        w_cost, w_eq,
        objective_value(model),
        num_centers,
        mean(transport_costs),
        mean(equity_ts),
        mean(total_recvs),
        mean(total_unmets),
        equity_ts[1], equity_ts[2], equity_ts[3]
    ))
end

CSV.write("viz_data/pareto_results.csv", results)

In [None]:
# ============================================================================
# 2. Export Location Data
# ============================================================================
# Writes the static location tables (restaurants, donation centers,
# neighborhoods) consumed by the visualization.

# Ensure the output directory exists so this cell can be run on its own.
mkpath("viz_data")

# Restaurants: sequential id plus coordinates and supply.
CSV.write("viz_data/restaurants.csv", DataFrame(
    id = 1:R,
    latitude = restaurant_data.latitude,
    longitude = restaurant_data.longitude,
    supply = restaurant_data.supply
))

# Candidate donation centers: sequential id plus coordinates.
CSV.write("viz_data/donation_centers.csv", DataFrame(
    id = 1:D,
    latitude = food_scrap_locations.latitude,
    longitude = food_scrap_locations.longitude
))

# Update the neighborhoods export to include names
# Reload original neighborhood data to get names
neighborhood_full = CSV.read(neighborhood_path, DataFrame)
nbhd_2023_full = neighborhood_full[neighborhood_full.Year .== 2023, :]

# Export neighborhoods with names
# NOTE(review): every column below must have exactly N rows or the DataFrame
# constructor throws. The dimension-check cell once printed N = 591 against
# 197 demand rows — confirm N is the per-year neighborhood count here.
# NOTE(review): presumably the 2023 rows are in the same order as `demand_df`;
# verify, since names/coordinates and demands are joined purely by position.
CSV.write("viz_data/neighborhoods.csv", DataFrame(
    id = 1:N,
    name = nbhd_2023_full[!, "Neighborhood.Tabulation.Area..NTA..Name"],
    latitude = nbhd_2023_full.latitude,
    longitude = nbhd_2023_full.longitude,
    demand_2023 = demand_df.y2023,
    demand_2024 = demand_df.y2024,
    demand_2025 = demand_df.y2025
))

# ============================================================================
# 3. Export Scenario-Specific Results for a Balanced Solution
# ============================================================================
# Solve once with weights (w_cost, w_eq) = (1.0, 3.0) and export everything
# derived from that single solution. All exports live inside the optimality
# guard: querying `value` on a model without an optimal solution would throw.
model_bal, x_bal, y_bal, u_bal, r_bal, t_bal, z_bal, _, _ = build_combined_model(1.0, 3.0)
optimize!(model_bal)

if termination_status(model_bal) == MOI.OPTIMAL
    # Ensure the output directory exists so this cell can be run on its own.
    mkpath("viz_data")

    # Centers opened (values rounded to 0/1 to absorb solver tolerance)
    CSV.write("viz_data/centers_opened.csv", DataFrame(
        id = 1:D,
        opened = Int.(round.(value.(z_bal)))
    ))

    # Per-scenario allocations; scenario s is labelled with year 2022 + s
    for s in 1:S
        year = 2022 + s
        CSV.write("viz_data/allocations_$year.csv", DataFrame(
            neighborhood_id = 1:N,
            received = value.(r_bal[s,:]),
            unmet = value.(u_bal[s,:])
        ))
    end

    # Flows for scenario 1 (2023); flows at or below 1e-3 are dropped.
    # Each value is queried once and reused for both the test and the row.
    flows = DataFrame(from_type=String[], from_id=Int[], to_type=String[], to_id=Int[], flow=Float64[])
    for i in 1:R, j in 1:D
        local amount = value(x_bal[1,i,j])
        amount > 1e-3 && push!(flows, ("restaurant", i, "center", j, amount))
    end
    for j in 1:D, k in 1:N
        local amount = value(y_bal[1,j,k])
        amount > 1e-3 && push!(flows, ("center", j, "neighborhood", k, amount))
    end
    CSV.write("viz_data/flows_2023.csv", flows)

    # ========================================================================
    # 4. Export Scenario Comparison Data
    # ========================================================================
    # One row per scenario; the three hard-coded labels require S == 3.
    scenario_summary = DataFrame(
        scenario = ["2023", "2024", "2025"],
        total_demand = [sum(demand_df.y2023), sum(demand_df.y2024), sum(demand_df.y2025)],
        total_received = [sum(value.(r_bal[s,:])) for s in 1:S],
        total_unmet = [sum(value.(u_bal[s,:])) for s in 1:S],
        worst_unmet = [value(t_bal[s]) for s in 1:S]
    )
    CSV.write("viz_data/scenario_summary.csv", scenario_summary)
else
    @warn "Balanced model (w_cost = 1.0, w_eq = 3.0) did not terminate with OPTIMAL; no scenario files were written" status =
        termination_status(model_bal)
end

In [None]:
# ============================================================================
# 5. Export Allocations for ALL Pareto Solutions (for animation)
# ============================================================================
# Re-solves the model for every weight pair and writes one file per optimal
# solve, named after the pair's position in `weight_pairs`.
mkpath("viz_data/pareto_allocations")

# Positions actually written. Collected by mutation (push!) so the count is
# correct whether this runs in a notebook or as an included script.
exported_ids = Int[]

for (idx, (w_cost, w_eq)) in enumerate(weight_pairs)
    model, x, y, u, r, t, z, _, _ = build_combined_model(w_cost, w_eq)
    optimize!(model)

    # Skipped pairs leave a gap in the file numbering; say so explicitly.
    if termination_status(model) != MOI.OPTIMAL
        @warn "No allocation file written: solve did not terminate with OPTIMAL" idx w_cost w_eq status =
            termination_status(model)
        continue
    end

    # Average unmet across scenarios
    avg_unmet = [mean(value(u[s, k]) for s in 1:S) for k in 1:N]

    CSV.write("viz_data/pareto_allocations/alloc_$(idx).csv", DataFrame(
        neighborhood_id = 1:N,
        avg_unmet = avg_unmet
    ))
    push!(exported_ids, idx)
end

# Report the number of files really written, not the number of pairs attempted.
println("Exported $(length(exported_ids)) allocation files")