In [48]:
using CSV
using DataFrames

using JuMP
using Gurobi

using LinearAlgebra
using Statistics

## Import data

In [57]:
# Input CSV locations, given relative to the notebook's directory.
restaurant_path = "../clean_data/restaurant_data.csv"
scrap_path = "../clean_data/food_scrap_locations.csv"
neighborhood_path = "../clean_data/neighborhood_supply.csv"

# Load each file into its own DataFrame.
restaurant_data, food_scrap_locations, neighborhood_supply = map(
    path -> CSV.read(path, DataFrame),
    (restaurant_path, scrap_path, neighborhood_path),
)

# Print the first five rows of every table as a quick visual check.
for table in (restaurant_data, food_scrap_locations, neighborhood_supply)
    println(first(table, 5))
end


[1m5×3 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m[1m waste   [0m
     │[90m Float64  [0m[90m Float64   [0m[90m Float64 [0m
─────┼──────────────────────────────
   1 │  40.6313   -73.9472  22593.5
   2 │  40.7144   -73.8319  54589.8
   3 │  40.7893   -73.9753  57175.8
   4 │  40.7498   -73.9728  26668.3
   5 │  40.7578   -73.9825  74368.1
[1m5×27 DataFrame[0m
[1m Row [0m│[1m Borough   [0m[1m NTAName                 [0m[1m SiteName                          [0m[1m SiteAddr                          [0m[1m Hosted_By                      [0m[1m Open_Month [0m[1m Day_Hours                         [0m[1m Notes                     [0m[1m Website                           [0m[1m BoroCD [0m[1m CouncilDis [0m[1m ct2010  [0m[1m BBL      [0m[1m BIN     [0m[1m Latitude [0m[1m Longitude [0m[1m PolicePrec [0m[1m Object.ID [0m[1m Location.Point               [0m[1m App.Android [0m[1m App.iOS [0m[1m X.Assembly.District [0m[1

# Clean the data:

## Strip thousands separators from numbers, cast each field to the correct type, drop unnecessary columns, and remove rows with negative supply

In [59]:
# ================================
# 3. CLEAN RESTAURANT
# ================================
# Incoming columns: latitude | longitude | waste.
# The third column is the quantity used as each restaurant's supply.
rename!(restaurant_data, propertynames(restaurant_data)[3] => :supply)

# Force every numeric column to Float64.
for col in (:supply, :latitude, :longitude)
    restaurant_data[!, col] = Float64.(restaurant_data[!, col])
end

# *** DROP RESTAURANTS WITH NEGATIVE SUPPLY ***
filter!(:supply => >=(0), restaurant_data)



# ================================
# 3. CLEAN FOOD SCRAP CENTER DATA
# ================================
# Rename the coordinate columns to lowercase `latitude` / `longitude` for
# consistency with the other tables. The columns are addressed by name rather
# than by position, so a reordered or extended CSV cannot silently feed the
# wrong columns into the distance calculations.
rename!(food_scrap_locations, :Latitude => :latitude, :Longitude => :longitude)

# Ensure Float64 coordinates
food_scrap_locations.latitude = Float64.(food_scrap_locations.latitude)
food_scrap_locations.longitude = Float64.(food_scrap_locations.longitude)

# Keep only the coordinate columns; nothing else is used downstream in this notebook.
select!(food_scrap_locations, [:latitude, :longitude])

# ================================
# 4. CLEAN NEIGHBORHOOD SUPPLY DATA
# ================================
# The fourth column holds each neighborhood's supply gap; give it a clear name.
rename!(neighborhood_supply, propertynames(neighborhood_supply)[4] => :supply_gap)

# The reader may deliver the gap as text (with "," separators) or as numbers.
# Strip the separators and parse only in the text case.
if eltype(neighborhood_supply.supply_gap) <: AbstractString
    let no_commas = replace.(neighborhood_supply.supply_gap, "," => "")
        neighborhood_supply.supply_gap = parse.(Float64, no_commas)
    end
end

# Demand is the size of a negative gap (a deficit); a non-negative gap
# (surplus) yields zero demand.
neighborhood_supply[!, :demand] = max.(0.0, .-neighborhood_supply.supply_gap)

# Force the coordinates to Float64.
for col in (:latitude, :longitude)
    neighborhood_supply[!, col] = Float64.(neighborhood_supply[!, col])
end

# Drop everything except the columns used downstream.
select!(neighborhood_supply, :latitude, :longitude, :demand)

# ================================
# 5. SHOW CLEANED HEADS
# ================================
# Print a title followed by the first five rows of each cleaned table.
for (title, table) in (
    ("=== Restaurants (cleaned) ===", restaurant_data),
    ("\n=== Food Scrap Locations (cleaned) ===", food_scrap_locations),
    ("\n=== Neighborhood Supply (cleaned) ===", neighborhood_supply),
)
    println(title)
    println(first(table, 5))
end

=== Restaurants (cleaned) ===
[1m5×3 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m[1m supply  [0m
     │[90m Float64  [0m[90m Float64   [0m[90m Float64 [0m
─────┼──────────────────────────────
   1 │  40.6313   -73.9472  22593.5
   2 │  40.7144   -73.8319  54589.8
   3 │  40.7893   -73.9753  57175.8
   4 │  40.7498   -73.9728  26668.3
   5 │  40.7578   -73.9825  74368.1

=== Food Scrap Locations (cleaned) ===
[1m5×2 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m
     │[90m Float64  [0m[90m Float64   [0m
─────┼─────────────────────
   1 │  40.6355   -74.0228
   2 │  40.7526   -73.969
   3 │  40.7635   -74.0002
   4 │  40.762    -73.9693
   5 │  40.7174   -74.0108

=== Neighborhood Supply (cleaned) ===
[1m5×3 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m[1m demand    [0m
     │[90m Float64  [0m[90m Float64   [0m[90m Float64   [0m
─────┼────────────────────────────────
   1 │  40.8267   -73.9217  1.02143e5
 

## Get vectors for supply[i] for all restaurants, demand[k] for all neighborhoods, and cij[i,j] and cjk[j,k] to plug directly into JuMP model

In [61]:
# ================
# 6. EXTRACT VECTORS
# ================
# Problem dimensions: one index per row of each cleaned table.
R = size(restaurant_data, 1)
D = size(food_scrap_locations, 1)
N = size(neighborhood_supply, 1)

# Column vectors used as model parameters.
supply = restaurant_data[!, :supply]         # s_i
demand = neighborhood_supply[!, :demand]     # d_k

# ================
# 7. MANHATTAN DISTANCE FUNCTION
# ================
"""
    manhattan(lat1, lon1, lat2, lon2; lon_weight=1)

Return the Manhattan (L1) distance between the points `(lat1, lon1)` and
`(lat2, lon2)`: `abs(lat1 - lat2) + lon_weight * abs(lon1 - lon2)`.

The result is in the units of the inputs. With the default `lon_weight = 1`
a degree of longitude counts the same as a degree of latitude. When the
inputs are geographic degrees, pass the cosine of the region's latitude
(for example `lon_weight = cosd(40.7)`) to account for a degree of longitude
covering less ground than a degree of latitude away from the equator.
"""
manhattan(lat1, lon1, lat2, lon2; lon_weight=1) =
    abs(lat1 - lat2) + lon_weight * abs(lon1 - lon2)

# ================
# 8. COST MATRICES
# ================
# Broadcasting a column of origins against a transposed row of destinations
# evaluates `manhattan` once for every (origin, destination) pair.
# NOTE(review): the costs are differences of raw coordinate values, so they
# carry the coordinates' units rather than a physical distance — confirm this
# is the intended cost measure.

# cij: Restaurants (i) → Donation centers (j)
cij = manhattan.(
    restaurant_data.latitude,
    restaurant_data.longitude,
    transpose(food_scrap_locations.latitude),
    transpose(food_scrap_locations.longitude),
)

# cjk: Donation centers (j) → Neighborhoods (k)
cjk = manhattan.(
    food_scrap_locations.latitude,
    food_scrap_locations.longitude,
    transpose(neighborhood_supply.latitude),
    transpose(neighborhood_supply.longitude),
)

println("Size of cij (R x D): ", size(cij))
println("Size of cjk (D x N): ", size(cjk))


Size of cij (R x D): (319, 201)
Size of cjk (D x N): (201, 591)


# Sanity check data

In [63]:
# Range of the demand vector, followed by NaN / Inf scans of every model input.
println("Demand: min = ", minimum(demand), ", max = ", maximum(demand))
println("Any NaN in demand? ", any(isnan, demand))
println("Any Inf in demand? ", any(isinf, demand))

println("Any NaN in cij? ", any(isnan, cij))
println("Any Inf in cij? ", any(isinf, cij))

println("Any NaN in cjk? ", any(isnan, cjk))
println("Any Inf in cjk? ", any(isinf, cjk))

Demand: min = 0.0, max = 4.27814874096381e6
Any NaN in demand? false
Any Inf in demand? false
Any NaN in cij? false
Any Inf in cij? false
Any NaN in cjk? false
Any Inf in cjk? false


In [65]:
# Recompute the three problem dimensions from the cleaned tables and show them.
R, D, N = map(nrow, (restaurant_data, food_scrap_locations, neighborhood_supply))

println("R, D, N = ", (R, D, N))

R, D, N = (319, 201, 591)


In [67]:
# Show the cost-matrix shapes and enforce that they match the index ranges the
# optimization models iterate over. A mismatched matrix would otherwise be
# indexed out of bounds or only partially used.
println("Size of cij: ", size(cij))  # should be (R, D)
println("Size of cjk: ", size(cjk))  # should be (D, N)

size(cij) == (R, D) ||
    throw(DimensionMismatch("cij has size $(size(cij)), expected $((R, D))"))
size(cjk) == (D, N) ||
    throw(DimensionMismatch("cjk has size $(size(cjk)), expected $((D, N))"))

Size of cij: (319, 201)
Size of cjk: (201, 591)


# Cost-Reduction Optimization Model 

We formulate the minimum-cost redistribution problem below.

**Sets**

- $R$: restaurants (supply nodes)  
- $D$: food scrap / donation centers (transshipment nodes)  
- $N$: neighborhoods (demand nodes)

**Parameters**

- $s_i$: supply at restaurant $i \in R$  
- $d_k$: demand at neighborhood $k \in N$  
- $c_{ij}$: cost of transporting one unit from restaurant $i$ to donation center $j$  
- $c_{jk}$: cost of transporting one unit from donation center $j$ to neighborhood $k$  
- $M$: large penalty coefficient for unmet demand

**Decision Variables**

- $x_{ij} \ge 0$: shipment from restaurant $i$ to donation center $j$  
- $y_{jk} \ge 0$: shipment from donation center $j$ to neighborhood $k$  
- $u_k \ge 0$: unmet demand at neighborhood $k$

---

### **Objective: Minimize Total Cost**

$ \displaystyle
\min\;
\sum_{i \in R} \sum_{j \in D} c_{ij} x_{ij}
\;+\;
\sum_{j \in D} \sum_{k \in N} c_{jk} y_{jk}
\;+\;
M \sum_{k \in N} u_k
$

---

### **Constraints**

**1. Restaurant supply limits**

$ \displaystyle
\sum_{j \in D} x_{ij} \le s_i \quad \forall i \in R
$

**2. Neighborhood demand balance**

$ \displaystyle
\sum_{j \in D} y_{jk} + u_k = d_k \quad \forall k \in N
$

**3. Flow conservation at donation centers**

$ \displaystyle
\sum_{k \in N} y_{jk}
=
\sum_{i \in R} x_{ij}
\quad \forall j \in D
$

**4. Nonnegativity**

$ x_{ij},\; y_{jk},\; u_k \ge 0 $


In [69]:
# Final input checks before building the model: no negative or NaN quantities.
println("Any supply < 0? ", any(<(0), supply))
println("Any demand < 0? ", any(<(0), demand))
println("Any NaN in supply? ", any(isnan, supply))
println("Any NaN in demand? ", any(isnan, demand))


Any supply < 0? false
Any demand < 0? false
Any NaN in supply? false
Any NaN in demand? false


In [91]:
# ================================
# 9. REDUCED COST OPTIMIZATION MODEL
# ================================

# Penalty charged per unit of unmet demand. Using the total demand makes
# leaving demand unmet very expensive compared with shipping.
# NOTE(review): this penalty is presumably many orders of magnitude larger
# than the per-unit shipping costs in cij / cjk; if the solver reports
# numerical trouble, a smaller penalty that still exceeds the costliest
# restaurant → center → neighborhood route may be worth trying — verify.
M = sum(demand)

model_cost = Model(Gurobi.Optimizer)
set_silent(model_cost)

# Decision variables (all nonnegative):
#   x[i,j]  food shipped from restaurant i to donation center j
#   y[j,k]  food shipped from donation center j to neighborhood k
#   u[k]    demand left unmet at neighborhood k
@variable(model_cost, x[1:R, 1:D] >= 0)
@variable(model_cost, y[1:D, 1:N] >= 0)
@variable(model_cost, u[1:N] >= 0)

# Objective: shipping cost on both legs plus the penalty on all unmet demand.
@objective(model_cost, Min, sum(cij .* x) + sum(cjk .* y) + M * sum(u))

# 1) Supply: a restaurant cannot ship more than its available surplus.
@constraint(model_cost, [i in 1:R], sum(x[i, :]) <= supply[i])

# 2) Demand balance: deliveries from all centers plus the unmet amount
#    equal the neighborhood's demand.
@constraint(model_cost, [k in 1:N], sum(y[:, k]) + u[k] == demand[k])

# 3) Transshipment: everything a donation center receives from restaurants
#    is passed on to neighborhoods.
@constraint(model_cost, [j in 1:D], sum(y[j, :]) == sum(x[:, j]))

# ================================
# 10. SOLVE AND INSPECT
# ================================
optimize!(model_cost)

# Solution values can only be queried when the solver holds a solution, and
# the summary below is only meaningful for a proven optimum, so stop with an
# explicit message instead of reporting on an unsolved model.
if termination_status(model_cost) != MOI.OPTIMAL
    error("Cost model was not solved to optimality: ", termination_status(model_cost))
end

total_demand   = sum(demand)
total_received = sum(value.(y))   # food delivered, summed over all (j, k)
total_unmet    = sum(value.(u))   # shortfall, summed over all k

# Delivered food is the amount by which neighborhood demand is reduced.
reduction_abs = total_received
reduction_pct = 100 * reduction_abs / total_demand

println("Termination status: ", termination_status(model_cost))
println("Objective value (total cost + penalty): ", objective_value(model_cost))
println("Total neighborhood demand: ", total_demand)
println("Total received by neighborhoods: ", total_received)
println("Total unmet demand: ", total_unmet)
println("Demand reduction (absolute): ", reduction_abs)
println("Demand reduction (%): ", reduction_pct, "%")

# After optimize!(model_cost)

# Per-neighborhood received under COST model: total inflow from all centers.
r_cost = [sum(value(y[j, k]) for j in 1:D) for k in 1:N]

println("=== Neighborhoods That Received Food ===\n")

# List every neighborhood with a positive delivery. The loop only reads
# globals, so it behaves the same in a notebook cell and in a plain script.
for k in findall(>(0), r_cost)
    println("Neighborhood $(k): received $(round(r_cost[k], digits=2)) units")
end

# Counted without mutating a global inside a top-level loop, which would
# depend on the notebook's soft-scope rules.
count_nonzero_cost = count(>(0), r_cost)

println("\nTotal neighborhoods receiving any food: $count_nonzero_cost")


Set parameter Username
Academic license - for non-commercial use only - expires 2026-08-20
Termination status: OPTIMAL
Objective value (total cost + penalty): 2.079347469557809e16
Total neighborhood demand: 1.5243933021550444e8
Total received by neighborhoods: 1.6034409875006378e7
Total unmet demand: 1.3640492034049806e8
Demand reduction (absolute): 1.6034409875006378e7
Demand reduction (%): 10.518551775541411%
=== Neighborhoods That Received Food (COST model) ===

Neighborhood 1: received 102142.62 units
Neighborhood 4: received 40833.65 units
Neighborhood 28: received 267239.25 units
Neighborhood 50: received 105498.51 units
Neighborhood 114: received 16549.04 units
Neighborhood 126: received 86946.54 units
Neighborhood 135: received 27850.87 units
Neighborhood 147: received 50156.56 units
Neighborhood 156: received 44561.02 units
Neighborhood 158: received 605451.43 units
Neighborhood 163: received 105866.87 units
Neighborhood 182: received 2.47615582e6 units
Neighborhood 203: recei

## Equity objective: minimizing worst unmet demand

We now consider an **equity-focused** objective that tries to avoid leaving any neighborhood with a very large unmet demand. Instead of minimizing total cost, we minimize the **maximum unmet demand** across neighborhoods.

### Sets

- $R$: set of restaurants, indexed by $i$
- $D$: set of food scrap centers, indexed by $j$
- $N$: set of neighborhoods, indexed by $k$

### Parameters

- $s_i$: supply (surplus food) at restaurant $i \in R$
- $d_k$: demand (food needed) in neighborhood $k \in N$

### Decision variables

- $x_{ij} \ge 0$: amount shipped from restaurant $i$ to donation center $j$
- $y_{jk} \ge 0$: amount shipped from donation center $j$ to neighborhood $k$
- $r_k \ge 0$: food received by neighborhood $k$
- $t \ge 0$: upper bound on the unmet demand of every neighborhood; at the optimum it equals the largest unmet demand

### Equity optimization problem

We minimize the worst (largest) unmet demand:

$ \min \ t $

subject to:

$ \sum_{j \in D} x_{ij} \le s_i \quad \forall i \in R $  &nbsp; (restaurant supply)

$ \sum_{k \in N} y_{jk} = \sum_{i \in R} x_{ij} \quad \forall j \in D $  &nbsp; (flow conservation at centers)

$ r_k = \sum_{j \in D} y_{jk} \quad \forall k \in N $  &nbsp; (received by each neighborhood)

$ r_k \le d_k \quad \forall k \in N $  &nbsp; (cannot exceed demand)

$ t \ge d_k - r_k \quad \forall k \in N $  &nbsp; (unmet demand in each neighborhood)

$ x_{ij}, \, y_{jk}, \, r_k, \, t \ge 0 $

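Interpretation:  
- $r_k$ captures how much food neighborhood $k$ actually receives.
- $t$ bounds every neighborhood's shortfall $d_k - r_k$ from above, so minimizing $t$ minimizes the largest shortfall.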


In [89]:
# ================================
# Build index sets and parameters
# ================================

R = size(restaurant_data, 1)          # number of restaurants
D = size(food_scrap_locations, 1)     # number of food scrap centers
N = size(neighborhood_supply, 1)      # number of neighborhoods

s = restaurant_data[!, :supply]       # s_i: supply at restaurant i
d = neighborhood_supply[!, :demand]   # d_k: demand at neighborhood k

# ================================
# Equity model: minimize worst unmet demand
# ================================
# NOTE(review): the objective contains only t, so shipping costs play no role
# in which optimal plan the solver returns — confirm that is intended.
model_equity = Model(Gurobi.Optimizer)
set_silent(model_equity)

# Decision variables (all nonnegative):
#   x[i,j]  shipment from restaurant i to center j
#   y[j,k]  shipment from center j to neighborhood k
#   r[k]    total food received by neighborhood k
#   t       bound on the unmet demand of every neighborhood
@variable(model_equity, x[1:R, 1:D] >= 0)
@variable(model_equity, y[1:D, 1:N] >= 0)
@variable(model_equity, r[1:N] >= 0)
@variable(model_equity, t >= 0)

# Objective: push the bound on unmet demand as low as possible.
@objective(model_equity, Min, t)

# 1) Restaurant supply: ∑_j x_ij ≤ s_i   ∀ i ∈ R
@constraint(model_equity, [i in 1:R], sum(x[i, :]) <= s[i])

# 2) Flow conservation at centers: ∑_k y_jk = ∑_i x_ij   ∀ j ∈ D
@constraint(model_equity, [j in 1:D], sum(y[j, :]) == sum(x[:, j]))

# 3) Neighborhood received: r_k = ∑_j y_jk   ∀ k ∈ N
@constraint(model_equity, [k in 1:N], r[k] == sum(y[:, k]))

# 4) Cannot exceed demand: r_k ≤ d_k   ∀ k ∈ N
@constraint(model_equity, [k in 1:N], r[k] <= d[k])

# 5) Worst unmet demand: t ≥ d_k - r_k   ∀ k ∈ N
@constraint(model_equity, [k in 1:N], t >= d[k] - r[k])

# ================================
# Solve and report
# ================================
optimize!(model_equity)

# Solution values can only be queried when the solver holds a solution, and
# the summary below is only meaningful for a proven optimum, so stop with an
# explicit message instead of reporting on an unsolved model.
if termination_status(model_equity) != MOI.OPTIMAL
    error("Equity model was not solved to optimality: ", termination_status(model_equity))
end

println("Equity model termination status: ", termination_status(model_equity))
println("Optimal worst unmet demand t*: ", value(t))

# Aggregate totals: unmet demand is whatever part of demand was not delivered.
total_demand   = sum(d)
total_received = sum(value.(r))
total_unmet    = total_demand - total_received

println("Total demand:   ", total_demand)
println("Total received: ", total_received)
println("Total unmet:    ", total_unmet)
println("Demand reduction (%): ", 100 * total_received / total_demand, "%")


# Pull received amounts (one entry per neighborhood)
r_vals = value.(r)

println("=== Neighborhoods That Received Food ===\n")

# List every neighborhood with a positive delivery. The loop only reads
# globals, so it behaves the same in a notebook cell and in a plain script.
for k in findall(>(0), r_vals)
    println("Neighborhood $(k): received $(round(r_vals[k], digits=2)) units")
end

# Counted without mutating a global inside a top-level loop, which would
# depend on the notebook's soft-scope rules.
count_nonzero = count(>(0), r_vals)

println("\nTotal neighborhoods receiving any food: $count_nonzero")


Set parameter Username
Academic license - for non-commercial use only - expires 2026-08-20
OPTIMALmodel termination status: 
Optimal worst unmet demand t*: 2.095261055422161e6
Total demand:   1.5243933021550444e8
Total received: 1.6034409875006367e7
Total unmet:    1.3640492034049806e8
Demand reduction (%): 10.518551775541404%
=== Neighborhoods That Received Food ===

Neighborhood 69: received 136133.12 units
Neighborhood 129: received 836647.81 units
Neighborhood 156: received 407409.82 units
Neighborhood 165: received 74294.73 units
Neighborhood 168: received 3595.88 units
Neighborhood 172: received 233734.14 units
Neighborhood 182: received 1.17746402e6 units
Neighborhood 183: received 466255.44 units
Neighborhood 191: received 188186.17 units
Neighborhood 195: received 234778.29 units
Neighborhood 196: received 1.34196352e6 units
Neighborhood 207: received 370841.07 units
Neighborhood 242: received 167943.74 units
Neighborhood 248: received 1.08719332e6 units
Neighborhood 250: rece