In [13]:
import pandas as pd
import numpy as np
import geopandas as gpd
from haversine import haversine, Unit
import gurobipy as gp
from gurobipy import GRB

# Load and prepare data
nta_table = pd.read_csv("data/model/nta_table.csv")
pantries_table = pd.read_csv("data/model/pantries_table.csv")
total_supply = pd.read_csv("data/model/total_supplies.csv")

In [53]:
most_recent = total_supply.sort_values(by='Report Start Date',ascending=False)['Report Start Date'].iloc[0]

TOTAL_SUPPLY = total_supply[total_supply['Report Start Date'] == most_recent].Number.iloc[0]
TOTAL_SUPPLY = int(TOTAL_SUPPLY.replace(',', ''))

In [58]:
# Clean supply gap column
SUPPLY_GAP_COL = "Supply Gap (lbs.)"
nta_table[SUPPLY_GAP_COL] = nta_table[SUPPLY_GAP_COL].apply(
    lambda x: float(str(x).replace(',', ''))
)

print(f"Loaded {len(nta_table)} neighborhoods and {len(pantries_table)} pantries")

# Separate excess and deficit NTAs
EXCESS_NTA = nta_table[nta_table[SUPPLY_GAP_COL] < 0].copy()
DEFICIT_NTA = nta_table[nta_table[SUPPLY_GAP_COL] > 0].copy()

excess_nta_ids = set(EXCESS_NTA['nta2020'].astype(str).str.strip().str.upper())
deficit_nta_ids = set(DEFICIT_NTA['nta2020'].astype(str).str.strip().str.upper())

print(f"\nExcess NTAs: {len(excess_nta_ids)}")
print(f"Deficit NTAs: {len(deficit_nta_ids)}")

# Filter pantries by NTA membership
# Ensure pantries table has clean NTA codes
pantries_table['nta2020_clean'] = pantries_table['nta2020'].astype(str).str.strip().str.upper()

# Filter pantries
excess_pantries = pantries_table[
    pantries_table['nta2020_clean'].isin(excess_nta_ids)
].copy()

deficit_pantries = pantries_table[
    pantries_table['nta2020_clean'].isin(deficit_nta_ids)
].copy()

# Ensure 'id' column exists and is string
excess_pantries['id'] = excess_pantries['id'].astype(str)
deficit_pantries['id'] = deficit_pantries['id'].astype(str)

print(f"Excess pantries: {len(excess_pantries)}")
print(f"Deficit pantries: {len(deficit_pantries)}")

NUM_DEFICIT_PANTRIES = len(deficit_pantries)
NUM_EXCESS_PANTRIES = len(excess_pantries)

# Build supply and demand dictionaries using pantry IDs
# Create NTA lookup dictionary for faster access
nta_lookup = {}
for _, row in nta_table.iterrows():
    nta_code = str(row['nta2020']).strip().upper()
    nta_lookup[nta_code] = {
        'gap': float(row[SUPPLY_GAP_COL]),
        'pantry_count': int(row['pantry_count']) if pd.notna(row['pantry_count']) else 1
    }

# Build supply dictionary (keyed by pantry ID)
supply = {}
for _, row in excess_pantries.iterrows():
    pantry_id = str(row['id'])
    nta_code = row['nta2020_clean']
    
    if nta_code in nta_lookup:
        gap = nta_lookup[nta_code]['gap']
        # n_pantries = max(nta_lookup[nta_code]['pantry_count'], 1)
        supply[pantry_id] = TOTAL_SUPPLY / NUM_DEFICIT_PANTRIES if gap < 0 else 0.0
        # supply[pantry_id] = TOTAL_SUPPLY / NUM_EXCESS_PANTRIES if gap < 0 else 0.0
    else:
        supply[pantry_id] = 0.0

# Build demand dictionary (keyed by pantry ID)
demand = {}
for _, row in deficit_pantries.iterrows():
    pantry_id = str(row['id'])
    nta_code = row['nta2020_clean']
    
    if nta_code in nta_lookup:
        gap = nta_lookup[nta_code]['gap']
        n_pantries = max(nta_lookup[nta_code]['pantry_count'], 1)
        demand[pantry_id] = gap / n_pantries if gap > 0 else 0.0
    else:
        demand[pantry_id] = 0.0

print(f"\nSupply pantries with data: {len(supply)}")
print(f"Demand pantries with data: {len(demand)}")
print(f"Total supply (lbs): {sum(supply.values()):,.2f}")
print(f"Total demand (lbs): {sum(demand.values()):,.2f}")

# Compute distance matrix using pantry IDs
def parse_point(geom_str):
    """Extract (lat, lon) from POINT string"""
    if pd.isna(geom_str):
        return None
    try:
        # Handle "POINT (lon lat)" format
        coords = str(geom_str).replace('POINT (', '').replace(')', '').split()
        return (float(coords[1]), float(coords[0]))  # (lat, lon)
    except:
        return None

excess_pantries['coords'] = excess_pantries['geometry'].apply(parse_point)
deficit_pantries['coords'] = deficit_pantries['geometry'].apply(parse_point)

# Remove any pantries with missing coordinates
excess_pantries = excess_pantries.dropna(subset=['coords'])
deficit_pantries = deficit_pantries.dropna(subset=['coords'])

print(f"Excess pantries with valid coords: {len(excess_pantries)}")
print(f"Deficit pantries with valid coords: {len(deficit_pantries)}")

# Get lists of IDs in order
excess_ids = excess_pantries['id'].tolist()
deficit_ids = deficit_pantries['id'].tolist()


Loaded 197 neighborhoods and 515 pantries

Excess NTAs: 55
Deficit NTAs: 142
Excess pantries: 231
Deficit pantries: 283

Supply pantries with data: 231
Demand pantries with data: 283
Total supply (lbs): 6,986,018.72
Total demand (lbs): 96,655,913.95
Excess pantries with valid coords: 231
Deficit pantries with valid coords: 283


### Compute distances

In [59]:
# Compute distance matrix
print("\nComputing distance matrix...")
dist_matrix = np.zeros((len(excess_ids), len(deficit_ids)))

for i, excess_id in enumerate(excess_ids):
    if i % 50 == 0:
        print(f"  Processing excess pantry {i+1}/{len(excess_ids)}")
    
    excess_coord = excess_pantries[excess_pantries['id'] == excess_id]['coords'].iloc[0]
    for j, deficit_id in enumerate(deficit_ids):
        deficit_coord = deficit_pantries[deficit_pantries['id'] == deficit_id]['coords'].iloc[0]
        dist_matrix[i, j] = haversine(excess_coord, deficit_coord, unit=Unit.MILES)

# Create DataFrame with pantry IDs as index/columns
dist_df_pantry = pd.DataFrame(
    dist_matrix,
    index=excess_ids,
    columns=deficit_ids
)

print(f"\nDistance matrix shape: {dist_df_pantry.shape}")
print(f"Distance range: {dist_matrix.min():.2f} to {dist_matrix.max():.2f} miles")


Computing distance matrix...
  Processing excess pantry 1/231
  Processing excess pantry 51/231
  Processing excess pantry 101/231
  Processing excess pantry 151/231
  Processing excess pantry 201/231

Distance matrix shape: (231, 283)
Distance range: 0.04 to 33.10 miles


### Parameters

In [56]:
# Verify alignment and clean data
# Update supply/demand dicts to only include pantries in distance matrix
supply = {pid: supply[pid] for pid in excess_ids if pid in supply}
demand = {pid: demand[pid] for pid in deficit_ids if pid in demand}

# Remove pantries with zero supply or demand (optional - helps reduce problem size)
supply = {k: v for k, v in supply.items() if v > 0}
demand = {k: v for k, v in demand.items() if v > 0}

# Update excess_ids and deficit_ids to match
excess_ids = [pid for pid in excess_ids if pid in supply]
deficit_ids = [pid for pid in deficit_ids if pid in demand]

# Filter distance matrix
dist_df_pantry = dist_df_pantry.loc[excess_ids, deficit_ids]

print(f"Supply dict keys: {len(supply)}")
print(f"Demand dict keys: {len(demand)}")
print(f"Distance matrix rows (excess): {dist_df_pantry.shape[0]}")
print(f"Distance matrix columns (deficit): {dist_df_pantry.shape[1]}")
print(f"All supply keys in distance index: {set(supply.keys()) == set(dist_df_pantry.index)}")
print(f"All demand keys in distance columns: {set(demand.keys()) == set(dist_df_pantry.columns)}")
print(f"\nTotal supply available: {sum(supply.values()):,.2f} lbs")
print(f"Total demand needed: {sum(demand.values()):,.2f} lbs")
print(f"Supply/Demand ratio: {sum(supply.values())/sum(demand.values()):.2%}")

Supply dict keys: 231
Demand dict keys: 283
Distance matrix rows (excess): 231
Distance matrix columns (deficit): 283
All supply keys in distance index: True
All demand keys in distance columns: True

Total supply available: 8,558,629.00 lbs
Total demand needed: 96,655,913.95 lbs
Supply/Demand ratio: 8.85%


### Model Building

In [57]:
# OPTIMIZATION MODEL
print("\n" + "="*60)
print("BUILDING OPTIMIZATION MODEL")
print("="*60)

LAMBDA_UNMET = 100
WEIGHT_UNMET = 100

# Build model
model = gp.Model("pantry_rebalance")
model.setParam("OutputFlag", 1)

# Decision variables
x = {(i, j): model.addVar(lb=0.0, name=f"x_{i}_{j}") 
     for i in excess_ids for j in deficit_ids}

u = {j: model.addVar(lb=0.0, name=f"u_{j}") for j in deficit_ids}

model.update()

print(f"\nVariables created:")
print(f"  Flow variables (x): {len(x):,}")
print(f"  Unmet variables (u): {len(u):,}")

# Objective function
transport_cost = gp.quicksum(
    dist_df_pantry.loc[i, j] * x[i, j]
    for i in excess_ids for j in deficit_ids
)

unmet_cost = LAMBDA_UNMET * gp.quicksum(
    WEIGHT_UNMET * u[j] for j in deficit_ids
)

model.setObjective(transport_cost + unmet_cost, GRB.MINIMIZE)

print("\nObjective function set: minimize (transport_cost + penalty * unmet_demand)")

# Constraints
print("\nAdding constraints...")

# 1. Demand satisfaction
for j in deficit_ids:
    model.addConstr(
        gp.quicksum(x[i, j] for i in excess_ids) + u[j] == demand[j],
        name=f"demand_{j}"
    )

print(f"  Added {len(deficit_ids)} demand constraints")

# 2. Supply limits
for i in excess_ids:
    model.addConstr(
        gp.quicksum(x[i, j] for j in deficit_ids) <= supply[i],
        name=f"supply_{i}"
    )

print(f"  Added {len(excess_ids)} supply constraints")


BUILDING OPTIMIZATION MODEL
Set parameter OutputFlag to value 1

Variables created:
  Flow variables (x): 65,373
  Unmet variables (u): 283

Objective function set: minimize (transport_cost + penalty * unmet_demand)

Adding constraints...
  Added 283 demand constraints
  Added 231 supply constraints


### Solving

In [51]:
# Solve
print("\n" + "="*60)
print("SOLVING OPTIMIZATION MODEL")
print("="*60 + "\n")

model.optimize()


SOLVING OPTIMIZATION MODEL

Gurobi Optimizer version 12.0.0 build v12.0.0rc1 (mac64[rosetta2] - Darwin 23.6.0 23G80)

CPU model: Apple M3
Thread count: 8 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 514 rows, 65656 columns and 131029 nonzeros
Model fingerprint: 0x0d77075e
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [4e-02, 1e+04]
  Bounds range     [0e+00, 0e+00]
  RHS range        [2e+04, 2e+06]
Presolve time: 0.04s
Presolved: 514 rows, 65656 columns, 131029 nonzeros

Concurrent LP optimizer: primal simplex, dual simplex, and barrier
Showing barrier log only...

Ordering time: 0.00s

Barrier statistics:
 AA' NZ     : 6.537e+04
 Factor NZ  : 1.198e+05 (roughly 30 MB of memory)
 Factor Ops : 3.620e+07 (less than 1 second per iteration)
 Threads    : 6

Barrier performed 0 iterations in 0.12 seconds (0.06 work units)
Barrier solve interrupted - model solved by another algorithm


Solved with primal simplex
Iteration 

In [52]:
if model.status == GRB.OPTIMAL:
    print("\n" + "="*60)
    print("‚úÖ OPTIMAL SOLUTION FOUND")
    print("="*60)
    
    # Extract flows
    flows = []
    for i in excess_ids:
        for j in deficit_ids:
            val = x[i, j].X
            if val > 1e-6:
                flows.append({
                    "from_pantry": i,
                    "to_pantry": j,
                    "lbs": val,
                    "distance_miles": dist_df_pantry.loc[i, j]
                })
    
    flows_df = pd.DataFrame(flows)
    
    # Extract unmet demand
    unmet_demand = {j: u[j].X for j in deficit_ids}
    total_unmet = sum(unmet_demand.values())
    
    # Calculate statistics
    total_transported = flows_df['lbs'].sum() if len(flows_df) > 0 else 0
    total_distance = (flows_df['lbs'] * flows_df['distance_miles']).sum() if len(flows_df) > 0 else 0
    avg_distance = flows_df['distance_miles'].mean() if len(flows_df) > 0 else 0
    
    print(f"\nüìä SOLUTION STATISTICS:")
    print(f"  Total flows created: {len(flows)}")
    print(f"  Total food transported: {total_transported:,.2f} lbs")
    print(f"  Total unmet demand: {total_unmet:,.2f} lbs ({total_unmet/sum(demand.values())*100:.1f}% of total demand)")
    print(f"  Total transport cost: {total_distance:,.2f} lb-miles")
    print(f"  Average transport distance: {avg_distance:.2f} miles")
    print(f"  Objective value: {model.objVal:,.2f}")
    
    # Save results
    if len(flows_df) > 0:
        flows_df.to_csv("data/model/optimal_flows.csv", index=False)
        print(f"\n Results saved to data/model/optimal_flows.csv")
        
        print("\n Sample flows:")
        print(flows_df.head(10).to_string(index=False))
    else:
        print("\n  No flows generated (all demand is unmet)")
    
    # Show pantries with highest unmet demand
    if total_unmet > 0:
        unmet_df = pd.DataFrame([
            {"pantry_id": j, "unmet_lbs": unmet_demand[j], "original_demand": demand[j]}
            for j in deficit_ids if unmet_demand[j] > 1
        ]).sort_values("unmet_lbs", ascending=False)
        
        print(f"\n Top 10 pantries with unmet demand:")
        print(unmet_df.head(10).to_string(index=False))

elif model.status == GRB.INFEASIBLE:
    print("\n MODEL IS INFEASIBLE")
    print("Computing IIS (Irreducible Inconsistent Subsystem)...")
    model.computeIIS()
    model.write("model_iis.ilp")
    print("IIS written to model_iis.ilp")
    
elif model.status == GRB.UNBOUNDED:
    print("\n MODEL IS UNBOUNDED")
    
else:
    print(f"\n Optimization failed with status code: {model.status}")
    print("See Gurobi documentation for status code meanings")


‚úÖ OPTIMAL SOLUTION FOUND

üìä SOLUTION STATISTICS:
  Total flows created: 391
  Total food transported: 51,351,774.00 lbs
  Total unmet demand: 45,304,139.95 lbs (46.9% of total demand)
  Total transport cost: 128,619,222.43 lb-miles
  Average transport distance: 2.36 miles
  Objective value: 453,170,018,707.15

 Results saved to data/model/optimal_flows.csv

 Sample flows:
from_pantry to_pantry           lbs  distance_miles
          4       374 222302.051948        1.050541
          6       439  39004.997311        5.373673
          6       514 183297.054637        4.666845
         12        11 111298.392590        0.366142
         12       256 111003.659358        0.483762
         16       410  18434.556744        1.913578
         16       426  16341.615221        9.879046
         16       498  13784.024404        2.315599
         16       502 173741.855579        4.974975
         19        18   2967.846350        0.401329

 Top 10 pantries with unmet demand:
pantry_id 

In [27]:
print("\n" + "="*60)
print("SENSITIVITY ANALYSIS: Finding Optimal Penalty")
print("="*60)

lambda_values = [1, 5, 10, 20, 50, 100]
results = []

for lam in lambda_values:
    print(f"\nTesting Œª = {lam}...")
    
    # Build model
    model = gp.Model("test")
    model.setParam("OutputFlag", 0)  # Quiet mode
    
    # Variables
    x = {(i, j): model.addVar(lb=0.0) for i in excess_ids for j in deficit_ids}
    u = {j: model.addVar(lb=0.0) for j in deficit_ids}
    model.update()
    
    # Objective
    transport = gp.quicksum(dist_df_pantry.loc[i, j] * x[i, j] 
                            for i in excess_ids for j in deficit_ids)
    unmet = lam * gp.quicksum(u[j] for j in deficit_ids)
    model.setObjective(transport + unmet, GRB.MINIMIZE)
    
    # Constraints
    for j in deficit_ids:
        model.addConstr(gp.quicksum(x[i, j] for i in excess_ids) + u[j] == demand[j])
    for i in excess_ids:
        model.addConstr(gp.quicksum(x[i, j] for j in deficit_ids) <= supply[i])
    
    # Solve
    model.optimize()
    
    if model.status == GRB.OPTIMAL:
        transported = sum(x[i,j].X for i in excess_ids for j in deficit_ids)
        total_unmet = sum(u[j].X for j in deficit_ids)
        transport_cost = sum(dist_df_pantry.loc[i,j] * x[i,j].X 
                            for i in excess_ids for j in deficit_ids)
        avg_dist = transport_cost / transported if transported > 0 else 0
        
        results.append({
            'lambda': lam,
            'transported_lbs': transported,
            'unmet_lbs': total_unmet,
            'pct_demand_met': transported / sum(demand.values()) * 100,
            'avg_distance': avg_dist,
            'total_lb_miles': transport_cost
        })

# Display results
results_df = pd.DataFrame(results)
print("\n" + "="*60)
print("SENSITIVITY ANALYSIS RESULTS")
print("="*60)
print(results_df.to_string(index=False))

# Save
results_df.to_csv("data/model/sensitivity_analysis.csv", index=False)
print("\n Sensitivity analysis saved")

# Find the "elbow" - best balance
results_df['efficiency'] = results_df['transported_lbs'] / results_df['total_lb_miles']
best_lambda = results_df.loc[results_df['efficiency'].idxmax(), 'lambda']
print(f"\n Recommended Œª value: {best_lambda} (best efficiency)")


SENSITIVITY ANALYSIS: Finding Optimal Penalty

Testing Œª = 1...

Testing Œª = 5...

Testing Œª = 10...

Testing Œª = 20...

Testing Œª = 50...

Testing Œª = 100...

SENSITIVITY ANALYSIS RESULTS
 lambda  transported_lbs    unmet_lbs  pct_demand_met  avg_distance  total_lb_miles
      1     4.157466e+06 9.249845e+07        4.301305      0.549030    2.282574e+06
      5     6.986019e+06 8.966990e+07        7.227720      0.981873    6.859383e+06
     10     6.986019e+06 8.966990e+07        7.227720      0.981873    6.859383e+06
     20     6.986019e+06 8.966990e+07        7.227720      0.981873    6.859383e+06
     50     6.986019e+06 8.966990e+07        7.227720      0.981873    6.859383e+06
    100     6.986019e+06 8.966990e+07        7.227720      0.981873    6.859383e+06

 Sensitivity analysis saved

 Recommended Œª value: 1 (best efficiency)


In [28]:
# ============================================================
# SENSITIVITY ANALYSIS: WEIGHT_UNMET
# ============================================================

print("\n" + "="*60)
print("SENSITIVITY ANALYSIS: Finding Optimal Weight_Unmet")
print("="*60)

# Fix lambda, vary weight
FIXED_LAMBDA = 1
weight_values = [1, 10, 25, 50, 100, 200, 500]
weight_results = []

for weight in weight_values:
    print(f"\nTesting WEIGHT_UNMET = {weight}...")
    
    # Build model
    model_test = gp.Model("test_weight")
    model_test.setParam("OutputFlag", 0)  # Quiet mode
    
    # Variables
    x_test = {(i, j): model_test.addVar(lb=0.0) for i in excess_ids for j in deficit_ids}
    u_test = {j: model_test.addVar(lb=0.0) for j in deficit_ids}
    model_test.update()
    
    # Objective
    transport = gp.quicksum(dist_df_pantry.loc[i, j] * x_test[i, j] 
                            for i in excess_ids for j in deficit_ids)
    unmet = FIXED_LAMBDA * gp.quicksum(weight * u_test[j] for j in deficit_ids)
    model_test.setObjective(transport + unmet, GRB.MINIMIZE)
    
    # Constraints
    for j in deficit_ids:
        model_test.addConstr(gp.quicksum(x_test[i, j] for i in excess_ids) + u_test[j] == demand[j])
    for i in excess_ids:
        model_test.addConstr(gp.quicksum(x_test[i, j] for j in deficit_ids) <= supply[i])
    
    # Solve
    model_test.optimize()
    
    if model_test.status == GRB.OPTIMAL:
        transported = sum(x_test[i,j].X for i in excess_ids for j in deficit_ids)
        total_unmet = sum(u_test[j].X for j in deficit_ids)
        transport_cost = sum(dist_df_pantry.loc[i,j] * x_test[i,j].X 
                            for i in excess_ids for j in deficit_ids)
        avg_dist = transport_cost / transported if transported > 0 else 0
        
        weight_results.append({
            'weight_unmet': weight,
            'transported_lbs': transported,
            'unmet_lbs': total_unmet,
            'pct_supply_used': transported / sum(supply.values()) * 100,
            'pct_demand_met': transported / sum(demand.values()) * 100,
            'avg_distance': avg_dist,
            'total_lb_miles': transport_cost
        })

# Display results
weight_results_df = pd.DataFrame(weight_results)
print("\n" + "="*60)
print("WEIGHT_UNMET SENSITIVITY ANALYSIS RESULTS (Œª = 1)")
print("="*60)
print(weight_results_df.to_string(index=False))

# Save
weight_results_df.to_csv("data/model/sensitivity_weight_unmet.csv", index=False)
print("\n‚úÖ Weight sensitivity analysis saved")

# Find the "elbow" - best balance
weight_results_df['efficiency'] = weight_results_df['transported_lbs'] / weight_results_df['total_lb_miles']
best_weight = weight_results_df.loc[weight_results_df['efficiency'].idxmax(), 'weight_unmet']
print(f"\nüéØ Recommended WEIGHT_UNMET value: {best_weight} (best efficiency)")


SENSITIVITY ANALYSIS: Finding Optimal Weight_Unmet

Testing WEIGHT_UNMET = 1...

Testing WEIGHT_UNMET = 10...

Testing WEIGHT_UNMET = 25...

Testing WEIGHT_UNMET = 50...

Testing WEIGHT_UNMET = 100...

Testing WEIGHT_UNMET = 200...

Testing WEIGHT_UNMET = 500...

WEIGHT_UNMET SENSITIVITY ANALYSIS RESULTS (Œª = 1)
 weight_unmet  transported_lbs    unmet_lbs  pct_supply_used  pct_demand_met  avg_distance  total_lb_miles
            1     4.157466e+06 9.249845e+07        59.511229        4.301305      0.549030    2.282574e+06
           10     6.986019e+06 8.966990e+07       100.000000        7.227720      0.981873    6.859383e+06
           25     6.986019e+06 8.966990e+07       100.000000        7.227720      0.981873    6.859383e+06
           50     6.986019e+06 8.966990e+07       100.000000        7.227720      0.981873    6.859383e+06
          100     6.986019e+06 8.966990e+07       100.000000        7.227720      0.981873    6.859383e+06
          200     6.986019e+06 8.966990e+0

In [None]:
print("\n" + "="*60)
print("SOLUTION BREAKDOWN BY BOROUGH")
print("="*60)

# Add borough info to pantries
borough_map = {
    'BX': 'Bronx', 'BK': 'Brooklyn', 'MN': 'Manhattan',
    'QN': 'Queens', 'SI': 'Staten Island'
}

excess_pantries['borough'] = excess_pantries['nta2020_clean'].str[:2].map(borough_map)
deficit_pantries['borough'] = deficit_pantries['nta2020_clean'].str[:2].map(borough_map)

if len(flows_df) > 0:
    # Merge borough info
    flows_df = flows_df.merge(
        excess_pantries[['id', 'borough']].rename(columns={'borough': 'from_borough'}),
        left_on='from_pantry', right_on='id', how='left'
    ).merge(
        deficit_pantries[['id', 'borough']].rename(columns={'borough': 'to_borough'}),
        left_on='to_pantry', right_on='id', how='left'
    )
    
    # Summary by borough
    borough_summary = flows_df.groupby(['from_borough', 'to_borough']).agg({
        'lbs': 'sum',
        'distance_miles': 'mean',
        'from_pantry': 'count'
    }).rename(columns={'from_pantry': 'num_flows'}).reset_index()
    
    borough_summary['lbs'] = borough_summary['lbs'].round(0)
    borough_summary['distance_miles'] = borough_summary['distance_miles'].round(2)
    
    print("\nüó∫Ô∏è  Food flows by borough pair:")
    print(borough_summary.sort_values('lbs', ascending=False).to_string(index=False))
    
    # Within vs between borough flows
    flows_df['flow_type'] = flows_df.apply(
        lambda x: 'Within borough' if x['from_borough'] == x['to_borough'] else 'Between boroughs',
        axis=1
    )
    
    flow_type_summary = flows_df.groupby('flow_type').agg({
        'lbs': 'sum',
        'distance_miles': 'mean'
    }).round(2)
    
    print("\nüèôÔ∏è  Within vs Between Borough:")
    print(flow_type_summary)
    
    # Supply and demand by borough
    supply_by_boro = excess_pantries.groupby('borough').agg({
        'id': 'count',
        'nta2020_clean': lambda x: x.map(lambda n: supply.get(
            excess_pantries[excess_pantries['nta2020_clean']==n]['id'].iloc[0] if len(excess_pantries[excess_pantries['nta2020_clean']==n]) > 0 else '0', 0
        )).sum()
    }).rename(columns={'id': 'num_pantries', 'nta2020_clean': 'total_supply'})
    
    print("\nüì¶ Supply by borough:")
    print(supply_by_boro)


SOLUTION BREAKDOWN BY BOROUGH

üó∫Ô∏è  Food flows by borough pair:
 from_borough    to_borough       lbs  distance_miles  num_flows
     Brooklyn      Brooklyn 4083459.0            0.47         62
        Bronx         Bronx 2940846.0            0.53         33
       Queens        Queens 2810463.0            0.62         24
    Manhattan     Manhattan  940694.0            0.38         14
Staten Island Staten Island  390237.0            0.60          4
    Manhattan         Bronx  204011.0            0.83          3
     Brooklyn        Queens   16148.0            0.68          2

üèôÔ∏è  Within vs Between Borough:
                          lbs  distance_miles
flow_type                                    
Between boroughs    220158.76            0.77
Within borough    11165698.82            0.51

üì¶ Supply by borough:
               num_pantries  total_supply
borough                                  
Bronx                    34  5.238664e+06
Brooklyn                 98  2.155641e+

In [12]:
# ============================================================
# DISTANCE DIAGNOSTICS
# ============================================================

print("\n" + "="*60)
print("DISTANCE MATRIX DIAGNOSTICS")
print("="*60)

# Check distance distribution
all_distances = dist_df_pantry.values.flatten()
print(f"\nüìè Distance Statistics:")
print(f"  Min distance:     {all_distances.min():.2f} miles")
print(f"  25th percentile:  {np.percentile(all_distances, 25):.2f} miles")
print(f"  Median distance:  {np.percentile(all_distances, 50):.2f} miles")
print(f"  75th percentile:  {np.percentile(all_distances, 75):.2f} miles")
print(f"  Max distance:     {all_distances.max():.2f} miles")
print(f"  Mean distance:    {all_distances.mean():.2f} miles")

# Check how many flows are possible at different distance thresholds
thresholds = [5, 10, 15, 20, 25, 30]
print(f"\nüéØ Flows Available at Different Distance Caps:")
for threshold in thresholds:
    num_flows = (dist_df_pantry <= threshold).sum().sum()
    pct = num_flows / (len(excess_ids) * len(deficit_ids)) * 100
    print(f"  Within {threshold:2d} miles: {num_flows:6,} flows ({pct:5.1f}% of all pairs)")

# Check connectivity
print(f"\nüîó CONNECTIVITY ANALYSIS:")
for i in excess_ids[:5]:  # Check first 5 excess pantries
    nta = excess_pantries[excess_pantries['id']==i]['nta2020_clean'].iloc[0]
    supply_amt = supply[i]
    
    # Count how many deficit pantries are reachable
    within_5mi = (dist_df_pantry.loc[i] <= 5).sum()
    within_10mi = (dist_df_pantry.loc[i] <= 10).sum()
    within_15mi = (dist_df_pantry.loc[i] <= 15).sum()
    
    print(f"\n  Pantry {i} ({nta}): {supply_amt:,.0f} lbs available")
    print(f"    Can reach within  5 miles: {within_5mi:3d} pantries")
    print(f"    Can reach within 10 miles: {within_10mi:3d} pantries")
    print(f"    Can reach within 15 miles: {within_15mi:3d} pantries")


DISTANCE MATRIX DIAGNOSTICS

üìè Distance Statistics:
  Min distance:     0.04 miles
  25th percentile:  5.36 miles
  Median distance:  9.16 miles
  75th percentile:  12.23 miles
  Max distance:     33.10 miles
  Mean distance:    9.04 miles

üéØ Flows Available at Different Distance Caps:
  Within  5 miles: 14,785 flows ( 22.6% of all pairs)
  Within 10 miles: 36,903 flows ( 56.4% of all pairs)
  Within 15 miles: 59,536 flows ( 91.1% of all pairs)
  Within 20 miles: 64,481 flows ( 98.6% of all pairs)
  Within 25 miles: 65,174 flows ( 99.7% of all pairs)
  Within 30 miles: 65,363 flows (100.0% of all pairs)

üîó CONNECTIVITY ANALYSIS:

  Pantry 4 (QN0203): 181,568 lbs available
    Can reach within  5 miles:  93 pantries
    Can reach within 10 miles: 238 pantries
    Can reach within 15 miles: 277 pantries

  Pantry 6 (QN1201): 325,768 lbs available
    Can reach within  5 miles:  24 pantries
    Can reach within 10 miles: 141 pantries
    Can reach within 15 miles: 271 pantries

