In [73]:
import pandas as pd
import numpy as np
import geopandas as gpd
from haversine import haversine, Unit
import gurobipy as gp
from gurobipy import GRB

# Load and prepare data
nta_table = pd.read_csv("data/model/nta_table.csv")
pantries_table = pd.read_csv("data/model/pantries_table.csv")

# Clean supply gap column
SUPPLY_GAP_COL = "Supply Gap (lbs.)"
nta_table[SUPPLY_GAP_COL] = nta_table[SUPPLY_GAP_COL].apply(
    lambda x: float(str(x).replace(',', ''))
)

print(f"Loaded {len(nta_table)} neighborhoods and {len(pantries_table)} pantries")

# Separate excess and deficit NTAs
EXCESS_NTA = nta_table[nta_table[SUPPLY_GAP_COL] < 0].copy()
DEFICIT_NTA = nta_table[nta_table[SUPPLY_GAP_COL] > 0].copy()

excess_nta_ids = set(EXCESS_NTA['nta2020'].astype(str).str.strip().str.upper())
deficit_nta_ids = set(DEFICIT_NTA['nta2020'].astype(str).str.strip().str.upper())

print(f"\nExcess NTAs: {len(excess_nta_ids)}")
print(f"Deficit NTAs: {len(deficit_nta_ids)}")

deficit_pantries = pantries_table[pantries_table['nta2020'].isin(deficit_nta_ids)]
excess_pantries = pantries_table[pantries_table['nta2020'].isin(excess_nta_ids)]


print(f"\nDeficit pantries: {len(deficit_pantries)}")
print(f"Excess pantries: {len(excess_pantries)}")

Loaded 197 neighborhoods and 515 pantries

Excess NTAs: 55
Deficit NTAs: 142

Deficit pantries: 283
Excess pantries: 231


In [76]:


def parse_point(geom_str):
    """Extract (lat, lon) from POINT string"""
    if pd.isna(geom_str):
        return None
    try:
        # Handle "POINT (lon lat)" format
        coords = str(geom_str).replace('POINT (', '').replace(')', '').split()
        return (float(coords[1]), float(coords[0]))  # (lat, lon)
    except:
        return None

excess_pantries['coords'] = excess_pantries['geometry'].apply(parse_point)
deficit_pantries['coords'] = deficit_pantries['geometry'].apply(parse_point)

# Remove any pantries with missing coordinates
excess_pantries = excess_pantries.dropna(subset=['coords'])
deficit_pantries = deficit_pantries.dropna(subset=['coords'])

print(f"Excess pantries with valid coords: {len(excess_pantries)}")
print(f"Deficit pantries with valid coords: {len(deficit_pantries)}")

excess_pantries['id'] = excess_pantries['id'].astype(str)
deficit_pantries['id'] = deficit_pantries['id'].astype(str)
# Get lists of IDs in order
excess_ids = excess_pantries['id'].tolist()
deficit_ids = deficit_pantries['id'].tolist()

# # Deficit pantry IDs as strings (to match Gurobi var keys)
# deficit_ids = [str(pid) for pid in deficit_pantries["id"]]
# excess_ids  = [str(pid) for pid in excess_pantries["id"]]

# Compute distance matrix
print("\nComputing distance matrix...")
dist_matrix = np.zeros((len(excess_ids), len(deficit_ids)))

for i, excess_id in enumerate(excess_ids):
    if i % 50 == 0:
        print(f"  Processing excess pantry {i+1}/{len(excess_ids)}")
    
    excess_coord = excess_pantries[excess_pantries['id'] == excess_id]['coords'].iloc[0]
    for j, deficit_id in enumerate(deficit_ids):
        deficit_coord = deficit_pantries[deficit_pantries['id'] == deficit_id]['coords'].iloc[0]
        dist_matrix[i, j] = haversine(excess_coord, deficit_coord, unit=Unit.MILES)

# Create DataFrame with pantry IDs as index/columns
dist_df_pantry = pd.DataFrame(
    dist_matrix,
    index=excess_ids,
    columns=deficit_ids
)

print(f"\nDistance matrix shape: {dist_df_pantry.shape}")
print(f"Distance range: {dist_matrix.min():.2f} to {dist_matrix.max():.2f} miles")

Excess pantries with valid coords: 231
Deficit pantries with valid coords: 283

Computing distance matrix...
  Processing excess pantry 1/231
  Processing excess pantry 51/231
  Processing excess pantry 101/231
  Processing excess pantry 151/231
  Processing excess pantry 201/231

Distance matrix shape: (231, 283)
Distance range: 0.04 to 33.10 miles


In [77]:
# Verify alignment and clean data
# Update supply/demand dicts to only include pantries in distance matrix
supply_per_pantry = {pid: supply_per_pantry[pid] for pid in excess_ids if pid in supply_per_pantry}
demand_per_pantry = {pid: demand_per_pantry[pid] for pid in deficit_ids if pid in demand_per_pantry}

# Remove pantries with zero supply or demand (optional - helps reduce problem size)
supply_per_pantry = {
    k: v for k, v in supply_per_pantry.items() 
    if v["supply"] > 0
}

demand_per_pantry = {
    k: v for k, v in demand_per_pantry.items() 
    if v["demand"] > 0
}

In [78]:
# Filter pantries by NTA membership
# Ensure pantries table has clean NTA codes
pantries_table['nta2020_clean'] = pantries_table['nta2020'].astype(str).str.strip().str.upper()

tracts = pd.read_csv("data/model/tracts_table.csv")
pantries = pd.read_csv("data/model/pantries_table_new.csv")
# Ensure geoid matches type
pantries_table["geoid"] = pantries_table["geoid"].astype(str)
tracts["geoid"] = tracts["geoid"].astype(str)

########################### FILTER PANTRIES: ID #, GEO ID, NTA, TOTAL POP ###################
excess_pantries = (
    pantries_table
    # 1. filter to excess NTAs
    .loc[pantries_table['nta2020_clean'].isin(excess_nta_ids),
         ['id', 'geoid', 'nta2020_clean']]
    # 2. add tract population from tracts table
    .merge(
        tracts[['geoid', 'TotalPop']],
        on='geoid',
        how='left'
    )
)
print(excess_pantries)
deficit_pantries = (
    pantries_table
    # 1. filter to excess NTAs
    .loc[pantries_table['nta2020_clean'].isin(deficit_nta_ids),
         ['id', 'geoid', 'nta2020_clean']]
    # 2. add tract population from tracts table
    .merge(
        tracts[['geoid', 'TotalPop']],
        on='geoid',
        how='left'
    )
)
print(deficit_pantries)
# Ensure 'id' column exists and is string
excess_pantries['id'] = excess_pantries['id'].astype(str)
deficit_pantries['id'] = deficit_pantries['id'].astype(str)

print(f"Excess pantries: {len(excess_pantries)}")
print(f"Deficit pantries: {len(deficit_pantries)}")

# Build supply and demand dictionaries using pantry IDs
# Create NTA lookup dictionary for faster access
nta_lookup = {}
for _, row in nta_table.iterrows():
    nta_code = str(row['nta2020']).strip().upper()
    nta_lookup[nta_code] = {
        'gap': float(row[SUPPLY_GAP_COL]),
        'pantry_count': int(row['pantry_count']) if pd.notna(row['pantry_count']) else 1
    }

# Build supply dictionary (keyed by pantry ID)
supply_per_pantry = {}

for _, row in pantries_table.iterrows():
    pantry_id = str(row['id'])
    nta_code = row['nta2020_clean']

    if nta_code in nta_lookup:
        gap = nta_lookup[nta_code]['gap']
        n_pantries = max(nta_lookup[nta_code]['pantry_count'], 1)

        supply_amount = abs(gap) / n_pantries if gap > 0 else 0.0
    else:
        supply_amount = 0.0

    # store BOTH NTA and supply
    supply_per_pantry[pantry_id] = {
        "nta": nta_code,
        "supply": supply_amount
    }

print(supply_per_pantry)


# Build demand dictionary (keyed by pantry ID)
demand_per_pantry = {}

for _, row in pantries_table.iterrows():
    pantry_id = str(row['id'])
    nta_code = row['nta2020_clean']

    if nta_code in nta_lookup:
        gap = nta_lookup[nta_code]['gap']        # This is positive for deficit NTAs
        n_pantries = max(nta_lookup[nta_code]['pantry_count'], 1)

        demand_amount = gap / n_pantries if gap > 0 else 0.0
    else:
        demand_amount = 0.0

    # Store BOTH the NTA and the amount
    demand_per_pantry[pantry_id] = {
        "nta": nta_code,
        "demand": demand_amount
    }

print(demand_per_pantry)


print(f"\nSupply pantries with data: {len(supply_per_pantry)}")
print(f"Demand pantries with data: {len(demand_per_pantry)}")
total_supply = sum(d["supply"] for d in supply_per_pantry.values())
total_demand = sum(d["demand"] for d in demand_per_pantry.values())

print(f"Total supply (lbs): {total_supply:,.2f}")
print(f"Total demand (lbs): {total_demand:,.2f}")



#################### VERIFICATION ##########################
# Sum the supply across pantries for each NTA
# supply_sum_by_nta = {}

# for pantry_id, info in supply_per_pantry.items():
#     nta = info["nta"]
#     supply = info["supply"]
#     supply_sum_by_nta[nta] = supply_sum_by_nta.get(nta, 0) + supply

# # Compare with NTA gap
# for nta_code, total_supply in supply_sum_by_nta.items():
#     original_gap = nta_lookup[nta_code]["gap"]  # negative = surplus
#     print(f"NTA {nta_code}: summed supply = {total_supply:,.2f}, original gap = {original_gap:,.2f}")

# Sum the demand across pantries for each NTA
# demand_sum_by_nta = {}

# for pantry_id, info in demand_per_pantry.items():
#     nta = info["nta"]
#     dmd = info["demand"]
#     demand_sum_by_nta[nta] = demand_sum_by_nta.get(nta, 0) + dmd

# # Compare with NTA gap
# for nta_code, total_demand in demand_sum_by_nta.items():
#     original_gap = nta_lookup[nta_code]["gap"]  # positive = deficit
#     print(f"NTA {nta_code}: summed demand = {total_demand:,.2f}, original gap = {original_gap:,.2f}")




      id        geoid nta2020_clean  TotalPop
0      4  36081026100        QN0203    7098.0
1      6  36081044601        QN1201    3119.0
2     12  36061011401        MN0802    1276.0
3     16  36081038400        QN1205    2420.0
4     19  36081036300        QN0302    2105.0
..   ...          ...           ...       ...
226  495  36081044400        QN1201    3024.0
227  496  36047116000        BK0502    2139.0
228  506  36047036300        BK1601    4076.0
229  507  36061013000        MN0802    3277.0
230  512  36081051200        QN1303    2925.0

[231 rows x 4 columns]
      id        geoid nta2020_clean  TotalPop
0      0  36005025500        BX0701    5874.0
1      1  36005005002        BX0901    5650.0
2      2  36047032500        BK0901    6446.0
3      3  36047058000        BK1502    3999.0
4      5  36081103202        QN1401    7195.0
..   ...          ...           ...       ...
278  509  36061024500        MN1201   16231.0
279  510  36085001100        SI0101    3032.0
280  511  

In [79]:
# Filter distance matrix
# dist_df_pantry = dist_df_pantry.loc[excess_ids, deficit_ids]
#print(dist_df_pantry)

print(f"Supply dict keys: {len(supply_per_pantry)}")
print(f"Demand dict keys: {len(demand_per_pantry)}")
print(f"Distance matrix rows (excess): {dist_df_pantry.shape[0]}")
print(f"Distance matrix columns (deficit): {dist_df_pantry.shape[1]}")
print(f"All supply keys in distance index: {set(supply_per_pantry.keys()) == set(dist_df_pantry.index)}")
print(f"All demand keys in distance columns: {set(demand_per_pantry.keys()) == set(dist_df_pantry.columns)}")
print(f"\nTotal supply available: {sum(d['supply'] for d in supply_per_pantry.values()):,.2f} lbs")
print(f"\nTotal demand needed: {sum(d['demand'] for d in demand_per_pantry.values()):,.2f} lbs")
total_supply = sum(v['supply'] for v in supply_per_pantry.values())
total_demand = sum(v['demand'] for v in demand_per_pantry.values())
print(f"Supply/Demand ratio: {total_supply/total_demand:.2%}")

# OPTIMIZATION MODEL
print("\n" + "="*60)
print("BUILDING OPTIMIZATION MODEL")
print("="*60)

#LAMBDA_UNMET = 50

######################## CREATE WEIGHT VALUE FOR UNMET DEMAND OBJECTIVE ########################################

# Make sure there are no missing pops; if there are, treat as 0
deficit_pantries["TotalPop"] = deficit_pantries["TotalPop"].fillna(0)


# w_unmet[j] = population of the tract around pantry j
w_unmet = {
    str(row["id"]): float(row["TotalPop"])
    for _, row in deficit_pantries.iterrows()
}

print(w_unmet)

# Build model
model = gp.Model("pantry_rebalance")
model.setParam("OutputFlag", 1)

# Decision variables
x = {(i, j): model.addVar(lb=0.0, name=f"x_{i}_{j}") 
     for i in excess_ids for j in deficit_ids}

u = model.addVars(deficit_ids, name="u", lb=0.0)

model.update()

print(f"\nVariables created:")
print(f"  Flow variables (x): {len(x):,}")
print(f"  Unmet variables (u): {len(u):,}")

# Objective function
transport_cost = gp.quicksum(
    dist_df_pantry.loc[i, j] * x[i, j]
    for i in excess_ids for j in deficit_ids
)

unmet_cost = gp.quicksum(
    w_unmet[j] * u[j] for j in deficit_ids
)

model.setObjective(transport_cost + unmet_cost, GRB.MINIMIZE)

print("\nObjective function set: minimize (transport_cost + penalty * unmet_demand)")

# Constraints
print("\nAdding constraints...")

# 1. Demand satisfaction
for j in deficit_ids:
    model.addConstr(
        gp.quicksum(x[i, j] for i in excess_ids) + u[j] == demand_per_pantry[j]["demand"],
        name=f"demand_{j}"
    )

print(f"  Added {len(deficit_ids)} demand constraints")

# 2. Supply limits
for i in excess_ids:
    model.addConstr(
        gp.quicksum(x[i, j] for j in deficit_ids) <= supply_per_pantry[j]["supply"],
        name=f"supply_{i}"
    )

print(f"  Added {len(excess_ids)} supply constraints")

# Solve
print("\n" + "="*60)
print("SOLVING OPTIMIZATION MODEL")
print("="*60 + "\n")

model.optimize()


Supply dict keys: 515
Demand dict keys: 515
Distance matrix rows (excess): 231
Distance matrix columns (deficit): 283
All supply keys in distance index: False
All demand keys in distance columns: False

Total supply available: 96,655,913.95 lbs

Total demand needed: 96,655,913.95 lbs
Supply/Demand ratio: 100.00%

BUILDING OPTIMIZATION MODEL
{'0': 5874.0, '1': 5650.0, '2': 6446.0, '3': 3999.0, '5': 7195.0, '7': 5069.0, '8': 2513.0, '9': 7962.0, '10': 6890.0, '11': 1683.0, '13': 5319.0, '14': 939.0, '15': 10840.0, '17': 0.0, '18': 3717.0, '20': 2988.0, '21': 4218.0, '22': 7195.0, '24': 0.0, '25': 1486.0, '26': 3797.0, '27': 7279.0, '28': 3782.0, '33': 3125.0, '34': 4131.0, '35': 4048.0, '39': 4702.0, '40': 2577.0, '45': 3411.0, '49': 2436.0, '52': 4790.0, '53': 2685.0, '54': 4987.0, '57': 3797.0, '58': 3058.0, '60': 3302.0, '62': 2560.0, '67': 3973.0, '73': 2111.0, '74': 3278.0, '75': 6947.0, '77': 3860.0, '78': 3506.0, '80': 6156.0, '81': 6416.0, '84': 5136.0, '90': 2906.0, '95': 3819.0

In [80]:
if model.status == GRB.OPTIMAL:
    print("\n" + "="*60)
    print("‚úÖ OPTIMAL SOLUTION FOUND")
    print("="*60)
    
    # Extract flows
    flows = []
    for i in excess_ids:
        for j in deficit_ids:
            val = x[i, j].X
            if val > 1e-6:
                flows.append({
                    "from_pantry": i,
                    "to_pantry": j,
                    "lbs": val,
                    "distance_miles": dist_df_pantry.loc[i, j]
                })
    
    flows_df = pd.DataFrame(flows)
    
    # Extract unmet demand
    unmet_demand = {j: u[j].X for j in deficit_ids}
    total_unmet = sum(unmet_demand.values())
    
    # Calculate statistics
    total_transported = flows_df['lbs'].sum() if len(flows_df) > 0 else 0
    total_distance = (flows_df['lbs'] * flows_df['distance_miles']).sum() if len(flows_df) > 0 else 0
    avg_distance = flows_df['distance_miles'].mean() if len(flows_df) > 0 else 0
    
    print(f"\nüìä SOLUTION STATISTICS:")
    print(f"  Total flows created: {len(flows)}")
    print(f"  Total food transported: {total_transported:,.2f} lbs")
    print(f"  Total unmet demand: {total_unmet:,.2f} lbs ({total_unmet/sum(demand.values())*100:.1f}% of total demand)")
    print(f"  Total transport cost: {total_distance:,.2f} lb-miles")
    print(f"  Average transport distance: {avg_distance:.2f} miles")
    print(f"  Objective value: {model.objVal:,.2f}")
    
    # Save results
    if len(flows_df) > 0:
        flows_df.to_csv("data/model/optimal_flows.csv", index=False)
        print(f"\n Results saved to data/model/optimal_flows.csv")
        
        print("\n Sample flows:")
        print(flows_df.head(10).to_string(index=False))
    else:
        print("\n  No flows generated (all demand is unmet)")
    
    # Show pantries with highest unmet demand
    if total_unmet > 0:
        unmet_df = pd.DataFrame([
            {"pantry_id": j, "unmet_lbs": unmet_demand[j], "original_demand": demand[j]}
            for j in deficit_ids if unmet_demand[j] > 1
        ]).sort_values("unmet_lbs", ascending=False)
        
        print(f"\n Top 10 pantries with unmet demand:")
        print(unmet_df.head(10).to_string(index=False))

elif model.status == GRB.INFEASIBLE:
    print("\n MODEL IS INFEASIBLE")
    print("Computing IIS (Irreducible Inconsistent Subsystem)...")
    model.computeIIS()
    model.write("model_iis.ilp")
    print("IIS written to model_iis.ilp")
    
elif model.status == GRB.UNBOUNDED:
    print("\n MODEL IS UNBOUNDED")
    
else:
    print(f"\n Optimization failed with status code: {model.status}")
    print("See Gurobi documentation for status code meanings")


‚úÖ OPTIMAL SOLUTION FOUND

üìä SOLUTION STATISTICS:
  Total flows created: 493
  Total food transported: 92,784,712.47 lbs
  Total unmet demand: 3,871,201.48 lbs (4.0% of total demand)
  Total transport cost: 400,139,014.74 lb-miles
  Average transport distance: 4.20 miles
  Objective value: 400,139,014.74

 Results saved to data/model/optimal_flows.csv

 Sample flows:
from_pantry to_pantry           lbs  distance_miles
          4       283 308655.761656        4.307161
          4       374  96943.344929        1.050541
          6       275  62362.836272       10.127437
          6       341 343236.270313        3.507507
         12       282 405599.106585        1.282941
         16       168 110115.938131       13.888298
         16       183 115979.264405       10.237020
         16       221 103702.302039       12.671513
         16       502  75801.602010        4.974975
         19        10  10904.021874        6.027550

 Top 10 pantries with unmet demand:
pantry_id     un

In [None]:
print("\n" + "="*60)
print("SENSITIVITY ANALYSIS: Finding Optimal Penalty")
print("="*60)

lambda_values = [1, 5, 10, 20, 50, 100]
results = []

for lam in lambda_values:
    print(f"\nTesting Œª = {lam}...")
    
    # Build model
    model = gp.Model("test")
    model.setParam("OutputFlag", 0)  # Quiet mode
    
    # Variables
    x = {(i, j): model.addVar(lb=0.0) for i in excess_ids for j in deficit_ids}
    u = {j: model.addVar(lb=0.0) for j in deficit_ids}
    model.update()
    
    # Objective
    transport = gp.quicksum(dist_df_pantry.loc[i, j] * x[i, j] 
                            for i in excess_ids for j in deficit_ids)
    unmet = lam * gp.quicksum(u[j] for j in deficit_ids)
    model.setObjective(transport + unmet, GRB.MINIMIZE)
    
    # Constraints
    for j in deficit_ids:
        model.addConstr(gp.quicksum(x[i, j] for i in excess_ids) + u[j] == demand[j])
    for i in excess_ids:
        model.addConstr(gp.quicksum(x[i, j] for j in deficit_ids) <= supply[i])
    
    # Solve
    model.optimize()
    
    if model.status == GRB.OPTIMAL:
        transported = sum(x[i,j].X for i in excess_ids for j in deficit_ids)
        total_unmet = sum(u[j].X for j in deficit_ids)
        transport_cost = sum(dist_df_pantry.loc[i,j] * x[i,j].X 
                            for i in excess_ids for j in deficit_ids)
        avg_dist = transport_cost / transported if transported > 0 else 0
        
        results.append({
            'lambda': lam,
            'transported_lbs': transported,
            'unmet_lbs': total_unmet,
            'pct_demand_met': transported / sum(demand.values()) * 100,
            'avg_distance': avg_dist,
            'total_lb_miles': transport_cost
        })

# Display results
results_df = pd.DataFrame(results)
print("\n" + "="*60)
print("SENSITIVITY ANALYSIS RESULTS")
print("="*60)
print(results_df.to_string(index=False))

# Save
results_df.to_csv("data/model/sensitivity_analysis.csv", index=False)
print("\n Sensitivity analysis saved")

# Find the "elbow" - best balance
results_df['efficiency'] = results_df['transported_lbs'] / results_df['total_lb_miles']
best_lambda = results_df.loc[results_df['efficiency'].idxmax(), 'lambda']
print(f"\n Recommended Œª value: {best_lambda} (best efficiency)")


SENSITIVITY ANALYSIS: Finding Optimal Penalty

Testing Œª = 1...

Testing Œª = 5...

Testing Œª = 10...

Testing Œª = 20...

Testing Œª = 50...

Testing Œª = 100...

SENSITIVITY ANALYSIS RESULTS
 lambda  transported_lbs    unmet_lbs  pct_demand_met  avg_distance  total_lb_miles
      1     1.138586e+07 8.527006e+07       11.779784      0.605499    6.894122e+06
      5     4.284381e+07 5.381211e+07       44.326108      2.190614    9.385424e+07
     10     4.948184e+07 4.717407e+07       51.193806      2.794067    1.382556e+08
     20     5.058946e+07 4.606646e+07       52.339741      2.961480    1.498197e+08
     50     5.058946e+07 4.606646e+07       52.339741      2.961480    1.498197e+08
    100     5.058946e+07 4.606646e+07       52.339741      2.961480    1.498197e+08

 Sensitivity analysis saved

 Recommended Œª value: 1 (best efficiency)


In [None]:
print("\n" + "="*60)
print("SOLUTION BREAKDOWN BY BOROUGH")
print("="*60)

# Add borough info to pantries
borough_map = {
    'BX': 'Bronx', 'BK': 'Brooklyn', 'MN': 'Manhattan',
    'QN': 'Queens', 'SI': 'Staten Island'
}

excess_pantries['borough'] = excess_pantries['nta2020_clean'].str[:2].map(borough_map)
deficit_pantries['borough'] = deficit_pantries['nta2020_clean'].str[:2].map(borough_map)

if len(flows_df) > 0:
    # Merge borough info
    flows_df = flows_df.merge(
        excess_pantries[['id', 'borough']].rename(columns={'borough': 'from_borough'}),
        left_on='from_pantry', right_on='id', how='left'
    ).merge(
        deficit_pantries[['id', 'borough']].rename(columns={'borough': 'to_borough'}),
        left_on='to_pantry', right_on='id', how='left'
    )
    
    # Summary by borough
    borough_summary = flows_df.groupby(['from_borough', 'to_borough']).agg({
        'lbs': 'sum',
        'distance_miles': 'mean',
        'from_pantry': 'count'
    }).rename(columns={'from_pantry': 'num_flows'}).reset_index()
    
    borough_summary['lbs'] = borough_summary['lbs'].round(0)
    borough_summary['distance_miles'] = borough_summary['distance_miles'].round(2)
    
    print("\nüó∫Ô∏è  Food flows by borough pair:")
    print(borough_summary.sort_values('lbs', ascending=False).to_string(index=False))
    
    # Within vs between borough flows
    flows_df['flow_type'] = flows_df.apply(
        lambda x: 'Within borough' if x['from_borough'] == x['to_borough'] else 'Between boroughs',
        axis=1
    )
    
    flow_type_summary = flows_df.groupby('flow_type').agg({
        'lbs': 'sum',
        'distance_miles': 'mean'
    }).round(2)
    
    print("\nüèôÔ∏è  Within vs Between Borough:")
    print(flow_type_summary)
    
    # Supply and demand by borough
    supply_by_boro = excess_pantries.groupby('borough').agg({
        'id': 'count',
        'nta2020_clean': lambda x: x.map(lambda n: supply.get(
            excess_pantries[excess_pantries['nta2020_clean']==n]['id'].iloc[0] if len(excess_pantries[excess_pantries['nta2020_clean']==n]) > 0 else '0', 0
        )).sum()
    }).rename(columns={'id': 'num_pantries', 'nta2020_clean': 'total_supply'})
    
    print("\nüì¶ Supply by borough:")
    print(supply_by_boro)


SOLUTION BREAKDOWN BY BOROUGH

üó∫Ô∏è  Food flows by borough pair:
 from_borough    to_borough       lbs  distance_miles  num_flows
     Brooklyn      Brooklyn 4083459.0            0.47         62
        Bronx         Bronx 2940846.0            0.53         33
       Queens        Queens 2810463.0            0.62         24
    Manhattan     Manhattan  940694.0            0.38         14
Staten Island Staten Island  390237.0            0.60          4
    Manhattan         Bronx  204011.0            0.83          3
     Brooklyn        Queens   16148.0            0.68          2

üèôÔ∏è  Within vs Between Borough:
                          lbs  distance_miles
flow_type                                    
Between boroughs    220158.76            0.77
Within borough    11165698.82            0.51

üì¶ Supply by borough:
               num_pantries  total_supply
borough                                  
Bronx                    34  5.238664e+06
Brooklyn                 98  2.155641e+

In [12]:
# ============================================================
# DISTANCE DIAGNOSTICS
# ============================================================

print("\n" + "="*60)
print("DISTANCE MATRIX DIAGNOSTICS")
print("="*60)

# Check distance distribution
all_distances = dist_df_pantry.values.flatten()
print(f"\nüìè Distance Statistics:")
print(f"  Min distance:     {all_distances.min():.2f} miles")
print(f"  25th percentile:  {np.percentile(all_distances, 25):.2f} miles")
print(f"  Median distance:  {np.percentile(all_distances, 50):.2f} miles")
print(f"  75th percentile:  {np.percentile(all_distances, 75):.2f} miles")
print(f"  Max distance:     {all_distances.max():.2f} miles")
print(f"  Mean distance:    {all_distances.mean():.2f} miles")

# Check how many flows are possible at different distance thresholds
thresholds = [5, 10, 15, 20, 25, 30]
print(f"\nüéØ Flows Available at Different Distance Caps:")
for threshold in thresholds:
    num_flows = (dist_df_pantry <= threshold).sum().sum()
    pct = num_flows / (len(excess_ids) * len(deficit_ids)) * 100
    print(f"  Within {threshold:2d} miles: {num_flows:6,} flows ({pct:5.1f}% of all pairs)")

# Check connectivity
print(f"\nüîó CONNECTIVITY ANALYSIS:")
for i in excess_ids[:5]:  # Check first 5 excess pantries
    nta = excess_pantries[excess_pantries['id']==i]['nta2020_clean'].iloc[0]
    supply_amt = supply[i]
    
    # Count how many deficit pantries are reachable
    within_5mi = (dist_df_pantry.loc[i] <= 5).sum()
    within_10mi = (dist_df_pantry.loc[i] <= 10).sum()
    within_15mi = (dist_df_pantry.loc[i] <= 15).sum()
    
    print(f"\n  Pantry {i} ({nta}): {supply_amt:,.0f} lbs available")
    print(f"    Can reach within  5 miles: {within_5mi:3d} pantries")
    print(f"    Can reach within 10 miles: {within_10mi:3d} pantries")
    print(f"    Can reach within 15 miles: {within_15mi:3d} pantries")


DISTANCE MATRIX DIAGNOSTICS

üìè Distance Statistics:
  Min distance:     0.04 miles
  25th percentile:  5.36 miles
  Median distance:  9.16 miles
  75th percentile:  12.23 miles
  Max distance:     33.10 miles
  Mean distance:    9.04 miles

üéØ Flows Available at Different Distance Caps:
  Within  5 miles: 14,785 flows ( 22.6% of all pairs)
  Within 10 miles: 36,903 flows ( 56.4% of all pairs)
  Within 15 miles: 59,536 flows ( 91.1% of all pairs)
  Within 20 miles: 64,481 flows ( 98.6% of all pairs)
  Within 25 miles: 65,174 flows ( 99.7% of all pairs)
  Within 30 miles: 65,363 flows (100.0% of all pairs)

üîó CONNECTIVITY ANALYSIS:

  Pantry 4 (QN0203): 181,568 lbs available
    Can reach within  5 miles:  93 pantries
    Can reach within 10 miles: 238 pantries
    Can reach within 15 miles: 277 pantries

  Pantry 6 (QN1201): 325,768 lbs available
    Can reach within  5 miles:  24 pantries
    Can reach within 10 miles: 141 pantries
    Can reach within 15 miles: 271 pantries



In [31]:
print(tracts.columns.tolist())

['Unnamed: 0', ':id', ':version', ':created_at', ':updated_at', 'ctlabel', 'borocode', 'boroname', 'ct2020', 'boroct2020', 'cdeligibil', 'ntaname', 'nta2020', 'cdta2020', 'cdtaname', 'geoid', 'shape_leng', 'shape_area', 'geometry', 'County', 'Borough', 'TotalPop', 'Men', 'Women', 'Hispanic', 'White', 'Black', 'Native', 'Asian', 'Citizen', 'Income', 'IncomeErr', 'IncomePerCap', 'IncomePerCapErr', 'Poverty', 'ChildPoverty', 'Professional', 'Service', 'Office', 'Construction', 'Production', 'Drive', 'Carpool', 'Transit', 'Walk', 'OtherTransp', 'WorkAtHome', 'MeanCommute', 'Employed', 'PrivateWork', 'PublicWork', 'SelfEmployed', 'FamilyWork', 'Unemployment']
