In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np

In [5]:
CENSUS_TRACTS_GEOJSON = "raw/census_tracts.geojson"   # e.g., 2020 Census tracts GeoJSON/GeoPackage/SHAPE
METRICS    = "data/tabular/nta_with_metrics.csv"                    # e.g., "data/supply_gap_by_neighborhood.csv" or None
PANTRIES_GEOJSON = "data/geo/pantries.geojson"
# PANTRIES_PER_NTA = "data/pantries_per_neighborhood.csv"

### Information We Will Calculate to use in our optimization model:
1. Number of pantries within each NTA (neighborhood tabulation area) - coincides with metrics of food supply gap
2. Pairwise distance between all pantries and centroids of NTAs

a. Load data:
- Food pantries: long, lat

In [7]:
# A: Food pantries
pantries = pd.read_csv(PANTRIES_GEOJSON)

NUM_PANTRIES = pantries.shape[0]
print(f"Number of food pantries: {NUM_PANTRIES}")

pantries.head()

Number of food pantries: 0


Unnamed: 0,"{""type"":""FeatureCollection""","features:[{""type"":""Feature""",id:1,"geometry:{""type"":""Point""",coordinates:[-73.905672642358,40.862216353818]},"properties:{""FID"":1","type_fp:""FP""",type_sk:null,"program:""CHURCH OF ST. NICHOLAS OF TOLENTINE""",...,sk_wed_open3:null.409,sk_wed_close3:null.409,sk_thu_open3:null.409,sk_thu_close3:null.409,sk_fri_open3:null.409,sk_fri_close3:null.409,sk_sat_open3:null.409,sk_sat_close3:null.409,sk_sun_open3:null.409,sk_sun_close3:null}}]}


In [4]:
# B: Supply gap by neighborhood

metrics_by_nta = pd.read_csv(METRICS)
NUM_NEIGHBORHOODS = metrics_by_nta.shape[0]
print(f"Number of neighborhoods: {NUM_NEIGHBORHOODS}")

metrics_by_nta.head()

FileNotFoundError: [Errno 2] No such file or directory: 'data/nta_enriched.csv'

In [78]:
# C: Pantries per NTA:

pantries_per_nta = pd.DataFrame(metrics_by_nta.groupby('nta2020')['pantry_count'].sum())


print(pantries_per_nta.shape)
pantries_per_nta.head()

(262, 1)


Unnamed: 0_level_0,pantry_count
nta2020,Unnamed: 1_level_1
BK0101,1
BK0102,1
BK0103,1
BK0104,1
BK0201,0


In [79]:
# If PANTRIES_PATH is CSV, specify columns:
PANTRY_LAT_COL = "latitude"
PANTRY_LON_COL = "longitude"

CRS_LATLON    = "EPSG:4326"
CRS_PROJECTED = "EPSG:6539"  # NYC-appropriate projected CRS

In [83]:
from pathlib import Path

In [None]:
pantry_path = Path(PANTRIES_GEOJSON)
assert pantry_path.exists(), f"Pantry file not found: {pantry_path}"

if pantry_path.suffix.lower() == ".csv":
    pan_df = pd.read_csv(pantry_path)
    for col in [PANTRY_LAT_COL, PANTRY_LON_COL]:
        assert col in pan_df.columns, f"Missing column in pantry CSV: {col}"
    pan_df = pan_df.dropna(subset=[PANTRY_LAT_COL, PANTRY_LON_COL])
    geometry = gpd.points_from_xy(pan_df[PANTRY_LON_COL], pan_df[PANTRY_LAT_COL], crs=CRS_LATLON)
    pantries = gpd.GeoDataFrame(pan_df, geometry=geometry)
else:
    pantries = gpd.read_file(pantry_path)
    if pantries.crs is None:
        print("⚠️ Pantries have no CRS; assuming WGS84")
        pantries = pantries.set_crs(CRS_LATLON)

pantries_ll = pantries.to_crs(CRS_LATLON)
pantries_proj = pantries_ll.to_crs(CRS_PROJECTED)

print("Pantries:", pantries_ll.shape)
pantries_ll.head(2)

Pantries: (515, 125)


Unnamed: 0,FID,type_fp,type_sk,program,org_phone,distadd,distboro,distzip,dist_location_info,fp_days_orig,...,sk_wed_close3,sk_thu_open3,sk_thu_close3,sk_fri_open3,sk_fri_close3,sk_sat_open3,sk_sat_close3,sk_sun_open3,sk_sun_close3,geometry
0,1,FP,,CHURCH OF ST. NICHOLAS OF TOLENTINE,(718) 295-6800,"2345 University Ave, Bronx, New York, 10468",BX,10468,BASEMENT FORDHAM RD ENTRANCE,TUE,...,,,,,,,,,,POINT (-73.90567 40.86222)
1,2,FP,,BREAD OF LIFE FOOD PANTRY,(347) 235-3723,"1104 Elder Ave, Bronx, New York, 10472",BX,10472,#15,SAT,...,,,,,,,,,,POINT (-73.87854 40.82642)


In [86]:
nta_enriched = gpd.read_file("nta_enriched.geojson")

In [97]:
# 1) Get NTA centroids in projected CRS
nta_proj = nta_enriched.to_crs(CRS_PROJECTED).copy()
centroids = nta_proj.copy()
centroids["geometry"] = nta_proj.centroid

# 2) Get pantries in the same projected CRS
pantries_proj = pantries_ll.to_crs(CRS_PROJECTED).copy()

# 3) Choose a pantry ID column (fallback: index)
if "pantry_id" in pantries_proj.columns:
    pantry_ids = pantries_proj["pantry_id"].astype(str).to_numpy()
elif "name" in pantries_proj.columns:
    pantry_ids = pantries_proj["name"].astype(str).to_numpy()
else:
    pantry_ids = pantries_proj.reset_index().index.astype(str)
    pantries_proj["pantry_id"] = pantry_ids  # save it for later

nta_ids = centroids["nta2020"].astype(str).to_numpy()

# 4) Build coordinate arrays
nta_xy = np.column_stack([centroids.geometry.x, centroids.geometry.y])  # shape (N_ntas, 2)
pantry_xy = np.column_stack([pantries_proj.geometry.x, pantries_proj.geometry.y])  # (N_pantries, 2)

# 5) Pairwise Euclidean distances (meters, because CRS_PROJECTED is in meters)
# dist_matrix[i, j] = distance from NTA i to Pantry j
dist_matrix_miles = dist_matrix_m * 0.000621371

# 6) Long-form table
n_ntas, n_pans = dist_matrix_miles.shape
nta_idx, pantry_idx = np.meshgrid(
    np.arange(n_ntas),
    np.arange(n_pans),
    indexing="ij"
)

dist_nta_pantry = pd.DataFrame({
    "nta2020": nta_ids[nta_idx.ravel()],
    "pantry_id": pantry_ids[pantry_idx.ravel()],
    "distance_miles": dist_matrix_miles.ravel()
})

print(dist_nta_pantry.head())
print("Rows in distance table:", dist_nta_pantry.shape[0])

  nta2020 pantry_id  distance_miles
0  BK0101         0       30.977800
1  BK0101         1       25.113211
2  BK0101         2       14.526881
3  BK0101         3       29.648585
4  BK0101         4        8.843101
Rows in distance table: 136990


In [98]:
dist_nta_pantry 

Unnamed: 0,nta2020,pantry_id,distance_miles
0,BK0101,0,30.977800
1,BK0101,1,25.113211
2,BK0101,2,14.526881
3,BK0101,3,29.648585
4,BK0101,4,8.843101
...,...,...,...
136985,SI9593,510,22.396734
136986,SI9593,511,12.289212
136987,SI9593,512,75.695671
136988,SI9593,513,29.267117


### Add nta id to pantries df

In [99]:
nta_polygons = nta_enriched[["nta2020", "geometry"]].set_geometry("geometry")

# Spatial join: assign each pantry to the NTA polygon it falls within
pantries_with_nta = gpd.sjoin(
    pantries_ll,
    nta_polygons,
    how="left",
    predicate="within"
)

# Drop spatial join index
pantries_with_nta = pantries_with_nta.drop(columns=["index_right"], errors="ignore")

pantries_with_nta.head()


Unnamed: 0,FID,type_fp,type_sk,program,org_phone,distadd,distboro,distzip,dist_location_info,fp_days_orig,...,sk_thu_open3,sk_thu_close3,sk_fri_open3,sk_fri_close3,sk_sat_open3,sk_sat_close3,sk_sun_open3,sk_sun_close3,geometry,nta2020
0,1,FP,,CHURCH OF ST. NICHOLAS OF TOLENTINE,(718) 295-6800,"2345 University Ave, Bronx, New York, 10468",BX,10468,BASEMENT FORDHAM RD ENTRANCE,TUE,...,,,,,,,,,POINT (-73.90567 40.86222),BX0701
1,2,FP,,BREAD OF LIFE FOOD PANTRY,(347) 235-3723,"1104 Elder Ave, Bronx, New York, 10472",BX,10472,#15,SAT,...,,,,,,,,,POINT (-73.87854 40.82642),BX0901
2,4,FP,,CHANCE FOR CHILDREN,(347) 616-3228,"11 Mc Keever Pl, Brooklyn, New York, 11225",BK,11225,1ST FLOOR (BASEMENT LEVEL),FRI,...,,,,,,,,,POINT (-73.95848 40.66569),BK0901
3,6,FP,,HEALTH ESSENTIAL ASSOCIATION INC (BK),(646) 515-6898,"2101 E 16th St, Brooklyn, New York, 11229",BK,11229,2ND FLOOR,FRI (4TH),...,,,,,,,,,POINT (-73.95528 40.59861),BK1502
4,10,FPHA,,AIDS CENTER OF QUEENS COUNTY (WOODSIDE),(718) 472-9400,"62-07 Woodside Ave, Woodside, New York, 11377",QN,11377,3RD FLOOR,"TUE, THUR",...,,,,,,,,,POINT (-73.90219 40.74466),QN0203
