
# NYC Food Access — Geospatial Starter Notebook

This notebook helps you:
1. Load **NYC census tracts** and **pantry locations** (GeoJSON/CSV).
2. Normalize coordinate reference systems (CRS).
3. Map pantries to tracts, compute counts and distances to nearest pantry.
4. (Optional) Join **supply gap / demand** data if available (e.g., by neighborhood or tract).
5. Make quick choropleths and export clean files for optimization.

> Update file paths in the first cell, then run through top-to-bottom.


In [None]:
# Raw pantry GeoJSON read by the pantry-flattening cell.
# NOTE(review): "PANTIRIES" is a misspelling of "PANTRIES"; the loading cell uses this
# exact name, so rename both together if you correct it.
RAW_PANTIRIES_PATH = "raw/pantries.geojson"
# Census tracts file. The tract cell reads it as UTF-8 text and parses it with json,
# so it must be GeoJSON (or newline-delimited GeoJSON features), not GeoPackage/Shapefile.
CENSUS_TRACTS_PATH = "raw/2020_Census_Tracts_20251110.geojson"
# NOTE(review): not referenced by the cells shown here (the pantry cell writes
# "data/pantries.csv" instead) -- confirm whether a later cell still needs it.
PANTRIES_PATH      = "data/pantry_locations.csv"    # can be a GeoJSON of points OR a CSV with lat/lon columns

In [21]:
from pathlib import Path
import json, pandas as pd

# Read the raw pantry GeoJSON from disk and decode it into Python objects.
in_path = Path(RAW_PANTIRIES_PATH)  # <- change if needed
with in_path.open(encoding="utf-8") as fh:
    data = json.load(fh)

# Weekday lookup, Monday first: (lower-case key used inside property names, display label).
DAYS = [
    (label.lower(), label)
    for label in ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
]

def hours_from_properties(props, prefix="fp"):
    """
    Summarize a feature's weekly opening hours as a single string, e.g.
    "Tue 09:00 AM-11:00 AM; Thu 01:00 PM-03:00 PM".

    A day from DAYS is included only when its ``{prefix}_{day}`` property
    equals "open" (case-insensitive). For such a day, up to three
    ``{prefix}_{day}_open{n}`` / ``{prefix}_{day}_close{n}`` pairs are read;
    a pair is kept only when both values are truthy. Days without any kept
    pair are left out. Returns "" when nothing qualifies.
    """
    summary = []
    for day_key, day_label in DAYS:
        base = f"{prefix}_{day_key}"
        status = props.get(base) or ""
        if status.lower() == "open":
            windows = []
            for slot in "123":
                opens = props.get(f"{base}_open{slot}")
                closes = props.get(f"{base}_close{slot}")
                if opens and closes:
                    windows.append(f"{opens}-{closes}")
            if windows:
                summary.append(day_label + " " + ", ".join(windows))
    return "; ".join(summary)

# --- added helpers matching your earlier pattern ---
def fmt_day(props_prefix, d, props):
    """
    Format the opening windows of one day as "open-close, open-close".

    ``props_prefix`` and ``d`` select the property names, e.g. "fp" and
    "tue" read ``fp_tue``, ``fp_tue_open1``, ``fp_tue_close1``, and so on up
    to slot 3. Returns "" unless the day's flag equals "open"
    (case-insensitive); windows with a missing or empty side are skipped.
    """
    day_field = f"{props_prefix}_{d}"
    if (props.get(day_field) or "").lower() != "open":
        return ""
    pairs = (
        (props.get(f"{day_field}_open{n}"), props.get(f"{day_field}_close{n}"))
        for n in "123"
    )
    return ", ".join(f"{start}-{end}" for start, end in pairs if start and end)

def weekly_summary(props, prefix="fp"):
    """
    Join the per-day strings from fmt_day into one weekly summary.

    Each day in DAYS that yields a non-empty fmt_day result contributes
    "<label> <hours>"; entries are separated by "; ". Returns "" when no day
    contributes.
    """
    per_day = ((label, fmt_day(prefix, d, props)) for d, label in DAYS)
    return "; ".join(f"{label} {hours}" for label, hours in per_day if hours)
# -----------------------------------------------

def flatten_feature(feature):
    """
    Flatten one pantry GeoJSON feature into a flat dict (one table row).

    Coordinates: the first two geometry coordinates are read as (lon, lat).
    A "lat"/"lon" property takes precedence over them, but only when it
    actually carries a value: a property that is absent, null, or an empty
    string no longer overrides the geometry value. A geometry with fewer
    than two coordinates yields None for both instead of raising IndexError.

    Returns a dict with the keys program, lat, lng, type_fp, type_sk, phone,
    address, boro, zip, loc_info, status, program_type, fp_hours, sk_hours.
    ``zip`` is always a string ("" when the property is missing or falsy);
    the two ``*_hours`` values come from weekly_summary.

    NOTE: the census-tract cell later in this notebook defines another
    function with the same name, which replaces this one once it has run.
    """
    geom = feature.get("geometry") or {}
    props = feature.get("properties") or {}

    coords = geom.get("coordinates") or ()
    if len(coords) >= 2:
        geom_lon, geom_lat = coords[0], coords[1]
    else:
        geom_lon = geom_lat = None

    def _prop_or(key, fallback):
        # A null/blank property must not wipe out a usable geometry coordinate.
        value = props.get(key)
        return fallback if value is None or value == "" else value

    row = {
        "program": props.get("program"),
        "lat": _prop_or("lat", geom_lat),
        "lng": _prop_or("lon", geom_lon),
        "type_fp": props.get("type_fp"),
        "type_sk": props.get("type_sk"),
        "phone": props.get("org_phone"),
        "address": props.get("distadd"),
        "boro": props.get("distboro"),
        "zip": str(props.get("distzip") or ""),
        "loc_info": props.get("dist_location_info"),
        "status": props.get("status"),
        "program_type": props.get("program_type"),

        # use the new weekly_summary (you could swap to hours_from_properties if you prefer)
        "fp_hours": weekly_summary(props, "fp"),
        "sk_hours": weekly_summary(props, "sk"),
    }
    return row

# Flatten every pantry feature into one table row ("features": null is treated as empty).
rows = [flatten_feature(f) for f in (data.get("features") or [])]
df = pd.DataFrame(rows)

# to_csv does not create missing folders, so make sure the output directory exists first.
pantries_csv = Path("data/pantries.csv")
pantries_csv.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(pantries_csv, index=False)

# Keep the preview as the cell's last expression; a notebook only displays the final
# expression of a cell, so the preview was silently discarded when it came before to_csv.
df.head()

## Census Tracts

In [24]:
# shapely is optional: record whether it could be imported so later code can pick
# between shapely centroids and the pure-Python fallback.
try:
    from shapely.geometry import shape
except Exception:
    HAS_SHAPELY = False
else:
    HAS_SHAPELY = True

# ---- Load your data ----
in_path = Path(CENSUS_TRACTS_PATH)  # <-- change to your file
text = in_path.read_text(encoding="utf-8").strip()

# Accept a FeatureCollection, a single Feature, a plain JSON list of Features,
# or NDJSON (one Feature per line). Anything else leaves `features` empty.
features = []
try:
    obj = json.loads(text)
except json.JSONDecodeError:
    # NDJSON fallback: the text as a whole is not one JSON document, so parse each
    # non-blank line on its own (a malformed line still raises JSONDecodeError).
    features = [json.loads(line) for line in text.splitlines() if line.strip()]
else:
    # Check for a list first: lists have no .get(), so asking for obj.get("type")
    # before this test raised AttributeError and made the list case unreachable.
    if isinstance(obj, list):
        features = obj
    elif isinstance(obj, dict):
        if obj.get("type") == "FeatureCollection":
            # `or []` also covers an explicit "features": null.
            features = obj.get("features") or []
        elif obj.get("type") == "Feature":
            features = [obj]

def rough_centroid_lonlat(geometry):
    """
    Fallback centroid for lon/lat when shapely isn't available:
    the plain average of all exterior-ring vertices of all polygons.

    Only "Polygon" and "MultiPolygon" geometries are handled; holes (rings
    after the first) are ignored. Positions may carry extra elements such as
    an altitude: only the first two values (lon, lat) are used. Missing or
    null coordinates are treated as empty.

    Returns (lon, lat) as floats, or (None, None) when no vertex is found
    (unsupported geometry type, empty or missing coordinates).
    """
    gtype = geometry.get("type")
    coords = geometry.get("coordinates") or []

    if gtype == "Polygon":
        # coords = [exterior, holes...]
        exteriors = [ring for ring in coords[:1] if ring]
    elif gtype == "MultiPolygon":
        # each poly = [exterior, holes...]
        exteriors = [poly[0] for poly in coords if poly and poly[0]]
    else:
        exteriors = []

    lons, lats = [], []
    for ring in exteriors:
        for position in ring:
            # Index instead of unpacking so [lon, lat, alt] positions do not raise.
            lons.append(position[0])
            lats.append(position[1])

    if not lons:
        return (None, None)
    return (sum(lons) / len(lons), sum(lats) / len(lats))

def flatten_feature(f):
    """
    Flatten one census-tract feature into a flat dict (one table row).

    The centroid comes from shapely when HAS_SHAPELY is true and the
    geometry can be processed; in every other case it falls back to
    rough_centroid_lonlat, which may return (None, None).

    The row holds "geoid" (from "geoid" or "GEOID", else ""), a fixed set of
    properties copied under their own names, the ":created_at",
    ":updated_at" and ":id" properties renamed to created_at / updated_at /
    row_id, and centroid_lat / centroid_lon.
    """
    props = f.get("properties", {}) or {}
    geom = f.get("geometry", {}) or {}

    centroid_xy = None
    if HAS_SHAPELY:
        try:
            point = shape(geom).centroid
            centroid_xy = (float(point.x), float(point.y))
        except Exception:
            # Best effort: any shapely failure falls through to the rough centroid.
            centroid_xy = None
    if centroid_xy is None:
        centroid_xy = rough_centroid_lonlat(geom)
    lon, lat = centroid_xy

    # Properties copied verbatim (adjust as needed); order fixes the column order.
    passthrough = (
        "ctlabel",
        "ct2020",
        "boroct2020",
        "borocode",
        "boroname",
        "ntaname",
        "nta2020",
        "cdeligibil",
        "cdta2020",
        "cdtaname",
        "shape_area",
        "shape_leng",
    )

    row = {"geoid": props.get("geoid") or props.get("GEOID") or ""}
    row.update((name, props.get(name)) for name in passthrough)
    row.update({
        # timestamps/ids if useful to you:
        "created_at": props.get(":created_at"),
        "updated_at": props.get(":updated_at"),
        "row_id": props.get(":id"),
        # centroid for mapping/table joins:
        "centroid_lat": lat,
        "centroid_lon": lon,
    })
    return row

# One flat row per tract feature; exact duplicate rows are dropped from the table.
rows = [flatten_feature(f) for f in features]
df = pd.DataFrame(rows).drop_duplicates()

# Write the tidy table next to the notebook.
out_csv = "census_tracts_tidy.csv"
df.to_csv(out_csv, index=False)
print(f"Saved {out_csv} with {len(df)} rows")

# (Optional) centroid points as a small GeoJSON you can drop on a map.
# NOTE: this is built from `rows`, not the de-duplicated `df`, so it can contain
# more points than the CSV has rows if the input holds duplicate features.
centroid_features = []
for r in rows:
    if r["centroid_lon"] is not None and r["centroid_lat"] is not None:
        centroid_features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [r["centroid_lon"], r["centroid_lat"]],
            },
            "properties": {
                key: r[key] for key in ("geoid", "boroname", "ctlabel", "ntaname")
            },
        })

# Skip the file entirely when no tract produced a centroid.
if centroid_features:
    out_geojson = "census_tract_centroids.geojson"
    with open(out_geojson, "w", encoding="utf-8") as f:
        json.dump({"type": "FeatureCollection", "features": centroid_features}, f)
    print(f"Saved {out_geojson} with {len(centroid_features)} points")


Saved census_tracts_tidy.csv with 2325 rows
Saved census_tract_centroids.geojson with 2325 points
