In [3]:
import pandas as pd, geopandas as gpd, pathlib

# ── paths: inputs and outputs for this step
DATA_DIR = pathlib.Path("../data/cleaned")
GEO_DIR = pathlib.Path("../docs")
STORES_CSV = DATA_DIR.joinpath("grocery_stores_cleaned_v4.csv")
AREAS_GJ = GEO_DIR.joinpath("community_areas.geojson")
JOIN_OUT = DATA_DIR.joinpath("stores_with_areas.csv")

# ── read the cleaned store table and narrow it to the columns used below
stores = pd.read_csv(STORES_CSV, dtype={"Zip": str})[
    ["DBA Name", "Address", "Latitude", "Longitude", "Community", "IS_REAL_GROCERY"]
]

# ── build point geometries from the lon/lat columns (declared as EPSG:4326)
g_stores = gpd.GeoDataFrame(
    stores,
    geometry=gpd.points_from_xy(x=stores["Longitude"], y=stores["Latitude"]),
    crs="EPSG:4326",
)

# ── read the community polygons; align the name column with the store table
areas = (
    gpd.read_file(AREAS_GJ)[["community", "geometry"]]
       .rename(columns={"community": "Community"})
)

# ── inner spatial join: only stores whose point lies within a polygon survive.
#    Both sides carry a "Community" column, so the join emits
#    Community_left (store table) and Community_right (polygon layer).
joined = g_stores.sjoin(areas, how="inner", predicate="within")

# Keep the store-side name; discard the polygon-side name and the join index.
# NOTE(review): the polygon-derived name is thrown away here, and later cells
# group on this store-side "Community" and merge it against the polygon names —
# confirm the two sources spell community names identically.
joined = (
    joined
      .drop(columns=["Community_right", "index_right"])
      .rename(columns={"Community_left": "Community"})
)

joined.to_csv(JOIN_OUT, index=False)
print(f"✅ Spatial join saved ➜ {JOIN_OUT} · rows={len(joined):,}")

✅ Spatial join saved ➜ ../data/cleaned/stores_with_areas.csv · rows=4,504


In [18]:
import numpy as np

# ── Score every community by how many real grocery stores it contains ──────

# Keep only the rows whose IS_REAL_GROCERY flag is set.
real_only = joined.query("IS_REAL_GROCERY")

# One row per community that has at least one real grocery store,
# with the number of such stores in "StoreCount".
agg = (
    real_only
      .groupby("Community")
      .size()
      .rename("StoreCount")
      .reset_index()
)

# Turn counts into a 0-4 score. Bins are left-closed (right=False):
#   [0,1) → 0 · [1,3) → 1 · [3,6) → 2 · [6,10) → 3 · [10,∞) → 4
# groupby().size() never yields 0, so score 0 only arises from the fill below.
bins   = [0, 1, 3, 6, 10, np.inf]
scores = pd.cut(
    agg.StoreCount,
    bins=bins,
    labels=[0, 1, 2, 3, 4],
    right=False
).astype(int)
agg["AccessScore"] = scores

print(f"🏷  Communities scored: {len(agg)}")

# Merge back onto the polygon layer so Folium can read it.
# The left join keeps every polygon; polygons with no matching community get
# NaN, which upcasts both columns to float. Fill with 0 and cast back to int
# so the GeoJSON properties are integers (3, not 3.0).
areas_scored = (
    areas.merge(agg, on="Community", how="left")
         .fillna({"StoreCount": 0, "AccessScore": 0})
         .astype({"StoreCount": int, "AccessScore": int})
)

out_gj = DATA_DIR / "community_area_scores.geojson"
areas_scored.to_file(out_gj, driver="GeoJSON")
print(f"✅ GeoJSON written ➜ {out_gj}")


🏷  Communities scored: 70
✅ GeoJSON written ➜ ../data/cleaned/community_area_scores.geojson


In [19]:
# Output targets for the scored community layer (GeoJSON) and its flat table (CSV).
OUT_GJ = DATA_DIR.joinpath("community_area_scores.geojson")
OUT_CSV = DATA_DIR.joinpath("community_area_scores.csv")

# Left-join the scores onto the polygons so every polygon is kept; polygons
# without a match get 0 for both columns, stored as integers.
out = (
    areas.merge(agg, how="left", on="Community")
         .assign(
             StoreCount=lambda frame: frame["StoreCount"].fillna(0).astype(int),
             AccessScore=lambda frame: frame["AccessScore"].fillna(0).astype(int),
         )
)

# Same data twice: with geometry as GeoJSON, without geometry as CSV.
out.to_file(OUT_GJ, driver="GeoJSON")
out.drop(columns=["geometry"]).to_csv(OUT_CSV, index=False)

print(
    "✅ Scores written:",
    f"  • {OUT_GJ}",
    f"  • {OUT_CSV}",
    sep="\n",
)


✅ Scores written:
  • ../data/cleaned/community_area_scores.geojson
  • ../data/cleaned/community_area_scores.csv
