In [1]:
import geopandas as gpd
import pandas as pd

# Tag every grocery store with the community area polygon that contains it.
#
# NOTE: the curated grocery CSV is BOTH the input and the output of this cell.
# Everything below is therefore written so that running the cell a second time
# yields the same file as running it once (no duplicated / suffixed columns).
STORES_CSV = "../data/cleaned/grocery_stores_v1_curated.csv"
COMMUNITY_AREAS_GEOJSON = "../data/geo/community_areas.geojson"

# Polygon attribute name -> column name used in the curated store table.
COMMUNITY_RENAMES = {"community": "Community", "area_numbe": "AreaNumber"}

# Load grocery store data
df = pd.read_csv(STORES_CSV)

# Load community area polygons
community_areas = gpd.read_file(COMMUNITY_AREAS_GEOJSON)

# Drop sjoin conflict column if present in the polygon layer
if "index_right" in community_areas.columns:
    community_areas = community_areas.drop(columns=["index_right"])

# Fail early (before anything is overwritten) if the polygon layer does not
# carry the attributes that are renamed and previewed further down.
missing_attrs = sorted(set(COMMUNITY_RENAMES) - set(community_areas.columns))
if missing_attrs:
    raise KeyError(f"community_areas is missing expected columns: {missing_attrs}")

# Remove the leftovers of a previous run of this cell from the store table:
#   * "index_right" and "geometry" are produced by the join / GeoDataFrame and
#     are rebuilt below from Longitude / Latitude,
#   * the renamed targets and the raw polygon attributes would otherwise collide
#     with the freshly joined columns (sjoin suffixes collisions with
#     _left/_right, and the rename would create duplicate "Community" columns).
# NOTE(review): this assumes a store column that shares its name with a polygon
# attribute came from an earlier run of this cell — confirm for this dataset.
# Columns already suffixed by earlier runs (e.g. "*_left" / "*_right") are not
# touched here; clean them up once by hand if the file has accumulated any.
polygon_attrs = [
    col for col in community_areas.columns if col != community_areas.geometry.name
]
stale_names = {"index_right", "geometry", *COMMUNITY_RENAMES.values(), *polygon_attrs}
stale_cols = [col for col in df.columns if col in stale_names]
if stale_cols:
    print(f"Dropping columns left over from a previous run: {stale_cols}")
    df = df.drop(columns=stale_cols)

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Longitude"], df["Latitude"]),
    crs="EPSG:4326"
)

print(f"Grocery points: {gdf.shape}")
print(f"Community polygons: {community_areas.shape}")

# Perform spatial join (left join: stores outside every polygon are kept, with
# missing values in the polygon columns)
joined = gpd.sjoin(gdf, community_areas, how="left", predicate="within")

# Clean up columns
joined = joined.rename(columns=COMMUNITY_RENAMES)

# Sanity checks on the join result; reported rather than raised so the cell
# still completes as it did before.
unmatched_count = int(joined["Community"].isna().sum())
if unmatched_count:
    print(f"Stores without a community area: {unmatched_count}")
extra_rows = len(joined) - len(gdf)
if extra_rows:
    print(f"Warning: join produced {extra_rows} extra row(s); some points match more than one polygon.")

# Save to file
joined.to_csv(STORES_CSV, index=False)
print("✅ Spatial join complete and saved.")

# Preview: must be the last expression of the cell, otherwise Jupyter does not
# render it.
joined[["DBA Name", "Address", "Community", "AreaNumber"]].head()


Grocery points: (4503, 28)
Community polygons: (77, 6)
✅ Spatial join complete and saved.
