# Processing Open Street Maps Data

Retrieve data from OSM regarding locations of food pantries, fast food restaurants, grocery stores.

[Geofabrik Downloads](https://download.geofabrik.de/north-america/us.html) by state.

## Filter OSM file to find grocery stores, food pantries, restaurants, etc...

In [None]:
from pyrosm import OSM
import pandas as pd

# This script extracts points of interest (POIs) related to food security from an OSM file.
def extract_food_security_pois(osm_path):
    osm = OSM(osm_path)
    pois = osm.get_pois()

    tag_filters = (
        (pois["shop"].isin(["supermarket", "grocery", "farm", "garden_centre"])) |
        (pois["amenity"].isin(["food_bank", "social_facility", "marketplace", "restaurant", "cafe", "fast_food"])) |
        (pois["landuse"].isin(["farm"])) |
        (pois["building"] == "brewery")
    )

    filtered = pois[tag_filters].copy()

    # Subtags to add extra context
    filtered["subtype"] = filtered["social_facility"].fillna("")

    return filtered

# Save the filtered POIs to a CSV file
def save_to_csv(dataframe, output_path):
    dataframe.to_csv(output_path, index=False)
    print(f"Data saved to {output_path}")

filtered_pois = extract_food_security_pois("../data/osm/massachusetts.osm.pbf")
save_to_csv(filtered_pois, "../data/osm/massachusetts_osm.csv")


Data saved to ../data/osm/massachusetts_osm.csv


## Use shapefiles to assign zip code to each based off of lat and lon coordinates

In [2]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from IPython.display import display

# Load in OSM data
df_osm = pd.read_csv("../data/osm/massachusetts_osm.csv")

# Get rid of OSM data without lat/lon
df_osm = df_osm[df_osm['lat'].notnull() & df_osm['lon'].notnull()]

# Create a geometry column from lon/lat
geometry = [Point(xy) for xy in zip(df_osm['lon'], df_osm['lat'])]
gdf_osm = gpd.GeoDataFrame(df_osm, geometry=geometry, crs="EPSG:4326")

# Load the ZIP code shapefile, convert the CRS if necessary
gdf_zip = gpd.read_file("../data/shapefiles/tl_2020_us_zcta520.shp")
gdf_zip = gdf_zip.to_crs("EPSG:4326")

# Perform a spatial join to find which ZIP code each OSM point falls into
gdf_osm = gpd.sjoin(gdf_osm, gdf_zip[['ZCTA5CE20', 'geometry']], how="left", predicate="within")

# Rename the joined ZIP 
gdf_osm.rename(columns={"ZCTA5CE20": "zip"}, inplace=True)

# Make sure ZIP codes are 5 digits
gdf_osm["zip"] = gdf_osm["zip"].astype(str).str.zfill(5)

# Save the resulting GeoDataFrame to a CSV file
gdf_osm.to_csv("../data/osm/massachusetts_osm_zip.csv", index=False)

  df_osm = pd.read_csv("../data/osm/massachusetts_osm.csv")


## Visualize data on a map

In [None]:
import folium
import pandas as pd

# Categorize each POI based on tags
def categorize_poi(row):
    if row.get("social_facility") in ["food_bank", "soup_kitchen"]:
        return row["social_facility"]
    elif row.get("amenity") == "marketplace":
        return "farmers_market"
    elif row.get("shop") in ["supermarket", "grocery", "farm", "garden_centre"]:
        return row["shop"]
    elif row.get("amenity") == "cafe" and row.get("cuisine") == "coffee_shop":
        return "coffee_shop"
    elif row.get("building") == "brewery":
        return "brewery"
    elif row.get("amenity") == "restaurant":
        return "restaurant"
    elif row.get("landuse") == "farm":
        return "urban_farm"
    else:
        return "other"


# Load the data
df = pd.read_csv("../data/osm/massachusetts_osm.csv")
df["category"] = df.apply(categorize_poi, axis=1)

# Filter only rows that have latitude and longitude
df = df.dropna(subset=["lat", "lon"])

# Assign a color per amenity
color_map = {
    "supermarket": "green",
    "grocery": "green",
    "farm": "green",
    "farmers_market": "green",
    "food_bank": "red",
    "soup_kitchen": "red",
    "coffee_shop": "red",
    "restaurant": "blue",
    "brewery": "blue",
    "urban_farm": "green",
    "garden_centre": "green",
    "other": "lightgray"
}

# Center of Massachussetts as fallback
m = folium.Map(location=[df["lat"].mean(), df["lon"].mean()], zoom_start=7)

for _, row in df.iterrows():
    if pd.isna(row["lat"]) or pd.isna(row["lon"]):
        continue

    category = row["category"]
    color = color_map.get(category, "lightgray")
    name = row.get("name", "Unknown")

    folium.CircleMarker(
        location=[row["lat"], row["lon"]],
        radius=4,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        popup=f"{name} ({category})"
    ).add_to(m)

display(m)


  df = pd.read_csv("../data/osm/massachusetts_osm.csv")
