## Combine census and OSM data into a single DataFrame

In [20]:
import pandas as pd

# Columns that hold per-zip POI counts. Only these are zero-filled after the
# census merge, so a missing census value is never silently turned into 0.
COUNT_COLUMNS = [
    "num_food_access",
    "num_grocery",
    "num_food_pantries",
    "num_fast_food",
    "num_restaurants",
]


def normalize_zip(zips):
    """Return *zips* as zero-padded 5-character strings.

    Handles values that arrive as text ("2663"), ints (2663) or floats
    (2663.0 -- what pandas produces when the column contains blanks).
    Missing values stay missing instead of becoming a fake key such as
    "00nan".
    """
    missing = zips.isna()
    text = (
        zips.astype(str)
        .str.strip()
        # A float-typed zip stringifies as "2663.0"; drop the decimal part
        # before padding, otherwise zfill(5) would leave it untouched.
        .str.replace(r"\.0+$", "", regex=True)
        .str.zfill(5)
    )
    return text.mask(missing)


def count_by_zip(df, name, mask=None):
    """Count rows of *df* per zip code.

    Args:
        df: DataFrame with a "zip" column.
        name: Name of the resulting count column.
        mask: Optional boolean Series selecting the rows to count.

    Returns:
        DataFrame with columns "zip" and *name*; zips with no matching rows
        are absent (they are filled with 0 after the census merge).
    """
    subset = df if mask is None else df[mask]
    return subset.groupby("zip").size().reset_index(name=name)


def merge_counts_with_census(df_census, df_osm_counts):
    """Left-join the POI counts onto the census table.

    Every census zip is kept. Count columns that are missing after the join
    (zip has no POIs) are set to 0; all other columns are left as they are,
    so missing census values remain NaN.
    """
    merged = pd.merge(df_census, df_osm_counts, on="zip", how="left")
    count_cols = [col for col in COUNT_COLUMNS if col in merged.columns]
    if count_cols:
        merged[count_cols] = merged[count_cols].fillna(0)
    return merged


# Notebook cells (and scripts) run with __name__ == "__main__", so this block
# executes in place; the guard only keeps the helpers above importable
# without touching the data files.
if __name__ == "__main__":
    # Load the OSM data (zip read as text so leading zeros are not lost)
    df_osm = pd.read_csv("../data/osm/massachusetts_osm_zip.csv", dtype={"zip": str})
    df_osm["zip"] = normalize_zip(df_osm["zip"])

    # Load the census data
    df_census = pd.read_csv("../data/census/massachusetts_census.csv", dtype={"zip": str})
    df_census["zip"] = normalize_zip(df_census["zip"])

    # Count all food-related POIs per zip code
    total_food_access = count_by_zip(df_osm, "num_food_access")

    # Count specific categories:
    num_grocery = count_by_zip(
        df_osm, "num_grocery", df_osm["shop"].isin(["supermarket", "grocery"])
    )
    # NOTE(review): "social_facility" presumably covers more than food
    # pantries (OSM refines it with a social_facility=* sub-tag) -- confirm
    # whether the extract was already filtered upstream.
    num_pantries = count_by_zip(
        df_osm, "num_food_pantries", df_osm["amenity"] == "social_facility"
    )
    num_fast_food = count_by_zip(
        df_osm, "num_fast_food", df_osm["amenity"] == "fast_food"
    )
    num_restaurants = count_by_zip(
        df_osm, "num_restaurants", df_osm["amenity"] == "restaurant"
    )

    # Merge individual count DataFrames on 'zip'
    df_osm_counts = (
        total_food_access
        .merge(num_grocery, on="zip", how="left")
        .merge(num_pantries, on="zip", how="left")
        .merge(num_fast_food, on="zip", how="left")
        .merge(num_restaurants, on="zip", how="left")
    )

    # Now merge with census data
    df_final = merge_counts_with_census(df_census, df_osm_counts)
    display(df_final)

    # Save the final DataFrame to a CSV file
    df_final.to_csv("../data/processed/massachusetts_food_access.csv", index=False)


Unnamed: 0,state,zip,poverty_rate,median_income_household,percent_lower_education,percent_higher_education,percent_snap_participation,per_capita_income,num_food_access,num_grocery,num_food_pantries,num_fast_food,num_restaurants
0,MA,02663,0.067579,107024.5,0.065041,0.934959,0.091044,104119.0,0.0,0.0,0.0,0.0,0.0
1,MA,02652,0.083333,107024.5,0.491342,0.508658,0.063863,91153.0,2.0,0.0,0.0,0.0,1.0
2,MA,01066,0.619403,107024.5,0.629630,0.370370,0.433333,16546.0,0.0,0.0,0.0,0.0,0.0
3,MA,02647,0.067579,107024.5,0.128492,0.871508,0.091044,82214.0,0.0,0.0,0.0,0.0,0.0
4,MA,02641,0.067579,107024.5,0.122951,0.877049,0.091044,73811.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
518,MA,01007,0.082552,99056.0,0.250489,0.749511,0.103749,46109.0,13.0,0.0,0.0,4.0,7.0
519,MA,01029,0.026565,99188.0,0.432638,0.567362,0.088525,67302.0,0.0,0.0,0.0,0.0,0.0
520,MA,01531,0.130081,99375.0,0.341292,0.658708,0.047368,46536.0,0.0,0.0,0.0,0.0,0.0
521,MA,01351,0.074061,99375.0,0.319862,0.680138,0.165331,38954.0,1.0,0.0,0.0,0.0,1.0
