In [None]:
# PROCESSING THE WALKABILITY DATASET
import geopandas as gpd
# Load the appropriate layer
gdf = gpd.read_file("Natl_WI.gdb", layer="NationalWalkabilityIndex")  # Replace with actual layer name

full_data = gdf.drop(columns=["geometry"])  # drop geometry if you just want tabular data

# We will use STATEFP and COUNTYFP as a "key" to merge with our combined dataset.
# There are 220,000 observations for 3,000 counties. 
# We need to average the walkability index (NatWalkInd) among all the blocks in each county.
# Weighted by area (Shape_Area) of the block.
df = full_data[["STATEFP","COUNTYFP","NatWalkInd","Shape_Area"]].copy()
# Combine STATEFP and COUNTYFP to FP.
df["FP"] = df["STATEFP"].astype(str).str.zfill(2) + df["COUNTYFP"].astype(str).str.zfill(3)
df = df.drop(columns=["STATEFP","COUNTYFP"])

import numpy as np
# Weighted average of NatWalkInd by FP
walkability_df = (
    df.groupby("FP")[["NatWalkInd", "Shape_Area"]]
      .apply(lambda g: (g["NatWalkInd"] * g["Shape_Area"]).sum() / g["Shape_Area"].sum())
      .reset_index(name="Walkability")
)
print(walkability_df.head())



      FP  Walkability
0  01001     3.267533
1  01003     4.104647
2  01005     3.703383
3  01007     4.165608
4  01009     4.360420


In [35]:
print(full_data['population'])

KeyError: 'population'