In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab runtime; the file paths used by the
# cells below all live under this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
import pandas as pd
from rasterstats import zonal_stats

# Per-census-tract raster statistics: for every tract, summarise the raster
# over the whole tract and over only the sidewalk area inside it, then write
# one CSV row per tract.

# Input file paths
sidewalks_path = "/content/drive/My Drive/sidewalk_inventory/Sidewalk_Inventory.shp"  # sidewalks shp
raster_path = "/content/drive/My Drive/boston_20230621_900.TIF"  # shade map
neighborhood_path = "/content/drive/My Drive/2020-census-tracts-in-boston/2020 Census Tracts in Boston.shp"  # neighborhoods

# Load vector data
sidewalks = gpd.read_file(sidewalks_path)
vector_data = gpd.read_file(neighborhood_path)

# Fail early with a clear message instead of an IndexError further down.
if sidewalks.empty:
    raise ValueError(f"No sidewalk features found in {sidewalks_path}")

# The open dataset is only needed for its metadata and for the clip, so use a
# context manager to release the file handle as soon as that is done.
with rasterio.open(raster_path) as raster_data:
    raster_nodata = raster_data.nodata

    # Ensure CRS is consistent (very important). Neither rasterio.mask nor
    # rasterstats reprojects geometries, so BOTH vector layers must be in the
    # raster's CRS, not merely in the same CRS as each other. If the raster
    # carries no CRS, fall back to aligning the sidewalks with the tracts.
    target_crs = raster_data.crs or vector_data.crs
    vector_data = vector_data.to_crs(target_crs)
    sidewalks = sidewalks.to_crs(target_crs)

    # Step 1: Merge all sidewalk geometries (for clipping the raster).
    # union_all() supersedes the deprecated unary_union in newer GeoPandas;
    # keep the old attribute as a fallback for older installs.
    if hasattr(sidewalks.geometry, "union_all"):
        combined_geom = sidewalks.geometry.union_all()
    else:
        combined_geom = sidewalks.geometry.unary_union

    # Step 2: Clip the raster to the sidewalk area
    out_image, out_transform = mask(raster_data, [mapping(combined_geom)], crop=True, nodata=0)

# Keep only one band
if out_image.ndim > 2:
    out_image = out_image[0]

# Step 3: Intersect sidewalks with neighborhoods
sidewalks_neigh = gpd.overlay(vector_data, sidewalks, how='intersection')

# Step 4: Zonal statistics over the intersected sidewalk-neighborhood polygons.
# A bare array carries no nodata metadata, so pass the raster's nodata value
# explicitly; 'count' is requested so the pieces can be combined per tract
# with correct pixel weighting in Step 6.
sidewalk_stats = zonal_stats(
    sidewalks_neigh,
    out_image,
    affine=out_transform,
    nodata=raster_nodata,
    stats=['mean', 'sum', 'count'],
)
# Select and rename columns by key (not by position) and reuse the
# GeoDataFrame's index so the column-wise concat lines up row for row.
sidewalk_stats_df = (
    pd.DataFrame(sidewalk_stats, columns=['mean', 'sum', 'count'], index=sidewalks_neigh.index)
    .astype(float)
    .rename(columns={'mean': 'sidewalk_mean', 'sum': 'sidewalk_sum', 'count': 'sidewalk_count'})
)

# Attach sidewalk stats to sidewalk-neighborhood GeoDataFrame
sidewalks_neigh = pd.concat([sidewalks_neigh, sidewalk_stats_df], axis=1)

# Step 5: Zonal statistics for the full neighborhood areas (regardless of sidewalks)
total_stats = zonal_stats(vector_data, raster_path, stats=['mean', 'sum'])
total_stats_df = (
    pd.DataFrame(total_stats, columns=['mean', 'sum'], index=vector_data.index)
    .astype(float)
    .rename(columns={'mean': 'total_mean', 'sum': 'total_sum'})
)

vector_data = pd.concat([vector_data, total_stats_df], axis=1)

# Step 6: Merge the two results based on neighborhood code
# First, total the sidewalk pieces per neighborhood. Sums and pixel counts are
# added up (min_count=1 keeps NaN for tracts whose pieces covered no pixels),
# and the mean is derived from those totals so that every pixel weighs the
# same; averaging the per-piece values would over-weight small pieces and
# would turn 'sidewalk_sum' into an average rather than a total.
sidewalk_grouped = (
    sidewalks_neigh.groupby("geoid20")[['sidewalk_sum', 'sidewalk_count']]
    .sum(min_count=1)
    .reset_index()
)
sidewalk_grouped['sidewalk_mean'] = (
    sidewalk_grouped['sidewalk_sum'] / sidewalk_grouped['sidewalk_count']
)

# Merge stats back into the main neighborhood GeoDataFrame
final = vector_data.merge(sidewalk_grouped, on="geoid20", how="left")

# Step 7: Output selected columns to CSV
final[['geoid20', 'total_sum', 'total_mean', 'sidewalk_sum', 'sidewalk_mean']].to_csv(
    "/content/drive/My Drive/boston_neighborhood_sun_900.csv", index=False
)