### ENVIRONMENTAL RASTERS (SUMMER, 2025)

In [1]:
import geopandas as gpd
import rasterio
import rasterio.mask
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import warnings

# Install a global "ignore" filter: no category or module is given, so every
# warning raised after this point is suppressed.
warnings.filterwarnings("ignore")

In [2]:
# Output locations for the tract-level environmental raster summary.
cache_env = Path("data/raster")
out_file = cache_env.joinpath("environmental_raster_tract_2025.csv")

# Create the cache directory (and any missing parents) before it is written to.
cache_env.mkdir(exist_ok=True, parents=True)

In [None]:
# Load the tract shapefile and keep only rows whose GEOID starts with one of
# the NYC prefixes.
tracts = gpd.read_file("data/nyc_tracts_2020/nyc_tracts_2020.shp")

# GEOID prefixes selecting the NYC tracts (presumably state + county FIPS
# codes, one per borough -- confirm against the shapefile's GEOID scheme).
nyc_prefixes = ("36005", "36047", "36061", "36081", "36085")

# Filter with a boolean mask, detach from the source frame, then reproject
# to EPSG:5070 for NLCD/land raster alignment.
tracts = (
    tracts.loc[tracts["GEOID"].str.startswith(nyc_prefixes)]
    .copy()
    .to_crs("EPSG:5070")
)

In [None]:
# Input rasters: vegetation index, water index, and NLCD impervious surface.
# NOTE(review): these file names say 2024 while the notebook title and the
# output CSV say 2025 -- confirm which year is intended.
ndvi_path, ndwi_path, impervious_path = (
    "data/raster/ndvi_summer_2024.tif",
    "data/raster/ndwi_summer_2024.tif",
    "data/raster/nlcd_impervious_2024.tif",
)

In [None]:
# Define zonal mean function.
def zonal_mean(raster_path, gdf, value_name):
    """Return the mean of raster band 1 inside each geometry of ``gdf``.

    Parameters
    ----------
    raster_path : str or path-like
        Raster to open with ``rasterio.open``.
    gdf : geopandas.GeoDataFrame
        One value is produced per row, in row order. When both ``gdf`` and
        the raster declare a CRS, the geometries are reprojected to the
        raster CRS before masking.
    value_name : str
        Name assigned to the returned Series.

    Returns
    -------
    pandas.Series
        Float means with a default 0..n-1 index (not ``gdf``'s index), so
        callers should combine the result positionally. ``NaN`` marks rows
        with a missing/empty geometry, no valid pixels, or a failed mask.
    """
    values = []
    with rasterio.open(raster_path) as src:
        # Geometries must be in the raster's CRS; otherwise masking reports
        # "no overlap" and every row would silently come back as NaN.
        if src.crs is not None and gdf.crs is not None:
            gdf = gdf.to_crs(src.crs)

        for geom in tqdm(gdf.geometry, total=len(gdf)):
            if geom is None or geom.is_empty:
                values.append(np.nan)
                continue

            try:
                # filled=False yields a masked array in which pixels outside
                # the geometry AND the dataset's own invalid pixels are
                # masked. This avoids comparing against src.nodata, which
                # is None for rasters without a declared nodata value (in
                # that case outside pixels would be filled with 0 and
                # wrongly averaged in).
                out_img, _ = rasterio.mask.mask(
                    src, [geom], crop=True, filled=False
                )
            except Exception:
                # Best effort per tract (e.g. geometry outside the raster
                # extent). Unlike a bare ``except``, this still lets
                # KeyboardInterrupt/SystemExit stop the loop.
                values.append(np.nan)
                continue

            valid = out_img[0].compressed().astype("float64")
            # Float rasters may store NaN as data without declaring nodata.
            valid = valid[~np.isnan(valid)]
            values.append(valid.mean() if valid.size else np.nan)

    return pd.Series(values, name=value_name, dtype="float64")

In [None]:
# Process.
environment = pd.DataFrame()
environment["GEOID"] = tracts["GEOID"]

environment["ndvi_mean"] = zonal_mean(ndvi_path, tracts, "ndvi_mean")
environment["ndwi_mean"] = zonal_mean(ndwi_path, tracts, "ndwi_mean")
environment["impervious_mean"] = zonal_mean(impervious_path, tracts, "impervious_mean")

environment.to_csv(out_file, index = False)

environment.head()