In [None]:
import glob
import os
import geopandas as gpd
from rasterstats import zonal_stats

# Aggregated Terrain Statistics

In [26]:
# path to polygon shapefile defining zones to calculate aggregated statistics
watersheds_path = '../Datasets/nhd/ky_huc10_26916.shp'

# path to dem
dem_path = r'../Datasets/dem_10m/dem_clipped_26916.tif'

# paths to terrain features derived from dem.
# glob.glob returns a plain list whose order is arbitrary (it depends on the
# filesystem), so normalize each path for consistency and sort the result to
# keep the processing order reproducible across runs and machines.
terrain_feature_paths = sorted(
    os.path.normpath(path)
    for path in glob.glob('../Datasets/terrain_features/*.tif')
)

In [32]:
# load the polygon shapefile as a geodataframe
watershed_boundaries = gpd.read_file(watersheds_path)

# set huc10 code as index (the huc10 column itself is dropped) so every row of
# the output .csv is keyed by its watershed code
watershed_boundaries = watershed_boundaries.set_index('huc10', drop=True)

# define statistics to calculate (built in to rasterstats)
statistics = [
    'count', 'sum', 'majority', 'range', 'mean', 'std', 'min',
    'percentile_10', 'percentile_25', 'median', 'percentile_75',
    'percentile_90', 'max',
]

# calculate zonal statistics of the dem for every watershed polygon;
# the result is one dict per polygon, in the same order as the geodataframe rows
stats = zonal_stats(watershed_boundaries, dem_path, stats=statistics, all_touched=True)

# add each statistic as a new column in the geodataframe
for key in statistics:
    watershed_boundaries[key] = [stat[key] for stat in stats]

# save to .csv file without the metadata and geometry columns; drop() returns a
# new frame, so watershed_boundaries itself keeps its geometry for later cells
watershed_boundaries.drop(columns=['loaddate', 'name', 'geometry']).to_csv('huc10_statistics_dem.csv')

In [None]:
def zonal_statistics_to_csv(polygon_path, raster_path, statistics, output_name, index_col=None, drop_cols=None):
    """Compute zonal statistics of a raster over polygon zones and save them as CSV.

    Parameters
    ----------
    polygon_path : str
        Path to the polygon file read with ``gpd.read_file``; each feature is
        one zone.
    raster_path : str
        Path to the raster handed to ``zonal_stats``.
    statistics : list of str
        Names of the statistics to request from ``zonal_stats``. Each name
        becomes one column of the output table.
    output_name : str
        Path of the .csv file to write.
    index_col : str, optional
        Column to use as the index of the table (and therefore as the first
        column of the .csv). When None, the index produced by
        ``gpd.read_file`` is kept.
    drop_cols : list of str, optional
        Columns to remove before writing. When None, every column is written.

    Returns
    -------
    None
        The result is only written to ``output_name``.
    """
    gdf = gpd.read_file(polygon_path)

    if index_col is not None:
        gdf = gdf.set_index(index_col, drop=True)

    # one dict of statistics per zone, in the same order as the rows of gdf
    stats = zonal_stats(gdf, raster_path, stats=statistics, all_touched=True)

    for key in statistics:
        gdf[key] = [stat[key] for stat in stats]

    if drop_cols is not None:
        gdf = gdf.drop(columns=drop_cols)

    gdf.to_csv(output_name)