# Extract zonal stats using rasterstats

* Takes a while to run, unfortunately, because zonal stats can't do multi-band extractions, so each year needs to be extracted separately
* Zonal stats only treats a pixel as in or not in a polygon; it can't tell what % of a raster pixel falls inside the polygon
* Rasters need to be reprojected to NAD83/UTM14 (EPSG:26914)
* Runs at 100m and 1km buffer

In [None]:
import rasterstats
import rasterio as rio
import glob
import os
import re
import geopandas as gpd
import numpy as np
import pandas as pd

In [None]:
# Gather every GeoTIFF in the landcover directory, in sorted path order.
raster_dir = '../data/landcover/'
raster_list = sorted(glob.glob(os.path.join(raster_dir, '*.tif')))

# One year string per raster: the four digits following a "y" in its path.
year_list = [
    match.group(1)
    for match in (re.search(r'y(\d{4})', tif_path) for tif_path in raster_list)
]

In [None]:
# Path to the shapefile holding the playa polygons (loaded with geopandas below).
playa_shpfile = '../data/shapefiles/playa_v5_summary_atts/playa_v5_summary_atts.shp'

# First need to create polygons for extraction

In [None]:
# Load the playa shapefile with geopandas.
playa_gdf = gpd.read_file(playa_shpfile)

In [None]:
# Keep only the 'id' and 'geometry' columns; no other attribute is used below.
playa_gdf = playa_gdf[['id','geometry']]

In [None]:
# Copy the playas and replace each polygon with its 100-unit buffer
# (metres, assuming the layer is in a metre-based projected CRS -- TODO confirm).
# The buffer is assigned into the geometry column so the result stays a
# GeoDataFrame that still carries 'id'; rebinding the name to the bare
# buffer() output would leave only the geometries.
buffer_100_gdf = playa_gdf.copy()
buffer_100_gdf['geometry'] = playa_gdf.geometry.buffer(100)

In [None]:
# Write the 100-unit buffers to disk; the 100m zonal-stats loop reads this path.
buffer_100_gdf.to_file('../data/shapefiles/buffer_playas_100m.shp')

In [None]:
# Copy the playas and replace each polygon with its 1000-unit buffer
# (metres, assuming the layer is in a metre-based projected CRS -- TODO confirm).
# The buffer is assigned into the geometry column so the result stays a
# GeoDataFrame that still carries 'id'; rebinding the name to the bare
# buffer() output would leave only the geometries.
buffer_1km_gdf = playa_gdf.copy()
buffer_1km_gdf['geometry'] = playa_gdf.geometry.buffer(1000)

# Write the buffers to disk; the 1km zonal-stats loop reads this path.
buffer_1km_gdf.to_file('../data/shapefiles/buffer_playas_1km.shp')

# Now, rasterstats

In [None]:
def extract_from_year(shpfile, raster_list, year):
    """Run categorical zonal statistics against the raster for one year.

    Parameters
    ----------
    shpfile : str
        Path to the polygon file passed to ``rasterstats.zonal_stats``.
    raster_list : sequence of str
        Candidate raster paths. The first path containing ``str(year)``
        as a substring is used.
    year : int or str
        Year to look for in the raster paths.

    Returns
    -------
    list
        The result of ``rasterstats.zonal_stats(..., categorical=True)``;
        per the rasterstats docs, one mapping of pixel value to pixel
        count for each feature in ``shpfile``.

    Raises
    ------
    IndexError
        If no path in ``raster_list`` contains ``year``.
    """
    year_str = str(year)
    matches = [path for path in raster_list if year_str in path]
    if not matches:
        # Same exception type an empty selection always produced, but now
        # naming the year and the candidates that were searched.
        raise IndexError(
            'no raster path contains year {!r}; searched {} path(s): {!r}'.format(
                year_str, len(raster_list), list(raster_list)
            )
        )
    return rasterstats.zonal_stats(shpfile, matches[0], categorical=True)

In [None]:
# Seed the results table with the playa ids, then add one column of
# categorical pixel counts per year for the 100m buffers.
# NOTE(review): values are attached by row position, so this assumes the CSV
# rows are in the same order as the shapefile features -- confirm.
id_table = pd.read_csv('../data/playa_nogeometry.csv')
output_df = pd.DataFrame({'id': id_table['id']})
buffer_100m_shp = '../data/shapefiles/buffer_playas_100m.shp'
for y in year_list:
    column_100m = '{}_100m'.format(y)
    output_df.loc[:, column_100m] = extract_from_year(buffer_100m_shp, raster_list, y)
    print(y, 'done')

In [None]:
# Save the table built so far (ids plus the per-year *_100m columns), without the index.
output_df.to_csv('../data/buffer_playas_100m_counts_all.csv', index=False)

In [None]:
# Add one column of categorical pixel counts per year for the 1km buffers
# to the same results table.
buffer_1km_shp = '../data/shapefiles/buffer_playas_1km.shp'
for y in year_list:
    column_1km = '{}_1km'.format(y)
    output_df.loc[:, column_1km] = extract_from_year(buffer_1km_shp, raster_list, y)
    print(y, 'done')

In [None]:
# Save output_df under a second name; if the cells ran in order it now holds
# both the *_100m and *_1km columns.
output_df.to_csv('../data/buffer_playas_all_counts_all.csv', index=False)