# Process MODIS Vegetation Indices

### Prepare Workspace

In [1]:
# Import system libraries
import os
import sys

# Import data manipulation libraries
import pandas as pd
import numpy as np
from shapely.geometry import Polygon, mapping

# Import geospatial libraries
import geopandas as gpd
import xarray as xr
import rioxarray
import rasterio.mask

# Import API libraries
import pystac_client
import planetary_computer
import odc.stac
import rich.table

# Import visualisation libraries (optional)
import xrspatial
from datashader.transfer_functions import shade, stack
from datashader.colors import Elevation

# Set working directory
os.chdir('/Users/jessicarapson/Documents/GitHub/water-supply-forecast')



### Load Data from API

In [167]:
# Call API
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)

# Load in site geospatial data
gdf_sites = gpd.read_file('assets/data/geospatial.gpkg')

# Initialize an empty list to store catchment bounding boxes
site_bboxes = []

# Iterate through each polygon (catchment) in the GeoDataFrame
for index, row in gdf_sites.iterrows():
    # Get the bounding box for each polygon
    bbox = row.geometry.bounds  # Extract the bounding box as (minx, miny, maxx, maxy)
    site_bboxes.append(bbox)  # Append the bounding box to the list
    
# Initialise dataframes to store extracted information
df_jan = pd.DataFrame(
    {'site_id': gdf_sites['site_id'],'year': np.nan,'month': np.nan,
     'max_veg':np.nan,'min_veg': np.nan,
     'mean_veg': np.nan,'med_veg': np.nan,'percent_no_veg': np.nan,
     'percent_bare_soil': np.nan,'percent_any_veg': np.nan,
     'percent_sparse_veg': np.nan,'percent_dense_veg': np.nan})
df_apr = df_jan.copy()
df_jul = df_jan.copy()
df_oct = df_jan.copy()

# Create dataframe of months
months_dataframes = {'January': df_jan, 'April': df_apr, 'July': df_jul, 'October': df_oct}

# Collect data for each year
df_all = []

# Loop through years
for year in range(2023,2023 + 1):

    # Loop through catchment polygons
    for i in range(0,len(gdf_sites)):
        print("Processing MODIS for:", str(year), gdf_sites.iloc[i]['site_id'],
          f"({i + 1}/{len(gdf_sites)})")

        # Load the catchment polygon
        catchment_polygon = gdf_sites.geometry.iloc[i]

        # Select catchment bounding box
        bbox = site_bboxes[i]

        # Select dates
        months = {
            "January": "01",
            "April": "04",
            "July": "07",
            "October": "10",
        }
        items_season = dict()

        # Search using bounding box coordinates
        items_full = []
        for name, number in months.items():
            datetime = f"{year}-{number}"
            search = catalog.search(
                collections=["modis-13A1-061"],
                bbox=bbox,
                datetime=datetime,
            )
            items_season[name] = search.item_collection()[0]
            items = list(search.item_collection())
            items_trunc = items[0:len(set([tuple(x.bbox) for x in items]))]
            items_full += items_trunc

        # Load and merge data into xarray
        datasets = []
        item_num = 0
        for item in items_full:
            item_num += 1
            print("Processing item:", f"{item_num}/{len(items_full)}")
            data = odc.stac.load(
                [item],
                crs="EPSG:32610",
                bands="500m_16_days_NDVI",
                resolution=500,
                bbox=bbox)
            try:
                # Clip data for each catchment polygon
                data_clipped = data.rio.clip(
                    gdf_sites.geometry.apply(mapping)[[i]], gdf_sites.crs)
                # Mask areas outside the catchment polygon with NaN
                mask = ~data_clipped.isnull()
                data_clipped = data_clipped.where(mask)
                # Append data
                datasets.append(data_clipped)
            except rioxarray.exceptions.NoDataInBounds:
                continue  # Skip to the next item if no data is found

        # Merge the datasets using xarray
        merged_data = xr.concat(datasets, dim='item_index').mean(dim='item_index')

        # Convert data to raster
        raster = items_season["January"].assets["500m_16_days_NDVI"].extra_fields["raster:bands"]
        data = merged_data["500m_16_days_NDVI"] * raster[0]["scale"]

        # Clip data again so outside values are NaN
        data_clipped = data.rio.clip(gdf_sites.geometry.apply(mapping)[[i]], gdf_sites.crs)

        # Iterate through each month DataFrame
        for month, df in months_dataframes.items():

            # Label year
            df.at[i,'year'] = year

            # Select season
            data_season = data_clipped.sel(time=data['time.month'] == int(months[month]))[0]

             # Label month
            df.at[i,'month'] = np.datetime_as_string(data_season.time.values, unit='M')[-2:]

            # Extract mean, minimum, and average vegetation for catchment
            df.at[i,'max_veg'] = np.nanmean(data_season)
            df.at[i,'min_veg'] = np.nanmin(data_season)
            df.at[i,'mean_veg'] = np.nanmean(data_season)
            df.at[i,'med_veg'] = np.nanmedian(data_season)

            # Calculate percent vegetation exceeding various thresholds
            total_cells = np.sum(~np.isnan(data_season.values))
            df.at[i,'percent_no_veg'] = np.sum(data_season.values < 0) / total_cells
            df.at[i,'percent_bare_soil'] = np.sum(
                (data_season.values < 0.01) & (data_season.values > -0.01)) / total_cells
            df.at[i,'percent_any_veg'] = np.sum(data_season.values > 0) / total_cells
            df.at[i,'percent_sparse_veg'] = np.sum(
                (data_season.values > 0.1) & (data_season.values < 0.5)) / total_cells
            df.at[i,'percent_dense_veg'] = np.sum(data_season.values > 0.6) / total_cells

             # Export clipped and compressed raster file
            data_season.rio.to_raster('assets/data/modis/' + gdf_sites.iloc[i].site_id
                                      + '_' + str(year) + '_' + month + '_nvdi.tif')

    # Combine month dataframes
    df_year = pd.concat([df_jan, df_apr, df_jul, df_oct], ignore_index=True)
    df_all.append(df_year)
    print('\n###################################################\n')

# Export dataframe
df = pd.concat(df_all, ignore_index=True)
df.to_csv('assets/data/modis/modis_summary.csv', index=False)

Processing MODIS for: 2000 hungry_horse_reservoir_inflow (1/26)


IndexError: list index out of range