In [None]:
"""
Summaries of USFS TreeMap linked to FIA plot data
Emphasis on 
    - Metrics of forest composition
    - Ecological gradients of species dominance
    - Forest structure (abundance, dominance, diversity, stand height)

Aggregate these statistics to FRP gridcells.

Author: maxwell.cook@colorado.edu
"""

import gc
import os, sys, time
import pandas as pd
import rioxarray as rxr
import xarray as xr
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.colors import to_rgba

# Custom functions
# Make a `code/` folder under the current working directory importable, so the
# notebook has to be started from the directory that contains `code/`.
sys.path.append(os.path.join(os.getcwd(),'code/'))
# NOTE(review): star import - it is not visible here which names this brings in.
# Anything used below without an explicit import presumably comes from
# `__functions`; confirm, and prefer explicit imports.
from __functions import *

# Coordinate reference systems referenced by the analysis
albers = 'EPSG:5070' # albers CONUS
utm = 'EPSG:32613' # UTM Zone 13N

# NOTE(review): absolute, user-specific path - every input/output path below is
# built from it, so the notebook only runs on a machine with this layout.
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")

In [None]:
# Load the aggregated FRP grids (regular 375m2 grids summarizing FRP from VIIRS)
fp = os.path.join(projdir,'data/spatial/mod/VIIRS/viirs_snpp_jpss1_afd_latlon_fires_pixar_gridstats.gpkg')
grid = gpd.read_file(fp)
print(f"\nThere are [{len(grid)}] grids across [{len(grid['Fire_ID'].unique())}] fires.\n")

# Create a unique ID by concatenating the fire ID and the per-fire grid index.
# NOTE(review): there is no separator, so e.g. ('1', '23') and ('12', '3') would
# collide. Left unchanged because the topography table builds its key the same way
# and the tabular inputs are joined on this key.
grid['grid_idx'] = grid['Fire_ID'].astype(str) + grid['grid_index'].astype(str)

# Add the centroid lon/lat to the grid data.
# The centroid is computed in a projected CRS (Albers) and only the resulting points
# are reprojected to WGS84: computing `.centroid` directly on geographic coordinates
# is inaccurate and triggers a geopandas warning.
# The values are assigned directly (same index) instead of merging on `grid_idx`,
# because a self-merge multiplies rows whenever `grid_idx` is duplicated and would
# inflate the duplicate count reported below.
centroids = grid.geometry.to_crs(albers).centroid.to_crs(4326)
grid['x'] = centroids.x  # Longitude (x-coordinate)
grid['y'] = centroids.y  # Latitude (y-coordinate)
del centroids
print(f"\n{grid.columns}\n")

# Drop any duplicate grids, keeping the first occurrence of each ID
print(f"Dropping [{grid.duplicated(subset=['grid_idx']).sum()}] duplicate grids.\n")
grid = grid.drop_duplicates(subset=['grid_idx'], keep='first')

In [None]:
# Check out the distribution of grid overlap with FRP observations
thresh = 0.10
print(f"Fractional overlap:\n{grid['overlap'].describe()}\n")
n_small = grid[grid['overlap'] < thresh]['grid_idx'].count() # grids below the overlap threshold

# Plot the distribution of the fractional overlap
fig, ax = plt.subplots(figsize=(6,3))
sns.histplot(grid['overlap'], kde=True, bins=50, color='dodgerblue', alpha=0.7, ax=ax)

# Add vertical line for the threshold and for 100%
ax.axvline(x=thresh, color='red', linestyle='--', label=f'{thresh*100}% Threshold')
ax.axvline(x=1, color='grey', linestyle='--', label='100% Overlap')

# Customize the plot
ax.set_title('Distribution of Grid Overlap Fraction')
ax.set_xlabel('Fractional Overlap')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid(True, linestyle='--', alpha=0.5)
# Position the annotation in axes-fraction coordinates so it always lands inside
# the plot. A fixed data coordinate depends on the range of `overlap` and, when it
# falls outside the axes, stretches the figure saved with bbox_inches='tight'.
ax.text(0.60, 0.70,
        f'N = {n_small} [{round(n_small/len(grid)*100,2)}%]',
        transform=ax.transAxes, fontsize=10, color='black')

# Save the plot (create the output folder first so savefig cannot fail on a fresh checkout)
out_path = os.path.join(projdir, 'figures/grid_overlap_distribution.png')
os.makedirs(os.path.dirname(out_path), exist_ok=True)
fig.savefig(out_path, dpi=300, bbox_inches='tight')

plt.show()
print(f"Plot saved to: {out_path}")

In [None]:
# Filter out grids below the overlap threshold.
# The count is recomputed here from the frame that is actually being filtered, so
# the message does not depend on a variable left over from the plotting cell.
n_drop = (grid['overlap'] < thresh).sum()
print(f"Dropping [{n_drop} ({round(n_drop/len(grid)*100,2)}%)] grids with <{thresh*100}% fractional overlap.")
# Rows with a missing `overlap` also fail this comparison and are removed.
# `.copy()` makes `grid` an independent frame, so later column assignments on it
# do not raise SettingWithCopyWarning.
grid = grid[grid['overlap'] >= thresh].copy()
print(len(grid))

In [None]:
# Read the two tabular inputs: the FORTYPCD summaries and the Tree Table summaries

# FORTYPCD
fp = os.path.join(projdir,'data/tabular/mod/viirs_snpp_jpss1_gridstats_fortypnm_gp.csv')
fortyp = pd.read_csv(fp)
print(f"FORTYPCD columns:\n{fortyp.columns}\n")

# Tree Table (without the saved index column and without its `forest_pct` column)
fp = os.path.join(projdir,'data/tabular/mod/gridstats_treetable.csv')
trees = pd.read_csv(fp).drop(columns=['Unnamed: 0','forest_pct'])
print(f"Tree Table columns:\n{trees.columns}\n")

# Report unique grid IDs and total rows for each table
for _label, _table in (("FORTYP", fortyp), ("Tree Table", trees)):
    print(f"\t{_label} unique grids: {_table['grid_idx'].nunique(dropna=False)}")
    print(f"\t\t total rows: {len(_table)}")

In [None]:
# Keep only the forest-type rows whose grid also occurs in the Tree Table,
# then report unique grids / total rows for that subset next to the Tree Table.
tree_ids = trees['grid_idx'].unique()
match = fortyp.loc[fortyp['grid_idx'].isin(tree_ids)]
print(f"\tFORTYP unique grids: {match['grid_idx'].nunique(dropna=False)}")
print(f"\t\t total rows: {len(match)}")
print(f"\tTree Table unique grids: {len(tree_ids)}")
print(f"\t\t total rows: {len(trees)}")
# Release the temporary objects
del match, tree_ids
gc.collect()

In [None]:
# Dominant forest type per grid: the row holding the largest `fortyp_pct`
# within each `grid_idx` group.
top_rows = fortyp.groupby('grid_idx')['fortyp_pct'].idxmax()
keep_cols = [
    'grid_idx','fortypnm_gp','fortyp_pct',
    'forest_pct','canopypct_mean','balive_sum'
]
dfortyp = fortyp.loc[top_rows, keep_cols]
# Retain only grids that are also present in the Tree Table
in_trees = dfortyp['grid_idx'].isin(trees['grid_idx'].unique())
dfortyp = dfortyp[in_trees]
dfortyp.columns

In [None]:
# List the distinct forest-type group names among the dominant types
dfortyp['fortypnm_gp'].unique()

In [None]:
# Check on how many matches there are between the dominant type and the Tree Table.
# The comparison is made per grid: a grid counts as a match only when ITS dominant
# forest-type name appears among ITS OWN Tree Table species groups. Testing species
# names against the pooled set of dominant types from all grids would also count
# grids whose species merely match some other grid's dominant type.
tree_pairs = set(zip(trees['grid_idx'], trees['species_gp_n']))
matches = sum(
    pair in tree_pairs
    for pair in zip(dfortyp['grid_idx'], dfortyp['fortypnm_gp'])
)
# Print the results
total_grids = dfortyp['grid_idx'].nunique()
print(f"\nTotal grids: {total_grids}\n")
pct = (matches / total_grids) * 100 if total_grids else 0.0  # guard against an empty table
print(f"{matches} ({pct:.2f}%)\n")
del matches, total_grids, tree_pairs, pct
gc.collect()

In [None]:
# Left-join the Tree Table rows onto the dominant forest-type record of each grid
tree_metrics = pd.merge(dfortyp, trees, how='left', on='grid_idx')
tree_metrics.head()

In [None]:
# Number of rows after joining the Tree Table to the dominant types
print(tree_metrics.shape[0])

In [None]:
# Show the columns of the joined forest-metrics table
tree_metrics.columns

In [None]:
# Preview the first 12 rows for the key identifier, composition and structure columns
preview_cols = [
    'grid_idx','fortypnm_gp','species_gp_n',
    'fortyp_pct','canopypct_mean','balive_sum',
    'tpp_ld_pr','ba_ld_pr','qmd_ld_pr',
]
tree_metrics.loc[:, preview_cols].head(12)

In [None]:
# Read the topography and climate summaries exported from Earth Engine

# topography, with a unique ID built from the fire ID and the grid index
fp = os.path.join(projdir,'data/earth-engine/exports/gridstats_topo.csv')
topo = pd.read_csv(fp).assign(
    grid_idx=lambda d: d['Fire_ID'].astype(str) + d['grid_index'].astype(str)
)
print(f"\n{topo.columns}\n")

# climate (gridmet)
fp = os.path.join(projdir,'data/earth-engine/exports/gridstats_gridmet_full.csv')
climate = pd.read_csv(fp)
print(f"\n{climate.columns}\n")

In [None]:
# Tidy the climate table and merge it onto the grid.
# Both frames are rebuilt with `.assign` rather than modified column-by-column:
# assigning into a column subset (`climate[[...]]`) or into a filtered frame
# raises SettingWithCopyWarning, and in-place edits make the cell depend on what
# earlier cells left behind.
clim_cols = [
    'Fire_ID', 'first_obs_date', 'erc', 'erc_dv',
    'fm1000', 'fm1000_dv', 'rmin', 'rmin_dv', 'tmmx', 'tmmx_dv',
    'vpd', 'vpd_dv', 'vs', 'vs_dv'
] # keep needed columns
climate = (
    climate
    .rename(columns={'first_obs': 'first_obs_date'}) # for joining to the grid data
    .loc[:, clim_cols]
    .assign(Fire_ID=lambda d: d['Fire_ID'].astype(str)) # to match the grid column
)
grid = grid.assign(first_obs_date=grid['first_obs_date'].astype(str)) # to match gee output
# merge climate to the grid by fire id and first acquisition day
grid_clim = grid.merge(climate, on=['Fire_ID','first_obs_date'], how='left')
print(f"\n{grid_clim.columns}\n")

In [None]:
# Attach the topographic variables to the grid + climate table (left join on the grid ID)
topo_cols = ['grid_idx', 'elev', 'slope', 'chili', 'tpi']
topo = topo.loc[:, topo_cols]
grid_clim_topo = grid_clim.merge(topo, how='left', on='grid_idx')
grid_clim_topo.columns

In [None]:
# Re-display the forest-metrics columns before merging with the grid/climate/topography table
tree_metrics.columns

In [None]:
# Merge the FRP, climate, and topography to the forest metrics table.
# The grid ID is a string on the grid side, so it is cast to an integer before
# the join with the forest metrics table.
# The width is given explicitly as int64: plain `int` maps to a 32-bit integer on
# some platforms, which could overflow these concatenated IDs.
# `.assign` returns a new frame instead of editing the existing one in place.
grid_clim_topo = grid_clim_topo.assign(
    grid_idx=grid_clim_topo['grid_idx'].astype('int64')
)
# inner join: only grids present in both tables are kept
grid_tm = tree_metrics.merge(grid_clim_topo, on=['grid_idx'], how='inner')
print(grid_tm.head(3))

In [None]:
# Number of rows in the final merged table
grid_tm.shape[0]

In [None]:
# Save the merged table (forest metrics + FRP grid + climate + topography) to CSV.
out_fp = os.path.join(projdir,'data/tabular/mod/gridstats_fortypnm_gp_tm_ct.csv')
# NOTE(review): `to_csv` is called with its defaults, so the row index is written as
# an unnamed first column. Readers of this file will see it as 'Unnamed: 0'; confirm
# that is intended before switching to index=False.
grid_tm.to_csv(out_fp)
print(f"Saved file to: {out_fp}")