In [1]:
"""
Summaries of USFS TreeMap linked to FIA plot data
Emphasis on 
    - Metrics of forest composition
    - Ecological gradients of species dominance
    - Forest structure (abundance, dominance, diversity, stand height)

Aggregate these statistics to FRP gridcells.

Author: maxwell.cook@colorado.edu
"""

import os, sys, time
import pandas as pd
import rioxarray as rxr
import xarray as xr
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.colors import to_rgba

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

albers = 'EPSG:5070' # albers CONUS
utm = 'EPSG:32613' # UTM Zone 13N

maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")

Ready to go !


In [2]:
# load the aggregated FRP grids (regular 375m2 grids summarizing FRP from VIIRS)
fp = os.path.join(projdir,'data/spatial/mod/VIIRS/viirs_snpp_jpss1_afd_latlon_fires_pixar_gridstats.gpkg')
grid = gpd.read_file(fp)
print(f"\nThere are [{len(grid)}] grids across [{len(grid['Fire_ID'].unique())}] fires.\n")

# add the centroid lat/lon to the grid data
df = grid.to_crs(4326) # WGS coords for lat/lon
df['x'] = df.geometry.centroid.x  # Longitude (x-coordinate)
df['y'] = df.geometry.centroid.y
grid = grid.merge(df[['grid_index','x','y']], on='grid_index', how='left')
del df
print(f"\n{grid.columns}\n")

# Drop any dupicate grids ...
print(f"Dropping [{grid.duplicated(subset=['grid_index']).sum()}] duplicate grids.\n")
grid = grid.drop_duplicates(subset=['grid_index'], keep='first')

# check on fractional overlap distribution
print(f"Fractional overlap:\n{grid['overlap'].describe()}\n")
n_small = grid[grid['overlap'] < 0.05]['grid_index'].count() # less than 5% spatial overlap
print(f"Dropping [{n_small} ({round(n_small/len(grid)*100,2)}%)] grids with < 5% fractional overlap.")
grid = grid[grid['overlap'] >= 0.05] # remove these observations


There are [57232] grids across [100] fires.


Index(['grid_index', 'grid_area', 'afd_count', 'unique_days', 'overlap',
       'frp_csum', 'frp_max', 'frp_min', 'frp_mean', 'frp_p90', 'frp_p95',
       'frp_p97', 'frp_p99', 'frp_first', 'day_max_frp', 'dt_max_frp',
       'first_obs_date', 'last_obs_date', 't4_max', 't4_mean', 't5_max',
       't5_mean', 'day_count', 'night_count', 'frp_max_day', 'frp_max_night',
       'frp_csum_day', 'frp_csum_night', 'frp_mean_day', 'frp_mean_night',
       'frp_p90_day', 'frp_p90_night', 'frp_p95_day', 'frp_p95_night',
       'frp_p97_day', 'frp_p97_night', 'frp_p99_day', 'frp_p99_night',
       'frp_first_day', 'frp_first_night', 'Fire_ID', 'Fire_Name', 'geometry',
       'x', 'y'],
      dtype='object')

Dropping [1395] duplicate grids.

Fractional overlap:
count    56767.000000
mean         2.029756
std          2.004417
min          0.000003
25%          0.591083
50%          1.498852
75%          2.779104
max         24.470760
Name: overlap, d

In [3]:
# Load the FORTYPCD and the Tree Table summaries

# FORTYPCD
fp = os.path.join(projdir,'data/tabular/mod/viirs_snpp_jpss1_gridstats_fortypnm_gp.csv')
fortyp = pd.read_csv(fp)
print(f"FORTYPCD columns:\n{fortyp.columns}\n")

# Tree Table
fp = os.path.join(projdir,'data/tabular/mod/gridstats_treetable.csv')
trees = pd.read_csv(fp)
trees.drop(columns=['Unnamed: 0','forest_pct'], inplace=True)
print(f"Tree Table columns:\n{trees.columns}\n")

# Check on how many grids match
print(f"\tFORTYP unique grids: {len(fortyp['grid_index'].unique())}")
print(f"\t\t total rows: {len(fortyp)}")
print(f"\tTree Table unique grids: {len(trees['grid_index'].unique())}")
print(f"\t\t total rows: {len(trees)}")

FORTYPCD columns:
Index(['Unnamed: 0', 'grid_index', 'fortypnm_gp', 'count', 'total_pixels',
       'pct_cover', 'forest_pixels', 'forest_pct'],
      dtype='object')

Tree Table columns:
Index(['grid_index', 'species_gp_n', 'tmid_n', 'shannon_h', 'balive', 'badead',
       'ba_ld', 'tpa_live', 'tpa_dead', 'tpa_ld', 'tree_ht_live',
       'tree_ht_dead', 'balive_total', 'badead_total', 'ba_ld_total',
       'tpa_live_total', 'tpa_dead_total', 'tpa_ld_total', 'shannon_h_mn',
       'sp_dominance_l', 'sp_dominance_d', 'sp_dominance_ld', 'sp_abundance_l',
       'sp_abundance_d', 'sp_abundance_ld'],
      dtype='object')

	FORTYP unique grids: 54011
		 total rows: 252547
	Tree Table unique grids: 53958
		 total rows: 262253


In [4]:
# check how many matching grids
match = fortyp[fortyp['grid_index'].isin(trees['grid_index'].unique())]
# Check on how many grids match
print(f"\tFORTYP unique grids: {len(match['grid_index'].unique())}")
print(f"\t\t total rows: {len(match)}")
print(f"\tTree Table unique grids: {len(trees['grid_index'].unique())}")
print(f"\t\t total rows: {len(trees)}")
del match
gc.collect()

	FORTYP unique grids: 53958
		 total rows: 252383
	Tree Table unique grids: 53958
		 total rows: 262253


0

In [5]:
# calculate the dominant forest type using the FORTYPCD
dfortyp = fortyp.loc[fortyp.groupby('grid_index')['pct_cover'].idxmax()]
dfortyp = dfortyp[['grid_index','fortypnm_gp','pct_cover','forest_pixels','forest_pct']]
dfortyp = dfortyp[dfortyp['grid_index'].isin(trees['grid_index'].unique())] # retain matching grids
dfortyp.rename(columns={'pct_cover': 'fortyp_pct'}, inplace=True)
dfortyp.columns

Index(['grid_index', 'fortypnm_gp', 'fortyp_pct', 'forest_pixels',
       'forest_pct'],
      dtype='object')

In [6]:
dfortyp['fortypnm_gp'].unique()

array(['Piñon-juniper', 'Ponderosa', 'Aspen', 'Mixed-conifer',
       'Lodgepole', 'Limber pine', 'Spruce-fir', 'Balsam fir',
       'Blue spruce', 'Oak-woodland', 'Nonstocked',
       'California mixed conifer', 'Sugar pine',
       'Sugar maple / beech / yellow birch', 'Cottonwood',
       'Foxtail pine / bristlecone pine', 'Paper birch', 'Willow',
       'Elm / ash / black locust',
       'Cercocarpus (mountain brush) woodland', 'Mountain hemlock',
       'Whitebark pine'], dtype=object)

In [7]:
# Subset the FORTYP metrics to retain those which match our FORTYPCDthe top species
spps = ['Ponderosa','Lodgepole','Spruce-fir','Aspen','Mixed-conifer','Piñon-juniper']
dfortyp_sp = dfortyp[dfortyp['fortypnm_gp'].isin(spps)]
print(f"{len(dfortyp_sp)} / {len(dfortyp)} with our predominant species.")
# del dfortyp
# gc.collect()

53005 / 53958 with our predominant species.


In [8]:
# Check on how many matches there are between the dominant type and the Tree Table
matches = (dfortyp_sp['grid_index'].isin(trees[trees['species_gp_n'].isin(dfortyp_sp['fortypnm_gp'])]['grid_index'])).sum()
# Print the results
total_grids = dfortyp_sp['grid_index'].nunique()
print(f"\nTotal grids: {total_grids}\n")
print(f"{matches} ({(matches / total_grids) * 100:.2f}%)\n")
del matches, total_grids
gc.collect()


Total grids: 53005

53005 (100.00%)



0

In [9]:
# merge to the tree table metrics
tree_metrics = dfortyp_sp.merge(trees, on='grid_index', how='left')
tree_metrics.head()

Unnamed: 0,grid_index,fortypnm_gp,fortyp_pct,forest_pixels,forest_pct,species_gp_n,tmid_n,shannon_h,balive,badead,...,tpa_live_total,tpa_dead_total,tpa_ld_total,shannon_h_mn,sp_dominance_l,sp_dominance_d,sp_dominance_ld,sp_abundance_l,sp_abundance_d,sp_abundance_ld
0,34602,Piñon-juniper,81.767956,182,99.450549,Aspen,1,7.168414,767.728006,0.0,...,8534.572042,515.513044,9050.085086,7.779799,0.001233,0.0,0.001128,0.00298,0.0,0.00298
1,34602,Piñon-juniper,81.767956,182,99.450549,Mixed-conifer,1,7.674867,193050.923236,16366.250072,...,8534.572042,515.513044,9050.085086,7.779799,0.309988,0.282755,0.307672,0.350334,0.053451,0.350334
2,34602,Piñon-juniper,81.767956,182,99.450549,Piñon-juniper,12,7.695326,327203.488306,22936.215795,...,8534.572042,515.513044,9050.085086,7.779799,0.525401,0.396263,0.514419,0.596255,0.893098,0.596255
3,34602,Piñon-juniper,81.767956,182,99.450549,Ponderosa,5,8.58059,101746.724302,18578.858875,...,8534.572042,515.513044,9050.085086,7.779799,0.163378,0.320982,0.17678,0.050432,0.053451,0.050432
4,34603,Piñon-juniper,56.886228,169,98.816568,Aspen,1,7.168414,767.728006,0.0,...,12223.704239,700.209945,12923.914184,7.535196,0.000889,0.0,0.000825,0.00208,0.0,0.00208


In [10]:
print(len(tree_metrics))

258831


In [11]:
tree_metrics.columns

Index(['grid_index', 'fortypnm_gp', 'fortyp_pct', 'forest_pixels',
       'forest_pct', 'species_gp_n', 'tmid_n', 'shannon_h', 'balive', 'badead',
       'ba_ld', 'tpa_live', 'tpa_dead', 'tpa_ld', 'tree_ht_live',
       'tree_ht_dead', 'balive_total', 'badead_total', 'ba_ld_total',
       'tpa_live_total', 'tpa_dead_total', 'tpa_ld_total', 'shannon_h_mn',
       'sp_dominance_l', 'sp_dominance_d', 'sp_dominance_ld', 'sp_abundance_l',
       'sp_abundance_d', 'sp_abundance_ld'],
      dtype='object')

In [12]:
tree_metrics[['grid_index','fortypnm_gp','fortyp_pct','species_gp_n',
              'sp_abundance_ld','sp_dominance_ld','shannon_h_mn']].head(12)

Unnamed: 0,grid_index,fortypnm_gp,fortyp_pct,species_gp_n,sp_abundance_ld,sp_dominance_ld,shannon_h_mn
0,34602,Piñon-juniper,81.767956,Aspen,0.00298,0.001128,7.779799
1,34602,Piñon-juniper,81.767956,Mixed-conifer,0.350334,0.307672,7.779799
2,34602,Piñon-juniper,81.767956,Piñon-juniper,0.596255,0.514419,7.779799
3,34602,Piñon-juniper,81.767956,Ponderosa,0.050432,0.17678,7.779799
4,34603,Piñon-juniper,56.886228,Aspen,0.00208,0.000825,7.535196
5,34603,Piñon-juniper,56.886228,Mixed-conifer,0.495238,0.478499,7.535196
6,34603,Piñon-juniper,56.886228,Piñon-juniper,0.396258,0.332191,7.535196
7,34603,Piñon-juniper,56.886228,Ponderosa,0.106424,0.188485,7.535196
8,34604,Piñon-juniper,52.095808,Aspen,0.002667,0.001041,7.638961
9,34604,Piñon-juniper,52.095808,Mixed-conifer,0.319796,0.284465,7.638961


In [13]:
# Load the climate and topography information

# topography
fp = os.path.join(projdir,'data/earth-engine/exports/gridstats_topo.csv')
topo = pd.read_csv(fp)
print(f"\n{topo.columns}\n")

# climate (gridmet)
fp = os.path.join(projdir,'data/earth-engine/exports/gridstats_gridmet.csv')
climate = pd.read_csv(fp)
print(f"\n{climate.columns}\n")


Index(['system:index', 'chili', 'elev', 'grid_index', 'slope', 'tpi', '.geo'], dtype='object')


Index(['system:index', 'Fire_ID', 'erc', 'erc_dv', 'first_obs', 'vpd',
       'vpd_dv', '.geo'],
      dtype='object')



In [14]:
# tidy and merge the climate/topo
climate.rename(columns={'first_obs': 'first_obs_date'}, inplace=True) # for joining to the grid data
climate = climate[['Fire_ID', 'first_obs_date', 'erc', 'erc_dv', 'vpd', 'vpd_dv']] # keep needed columns
climate['Fire_ID'] = climate['Fire_ID'].astype(str) # to match the grid column
grid['first_obs_date'] = grid['first_obs_date'].astype(str) # to match gee output
# merge climate to the grid by fire id and first acquisition day
grid_clim = grid.merge(climate, on=['Fire_ID','first_obs_date'], how='left')
print(grid_clim.columns)

Index(['grid_index', 'grid_area', 'afd_count', 'unique_days', 'overlap',
       'frp_csum', 'frp_max', 'frp_min', 'frp_mean', 'frp_p90', 'frp_p95',
       'frp_p97', 'frp_p99', 'frp_first', 'day_max_frp', 'dt_max_frp',
       'first_obs_date', 'last_obs_date', 't4_max', 't4_mean', 't5_max',
       't5_mean', 'day_count', 'night_count', 'frp_max_day', 'frp_max_night',
       'frp_csum_day', 'frp_csum_night', 'frp_mean_day', 'frp_mean_night',
       'frp_p90_day', 'frp_p90_night', 'frp_p95_day', 'frp_p95_night',
       'frp_p97_day', 'frp_p97_night', 'frp_p99_day', 'frp_p99_night',
       'frp_first_day', 'frp_first_night', 'Fire_ID', 'Fire_Name', 'geometry',
       'x', 'y', 'erc', 'erc_dv', 'vpd', 'vpd_dv'],
      dtype='object')


In [15]:
# merge the topography to the grid
topo = topo[['grid_index', 'elev', 'slope', 'chili', 'tpi']]
grid_clim_topo = grid_clim.merge(topo, on='grid_index', how='left')
grid_clim_topo.columns

Index(['grid_index', 'grid_area', 'afd_count', 'unique_days', 'overlap',
       'frp_csum', 'frp_max', 'frp_min', 'frp_mean', 'frp_p90', 'frp_p95',
       'frp_p97', 'frp_p99', 'frp_first', 'day_max_frp', 'dt_max_frp',
       'first_obs_date', 'last_obs_date', 't4_max', 't4_mean', 't5_max',
       't5_mean', 'day_count', 'night_count', 'frp_max_day', 'frp_max_night',
       'frp_csum_day', 'frp_csum_night', 'frp_mean_day', 'frp_mean_night',
       'frp_p90_day', 'frp_p90_night', 'frp_p95_day', 'frp_p95_night',
       'frp_p97_day', 'frp_p97_night', 'frp_p99_day', 'frp_p99_night',
       'frp_first_day', 'frp_first_night', 'Fire_ID', 'Fire_Name', 'geometry',
       'x', 'y', 'erc', 'erc_dv', 'vpd', 'vpd_dv', 'elev', 'slope', 'chili',
       'tpi'],
      dtype='object')

In [16]:
# subset columns to keep for modeling
grid_clim_topo = grid_clim_topo[['grid_index','Fire_ID','first_obs_date','frp_csum','frp_max','frp_max_day',
                                 'frp_max_night','frp_csum_day','frp_csum_night','afd_count','day_count','night_count',
                                 'erc','erc_dv','vpd','vpd_dv','elev','slope','chili','tpi','x','y']]
tree_metrics = tree_metrics[['grid_index','fortypnm_gp','fortyp_pct','forest_pct','species_gp_n',
                             'balive','badead','tpa_live','tpa_dead','sp_abundance_ld','sp_dominance_ld',
                             'tree_ht_live','tree_ht_dead','shannon_h_mn']]
# merge the FRP, climate, and topogrpahy to the forest metrics table
grid_tm = tree_metrics.merge(grid_clim_topo, on='grid_index', how='inner')
grid_tm.head()

Unnamed: 0,grid_index,fortypnm_gp,fortyp_pct,forest_pct,species_gp_n,balive,badead,tpa_live,tpa_dead,sp_abundance_ld,...,erc,erc_dv,vpd,vpd_dv,elev,slope,chili,tpi,x,y
0,34602,Piñon-juniper,81.767956,99.450549,Aspen,767.728006,0.0,25.429283,0.0,0.00298,...,67.272727,14.479287,1.125455,0.352746,2330.355876,8.706296,204.949908,22.736575,-109.55098,37.761913
1,34602,Piñon-juniper,81.767956,99.450549,Mixed-conifer,193050.923236,16366.250072,2989.948382,27.554587,0.350334,...,67.272727,14.479287,1.125455,0.352746,2330.355876,8.706296,204.949908,22.736575,-109.55098,37.761913
2,34602,Piñon-juniper,81.767956,99.450549,Piñon-juniper,327203.488306,22936.215795,5088.781307,460.403869,0.596255,...,67.272727,14.479287,1.125455,0.352746,2330.355876,8.706296,204.949908,22.736575,-109.55098,37.761913
3,34602,Piñon-juniper,81.767956,99.450549,Ponderosa,101746.724302,18578.858875,430.41307,27.554587,0.050432,...,67.272727,14.479287,1.125455,0.352746,2330.355876,8.706296,204.949908,22.736575,-109.55098,37.761913
4,34603,Piñon-juniper,56.886228,98.816568,Aspen,767.728006,0.0,25.429283,0.0,0.00208,...,67.272727,14.479287,1.125455,0.352746,2337.46426,5.940594,210.817931,17.923125,-109.551591,37.765225


In [17]:
len(grid_tm)

261015

In [18]:
# save this file out.
out_fp = os.path.join(projdir,'data/tabular/mod/gridstats_fortypnm_gp_tm_ct.csv')
grid_tm.to_csv(out_fp)
print(f"Saved file to: {out_fp}")

Saved file to: /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/tabular/mod/gridstats_fortypnm_gp_tm_ct.csv
