In [1]:
"""
Summaries of USFS TreeMap linked to FIA plot data
Emphasis on 
    - Metrics of forest composition
    - Ecological gradients of species dominance
    - Forest structure (abundance, dominance, diversity, stand height)

Aggregate these statistics to FRP gridcells.

Author: maxwell.cook@colorado.edu
"""

import os, sys, time
import pandas as pd
import rioxarray as rxr
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.colors import to_rgba

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

proj = 'EPSG:5070'

maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")

Ready to go !


In [2]:
# load the aggregated FRP grid
fp = os.path.join(projdir,'data/spatial/mod/VIIRS/viirs_snpp_jpss1_afd_latlon_fires_pixar_gridstats.gpkg')
grid = gpd.read_file(fp)
grid.columns

Index(['grid_index', 'grid_area', 'afd_count', 'unique_days', 'overlap',
       'frp_csum', 'frp_max', 'frp_min', 'frp_mean', 'frp_p90', 'frp_p95',
       'frp_p97', 'frp_p99', 'frp_first', 'day_max_frp', 'dt_max_frp',
       'first_obs_date', 'last_obs_date', 't4_max', 't4_mean', 't5_max',
       't5_mean', 'day_count', 'night_count', 'frp_max_day', 'frp_max_night',
       'frp_csum_day', 'frp_csum_night', 'frp_mean_day', 'frp_mean_night',
       'frp_p90_day', 'frp_p90_night', 'frp_p95_day', 'frp_p95_night',
       'frp_p97_day', 'frp_p97_night', 'frp_p99_day', 'frp_p99_night',
       'frp_first_day', 'frp_first_night', 'Fire_ID', 'Fire_Name', 'geometry'],
      dtype='object')

In [3]:
# load and prepare our study region for cropping TreeMap
# Southern Rockies ecoregion bounds (buffered)
fp = os.path.join(projdir,'data/spatial/raw/boundaries/na_cec_eco_l3_srme.gpkg')
srm = gpd.read_file(fp)
# Crop the raster by the SRM bounds
srm['geometry'] = srm.geometry.buffer(10000)
bounds = srm.total_bounds # total bounds of ecoregion
bounds

array([-1193290.29502988,  1391628.00599962,  -683136.18714099,
        2253336.36986925])

In [4]:
# Load the TreeMap (ca. 2016)
# Pixel values here denote the FIA plot ID ("tm_id")
# fp = os.path.join(maindir,'data/landcover/USFS/RDS_TreeMap/TreeMap2016.tif')
fp = os.path.join(projdir, "data/spatial/mod/USFS/TreeMap_2016_TMID_FORTYPCD.tif")
treemap_da = rxr.open_rasterio(fp, masked=True, cache=False, chunks='auto').squeeze()

# Grab some raster metadata from one of the layers
shp, gt, wkt, nd = treemap_da.shape, treemap_da.spatial_ref.GeoTransform, treemap_da.rio.crs, treemap_da.rio.nodata
print(
    f"Shape: {shp}; \n"
    f"GeoTransform: {gt}; \n"
    f"WKT: {wkt}; \n"
    f"NoData Value: {nd}; \n"
    f"Data Type: {treemap_da[0].dtype}")

# # crop to handle a subset of the data before aligning and stacking
# # tm_id raster
# treemap_da_c = treemap_da.rio.clip_box(
#     minx=bounds[0],
#     miny=bounds[1],
#     maxx=bounds[2],
#     maxy=bounds[3]
# )

# print(f"\nCropped TreeMap data to SRM bounds w/ 10km buffer.")

# del treemap_da, bounds
# gc.collect() # clean up

Shape: (2, 28724, 17006); 
GeoTransform: -1193295.0 30.0 0.0 2253345.0 0.0 -30.0; 
WKT: EPSG:5070; 
NoData Value: nan; 
Data Type: float32


In [5]:
tmid_vals = treemap_da.sel(band=1).values.flatten()  # tm_id band
fortypcd_vals = treemap_da.sel(band=2).values.flatten()  # FORTYPCD band
pixel_mapping = pd.DataFrame({
    'tm_id': tmid_vals,
    'fortypcd': fortypcd_vals
}).dropna(subset=['tm_id', 'fortypcd'])
pixel_mapping = pixel_mapping.drop_duplicates()
del tmid_vals, fortypcd_vals
gc.collect()

0

In [6]:
# get the count/proportion of unique "tm_id" from TreeMap in grids
t0 = time.time()

# see __functions.py
treemap_da_c = treemap_da.sel(band=1)
grid_tmid = compute_band_stats(grid, treemap_da_c, 'grid_index', attr='tm_id')

# load the TreeMap Tree Table
fp = os.path.join(maindir,'data/landcover/USFS/RDS_TreeMap/TreeMap2016_tree_table.csv')
tree_tbl = pd.read_csv(fp)
print(tree_tbl.columns)

# tidy columns
grid_tmid['count'] = grid_tmid['count'].astype(int)
grid_tmid['total_pixels'] = grid_tmid['total_pixels'].astype(int)
grid_tmid.rename(columns={'total_pixels': 'forest_pixels'}, inplace=True)

# join to the grid data
grid_trees = grid_tmid.merge(tree_tbl, on='tm_id', how='left')

del tree_tbl, treemap_da_c # clean up 

print(f"\n{grid_trees.head(3)}")

t1 = (time.time() - t0) / 60
print(f"\nTotal elapsed time: {t1:.2f} minutes.")
print("\n~~~~~~~~~~\n")

Index(['tm_id', 'CN', 'STATUSCD', 'TPA_UNADJ', 'SPCD', 'COMMON_NAME',
       'SCIENTIFIC_NAME', 'SPECIES_SYMBOL', 'DIA', 'HT', 'ACTUALHT', 'CR',
       'SUBP', 'TREE', 'AGENTCD'],
      dtype='object')

   grid_index  tm_id  count  forest_pixels  pct_cover            CN  STATUSCD  \
0     1955419   5081      2            169   1.183432  3.536933e+13       1.0   
1     1955419   5081      2            169   1.183432  3.536933e+13       1.0   
2     1955419   5081      2            169   1.183432  3.536933e+13       2.0   

   TPA_UNADJ   SPCD        COMMON_NAME    SCIENTIFIC_NAME SPECIES_SYMBOL  \
0   6.018046  803.0  Arizona white oak  Quercus arizonica           QUAR   
1   6.018046  803.0  Arizona white oak  Quercus arizonica           QUAR   
2   6.018046  803.0  Arizona white oak  Quercus arizonica           QUAR   

    DIA    HT  ACTUALHT    CR  SUBP  TREE  AGENTCD  
0  10.9  15.0      15.0  70.0   1.0   1.0      NaN  
1   5.6  11.0      11.0  70.0   1.0   2.0      NaN  
2  10.0 

In [7]:
# Calculate the Shannon diversity index for each tm_id
# Total trees per tm_id
grid_trees['total_trees'] = grid_trees.groupby('tm_id')['TREE'].transform('sum')
grid_trees['p_species'] = grid_trees['TREE'] / grid_trees['total_trees'] # proportion

# Calculate Shannon diversity (H') for each tree row
grid_trees['shannon_h'] = -grid_trees['p_species'] * np.log(grid_trees['p_species'])

# Aggregate to calculate H' at the tm_id level
shannon_tmid = (
    grid_trees.groupby('tm_id', as_index=False)
    .agg(shannon_h=('shannon_h', 'sum'))
)

# merge back
grid_trees.drop(columns=['shannon_h'], inplace=True)
grid_trees = grid_trees.merge(shannon_tmid, on='tm_id', how='left')
shannon_tmid.head(3)

Unnamed: 0,tm_id,shannon_h
0,32,5.706651
1,845,4.065466
2,1207,5.954289


In [8]:
grid_trees.columns

Index(['grid_index', 'tm_id', 'count', 'forest_pixels', 'pct_cover', 'CN',
       'STATUSCD', 'TPA_UNADJ', 'SPCD', 'COMMON_NAME', 'SCIENTIFIC_NAME',
       'SPECIES_SYMBOL', 'DIA', 'HT', 'ACTUALHT', 'CR', 'SUBP', 'TREE',
       'AGENTCD', 'total_trees', 'p_species', 'shannon_h'],
      dtype='object')

In [9]:
# Calculate the Tree Table statistics across tm_id/grid_index combinations
#### Abundance (trees/acre, scaled to per-pixel)
#### Dominance (live basal area, scaled to per-pixel)
#### Tree height (average/species)

# force species name to lower case
grid_trees.rename(columns={'COMMON_NAME': 'species'}, inplace=True)
grid_trees['species'] = grid_trees['species'].str.lower()
grid_trees['live_dead'] = (grid_trees['STATUSCD'] == 1).astype(int)
# calculate the trees per acre, scaling by the acre/pixel factor
grid_trees['tree_tpa'] = (grid_trees['TREE'] * grid_trees['TPA_UNADJ']) * 0.222395 # trees/acre

# subset to live trees
grid_trees['BA'] = np.pi * (grid_trees['DIA'] / 2) ** 2  # Basal area in square feet
grid_trees['BA_TPAsc'] = grid_trees['BA'] * grid_trees['tree_tpa']  # adjust based on tree abundance

# group by grid_index and plot identifier (tm_id)
# calculate statistics for live/dead seperately
# merge into a single dataframe

# live trees
live_metrics = (
    grid_trees[grid_trees['live_dead'] == 1]
    .groupby(['grid_index', 'tm_id', 'species'], as_index=False)
    .agg(
        tmid_pixels=('count','max'), # retains the tm_id pixel count
        shannon_h=('shannon_h','max'), # retains the tm_id shannon index
        balive=('BA_TPAsc', 'sum'),    # Live basal area
        tpa_live=('tree_tpa', 'sum'),  # Total live tree count (trees/acre)
        tree_ht_live=('ACTUALHT','mean') # average tree height
    )
)

# dead trees
dead_metrics = (
    grid_trees[grid_trees['live_dead'] == 0]
    .groupby(['grid_index', 'tm_id', 'species'], as_index=False)
    .agg(
        badead=('BA_TPAsc', 'sum'),    # Live basal area
        tpa_dead=('tree_tpa', 'sum'),  # Total live tree count (trees/acre)
        tree_ht_dead=('ACTUALHT','mean') # average tree height
    )
)

# combine live/dead balive
tree_metrics = live_metrics.merge(
    dead_metrics, 
    on=['grid_index', 'tm_id', 'species'], 
    how='outer'
).fillna(0)
tree_metrics['tmid_acres'] =  tree_metrics['tmid_pixels'] * 0.222395 # total acres for the tm_id (pixels * conversion factor)
# subset to the tree metric columns
tree_metrics = tree_metrics[['grid_index','tm_id','tmid_pixels','tmid_acres','species','shannon_h',
                             'balive','badead','tpa_live','tpa_dead','tree_ht_live','tree_ht_dead']]
# check on the results
tree_metrics.head(12)

Unnamed: 0,grid_index,tm_id,tmid_pixels,tmid_acres,species,shannon_h,balive,badead,tpa_live,tpa_dead,tree_ht_live,tree_ht_dead
0,183297,5447,7.0,1.556765,common or two-needle pinyon,8.638175,2019.306716,1060.887083,36.13635,6.691917,16.111111,20.5
1,183297,5447,7.0,1.556765,utah juniper,8.638175,1206.746574,1910.490238,18.737367,13.383833,13.8,12.0
2,183297,13033,45.0,10.007775,common or two-needle pinyon,6.11844,545.805142,211.136764,138.728761,4.01515,9.666667,10.0
3,183297,13033,45.0,10.007775,utah juniper,6.11844,2450.525649,0.0,44.16665,0.0,12.0,0.0
4,183297,41520,11.0,2.446345,utah juniper,3.817086,975.72179,0.0,6.691917,0.0,12.5,0.0
5,183297,41609,5.0,1.111975,utah juniper,5.774478,2595.425687,90.915159,138.844538,1.338383,10.307692,7.0
6,183297,46609,39.0,8.673405,common or two-needle pinyon,4.629669,165.568813,0.0,5.353533,0.0,15.0,0.0
7,183297,46609,39.0,8.673405,oneseed juniper,4.629669,1882.298024,0.0,5.353533,0.0,14.666667,0.0
8,183298,13198,1.0,0.222395,utah juniper,4.752754,11655.514639,756.837948,113.762584,6.691917,10.136364,13.0
9,183298,46609,65.0,14.455675,common or two-needle pinyon,4.629669,165.568813,0.0,5.353533,0.0,15.0,0.0


In [10]:
# join in the FORTYPCD by tm_id
# merge by tm_id
tree_metrics_ = tree_metrics.merge(pixel_mapping, on='tm_id', how='left')
tree_metrics_.head(12)

Unnamed: 0,grid_index,tm_id,tmid_pixels,tmid_acres,species,shannon_h,balive,badead,tpa_live,tpa_dead,tree_ht_live,tree_ht_dead,fortypcd
0,183297,5447,7.0,1.556765,common or two-needle pinyon,8.638175,2019.306716,1060.887083,36.13635,6.691917,16.111111,20.5,901.0
1,183297,5447,7.0,1.556765,common or two-needle pinyon,8.638175,2019.306716,1060.887083,36.13635,6.691917,16.111111,20.5,185.0
2,183297,5447,7.0,1.556765,utah juniper,8.638175,1206.746574,1910.490238,18.737367,13.383833,13.8,12.0,901.0
3,183297,5447,7.0,1.556765,utah juniper,8.638175,1206.746574,1910.490238,18.737367,13.383833,13.8,12.0,185.0
4,183297,13033,45.0,10.007775,common or two-needle pinyon,6.11844,545.805142,211.136764,138.728761,4.01515,9.666667,10.0,185.0
5,183297,13033,45.0,10.007775,utah juniper,6.11844,2450.525649,0.0,44.16665,0.0,12.0,0.0,185.0
6,183297,41520,11.0,2.446345,utah juniper,3.817086,975.72179,0.0,6.691917,0.0,12.5,0.0,184.0
7,183297,41609,5.0,1.111975,utah juniper,5.774478,2595.425687,90.915159,138.844538,1.338383,10.307692,7.0,184.0
8,183297,46609,39.0,8.673405,common or two-needle pinyon,4.629669,165.568813,0.0,5.353533,0.0,15.0,0.0,185.0
9,183297,46609,39.0,8.673405,oneseed juniper,4.629669,1882.298024,0.0,5.353533,0.0,14.666667,0.0,185.0


In [11]:
print(len(tree_metrics_))
print(tree_metrics_['fortypcd'].isna().sum())

3677408
0


In [12]:
# join in the forest type name
# load the TreeMap FORTYPCD species lookup table
fp = os.path.join(projdir,'data/tabular/mod/treemap_fortypcd_species_mapping.csv')
species_df = pd.read_csv(fp)
species_df = species_df[['FORTYPCD','SpeciesName']]
species_df.rename(columns={
    'FORTYPCD': 'fortypcd',
    'SpeciesName': 'fortypcd_nm'
}, inplace=True)
species_df = species_df[species_df['fortypcd'].isin(tree_metrics_['fortypcd'].unique())]
print(species_df.columns)

# join to the grid data
tree_metrics_ = tree_metrics_.merge(species_df, on='fortypcd', how='left')
print(f"\n{tree_metrics_.head(3)}\n")

Index(['fortypcd', 'fortypcd_nm'], dtype='object')

   grid_index  tm_id  tmid_pixels  tmid_acres                      species  \
0      183297   5447          7.0    1.556765  common or two-needle pinyon   
1      183297   5447          7.0    1.556765  common or two-needle pinyon   
2      183297   5447          7.0    1.556765                 utah juniper   

   shannon_h       balive       badead   tpa_live   tpa_dead  tree_ht_live  \
0   8.638175  2019.306716  1060.887083  36.136350   6.691917     16.111111   
1   8.638175  2019.306716  1060.887083  36.136350   6.691917     16.111111   
2   8.638175  1206.746574  1910.490238  18.737367  13.383833     13.800000   

   tree_ht_dead  fortypcd                fortypcd_nm  
0          20.5     901.0                      Aspen  
1          20.5     185.0  Pinyon / juniper woodland  
2          12.0     901.0                      Aspen  



In [13]:
# aggregate the TreeMap characteristics by FRP gridcell/species combinations
# summarizes forest metrics by individual species present in the grid cell
grid_tm = tree_metrics_.groupby(['grid_index', 'species', 'fortypcd_nm'], as_index=False).agg(
    tmid_unique=('tm_id', 'nunique'),
    fortypcd_n=('fortypcd','nunique'),
    tmid_acres=('tmid_acres','sum'),
    shannon_h=('shannon_h','mean'),
    balive=('balive', 'sum'),
    badead=('badead', 'sum'), 
    tpa_live=('tpa_live', 'sum'),  
    tpa_dead=('tpa_dead', 'sum') ,
    tree_ht_live=('tree_ht_live', 'mean'),
    tree_ht_dead=('tree_ht_dead', 'mean')
)

# Add a "combined TPA" (live + dead)
# this may be important for lodgepole and spruce-fir in particular (beetle-kill)
grid_tm['tpa_ld'] = grid_tm['tpa_live'] + grid_tm['tpa_dead']
grid_tm['batotal'] = grid_tm['balive'] + grid_tm['badead']

# Check results
grid_tm.head(10)

Unnamed: 0,grid_index,species,fortypcd_nm,tmid_unique,fortypcd_n,tmid_acres,shannon_h,balive,badead,tpa_live,tpa_dead,tree_ht_live,tree_ht_dead,tpa_ld,batotal
0,183297,common or two-needle pinyon,Aspen,1,1,1.556765,8.638175,2019.306716,1060.887083,36.13635,6.691917,16.111111,20.5,42.828267,3080.193799
1,183297,common or two-needle pinyon,Pinyon / juniper woodland,3,1,20.237945,6.462095,2730.680671,1272.023847,180.218644,10.707067,13.592593,10.166667,190.925711,4002.704518
2,183297,oneseed juniper,Pinyon / juniper woodland,1,1,8.673405,4.629669,1882.298024,0.0,5.353533,0.0,14.666667,0.0,5.353533,1882.298024
3,183297,utah juniper,Aspen,1,1,1.556765,8.638175,1206.746574,1910.490238,18.737367,13.383833,13.8,12.0,32.1212,3117.236812
4,183297,utah juniper,Juniper woodland,2,1,3.55832,4.795782,3571.147477,90.915159,145.536454,1.338383,11.403846,3.5,146.874838,3662.062636
5,183297,utah juniper,Pinyon / juniper woodland,2,1,11.56454,7.378307,3657.272223,1910.490238,62.904017,13.383833,12.9,6.0,76.28785,5567.762461
6,183298,common or two-needle pinyon,Pinyon / juniper woodland,1,1,14.455675,4.629669,165.568813,0.0,5.353533,0.0,15.0,0.0,5.353533,165.568813
7,183298,oneseed juniper,Pinyon / juniper woodland,1,1,14.455675,4.629669,1882.298024,0.0,5.353533,0.0,14.666667,0.0,5.353533,1882.298024
8,183298,utah juniper,Juniper woodland,1,1,0.222395,4.752754,11655.514639,756.837948,113.762584,6.691917,10.136364,13.0,120.454501,12412.352587
9,183299,common or two-needle pinyon,Pinyon / juniper woodland,3,1,7.339035,4.940407,5272.452479,0.0,451.595553,0.0,13.416667,0.0,451.595553,5272.452479


In [14]:
unique_spp = grid_tm['species'].unique()
unique_spp

array(['common or two-needle pinyon', 'oneseed juniper', 'utah juniper',
       'honey mesquite', 'siberian elm', 'velvet mesquite',
       'alligator juniper', 'arizona pinyon pine', 'gambel oak',
       'singleleaf pinyon', 'ponderosa pine', 'western juniper',
       'arizona white oak', 'cherrybark oak', 'common persimmon',
       'eastern hophornbeam', 'eastern redbud', 'emory oak', 'green ash',
       'mexican pinyon pine', 'shagbark hickory', 'sugarberry',
       'swamp chestnut oak', 'sweetgum', 'blue spruce', 'douglas-fir',
       'quaking aspen', 'white fir', 'american hornbeam, musclewood',
       'narrowleaf cottonwood', 'red maple', 'river birch',
       'swamp tupelo', 'sweetbay', 'water oak', 'water tupelo',
       'eastern cottonwood', 'plains cottonwood',
       'rocky mountain juniper', 'fremont cottonwood',
       'southwestern white pine', 'jeffrey pine', 'american elm',
       'loblolly pine', 'pignut hickory', 'post oak', 'sassafras',
       'southern red oak', 'wh

In [15]:
[s for s in unique_spp if 'pine' in str(s).lower()] # check pine species

['arizona pinyon pine',
 'ponderosa pine',
 'mexican pinyon pine',
 'southwestern white pine',
 'jeffrey pine',
 'loblolly pine',
 'longleaf pine',
 'slash pine',
 'arizona pine',
 'sugar pine',
 'knobcone pine',
 'lodgepole pine',
 'washoe pine',
 'subalpine fir',
 'whitebark pine',
 'gray or california foothill pine',
 'limber pine',
 'great basin bristlecone pine',
 'western white pine',
 'eastern white pine',
 'jack pine',
 'rocky mountain bristlecone pine',
 'sand pine',
 'shortleaf pine',
 'chihuahuan pine',
 'virginia pine',
 'spruce pine',
 'pond pine',
 'red pine']

In [16]:
# Identify the primary species from the Tree Table
spp_pr = (
    grid_tm.groupby('species', as_index=False)
    .agg(
        balive=('balive', 'sum'),
        batotal=('batotal', 'sum'),
        tpa_live=('tpa_live', 'sum'),
        tpa_ld=('tpa_ld', 'sum') # live+dead
    )
)
# calculate the fraction
spp_pr['balive_f'] = spp_pr['balive'] / spp_pr['balive'].sum()
spp_pr['batotal_f'] = spp_pr['batotal'] / spp_pr['batotal'].sum()
spp_pr['tpa_live_f'] = spp_pr['tpa_live'] / spp_pr['tpa_live'].sum()
spp_pr['tpa_ld_f'] = spp_pr['tpa_ld'] / spp_pr['tpa_ld'].sum()
# sort values and gather the primary species
spp_pr = spp_pr.sort_values(by='batotal_f', ascending=False)
# which species make up 97% of observations?
spp_pr['balive_f_csum'] = spp_pr['balive_f'].cumsum()
spp_pr['batotal_f_csum'] = spp_pr['batotal_f'].cumsum()
spp_pr['tpa_live_f_csum'] = spp_pr['tpa_live_f'].cumsum()
spp_pr['tpa_ld_f_csum'] = spp_pr['tpa_ld_f'].cumsum()

top_species = spp_pr[spp_pr['batotal_f_csum'] <= 0.97]
print(f"\nSpecies making up 97% of the total *metric*: \n\n{top_species[['species','batotal','tpa_ld']]}\n\n")


Species making up 97% of the total *metric*: 

                        species       batotal        tpa_ld
134              ponderosa pine  8.442305e+10  1.522110e+09
100              lodgepole pine  4.283409e+10  2.601359e+09
80                    grand fir  2.562921e+10  5.864789e+08
64                  douglas-fir  1.781552e+10  4.290890e+08
93                 jeffrey pine  1.163608e+10  1.506279e+08
203                   white fir  1.093655e+10  3.268134e+08
198             western juniper  1.076934e+10  1.432690e+08
173               subalpine fir  8.209387e+09  5.390362e+08
74             engelmann spruce  6.135386e+09  2.889311e+08
137               quaking aspen  3.389497e+09  1.786619e+08
199               western larch  3.289105e+09  9.952699e+07
90                incense-cedar  3.134805e+09  4.805677e+07
62   curlleaf mountain-mahogany  2.494766e+09  5.381842e+07
78                   gambel oak  1.347041e+09  2.578586e+08
188                utah juniper  1.098736e+09  2.376

In [17]:
# Aggregate species into forest groups
spp_grouping = {
    'pinyon|juniper|cedar': 'pinon-juniper',
    ' fir|spruce': 'spruce-fir',
    'limber|bristlecone': 'subalpine-pine',
    'ponderosa pine|Jeffrey': 'ponderosa', 
    'lodgepole pine': 'lodgepole',
    'aspen': 'quaking aspen',
    'Douglas': 'douglas-fir',
    'oak': 'oak-woodland',
    'cottonwood|willow': 'woody-riparian'
}

spp_remap = {} # dictionary to store the remap values
# Iterate over groups to create the species remap dictionary
for keywords, spp_group in spp_grouping.items():
    # Find species matching the keywords
    spp = grid_tm[grid_tm['species'].str.contains(keywords, case=False, na=False)]
    # Add matching species to the remap dictionary
    spp_remap.update({name: spp_group for name in spp['species'].unique()})

# Apply the remap to create a new grouped species column
grid_tm['species_gp_n'] = grid_tm['species'].map(spp_remap).fillna(grid_tm['species'])
# Verify the updated species groups
print(grid_tm[['species', 'species_gp_n']].drop_duplicates().head(10))

                        species     species_gp_n
0   common or two-needle pinyon    pinon-juniper
2               oneseed juniper    pinon-juniper
3                  utah juniper    pinon-juniper
10               honey mesquite   honey mesquite
12                 siberian elm     siberian elm
16              velvet mesquite  velvet mesquite
24            alligator juniper    pinon-juniper
25          arizona pinyon pine    pinon-juniper
27                   gambel oak     oak-woodland
29            singleleaf pinyon    pinon-juniper


In [18]:
# Identify the primary species
spp_pr = (
    grid_tm.groupby('species_gp_n', as_index=False)
    .agg(
        balive=('balive','sum'),
        tpa_live=('tpa_live', 'sum')
    )
)
# calculate the fraction
spp_pr['balive_f'] = spp_pr['balive'] / spp_pr['balive'].sum()
spp_pr['tpa_live_f'] = spp_pr['tpa_live'] / spp_pr['tpa_live'].sum()
# sort values and gather the primary species
spp_pr = spp_pr.sort_values(by='balive_f', ascending=False)
# which species make up 97% of observations?
spp_pr['balive_f_csum'] = spp_pr['balive_f'].cumsum()
spp_pr['tpa_live_f_csum'] = spp_pr['tpa_live_f'].cumsum()
top_species = spp_pr[spp_pr['tpa_live_f_csum'] <= 0.97]
print(f"\nSpecies making up 97% of the total *tpa_live_f_csum*: \n\n{top_species[['species_gp_n','balive_f','tpa_live_f']]}\n\n")


Species making up 97% of the total *tpa_live_f_csum*: 

      species_gp_n  balive_f  tpa_live_f
75       ponderosa  0.414456    0.222695
102     spruce-fir  0.204656    0.233692
54       lodgepole  0.177388    0.344340
36     douglas-fir  0.073194    0.057223
72   pinon-juniper  0.066153    0.036686
62    oak-woodland  0.013210    0.040468
77   quaking aspen  0.012798    0.022822




In [19]:
# get aspen grids (any aspen component)
# get gridcells with at least some aspen
spp = 'quaking aspen'
spp_idx = grid_tm[grid_tm['species'] == spp]['grid_index'].unique() # list of grid indices
grid_spp = grid_tm[grid_tm['grid_index'].isin(spp_idx)]
# check how many aspen gridcells there are
n_spp_grids = len(grid_spp['grid_index'].unique())
print(f"There are a total of {n_spp_grids} [{round(n_spp_grids/len(grid_tm['grid_index'].unique())*100,1)}%] grids w/ any aspen")

There are a total of 40717 [73.9%] grids w/ any aspen


In [23]:
# Identify dominant species for each grid cell by 'balive' or 'tpa_live'
metric = 'tpa_live'  # Change to 'tpa_live' if needed
dom_spp = (
    grid_tm.groupby('grid_index', as_index=False)
    .apply(lambda df: df.loc[df[metric].idxmax()])
    .reset_index(drop=True)
)

# Filter for grid cells where species is dominant
spp = 'ponderosa'
spp_dominant = dom_spp[dom_spp['species_gp_n'] == spp]

# Count the number of grid cells with aspen dominance
n_dom = len(spp_dominant['grid_index'].unique())

# Print the results
total_grids = len(grid_tm['grid_index'].unique())
spp_pct = round(n_dom / total_grids * 100, 1)
print(f"There are {n_dom} grid cells ({spp_pct}%) where {spp} is dominant based on {metric}.")

There are 14007 grid cells (25.4%) where ponderosa is dominant based on tpa_live.


In [23]:
grid_tm.columns

Index(['grid_index', 'species', 'fortypcd_nm', 'tmid_unique', 'fortypcd_n',
       'tmid_acres', 'shannon_h', 'balive', 'badead', 'tpa_live', 'tpa_dead',
       'tree_ht_live', 'tree_ht_dead', 'tpa_ld', 'batotal', 'species_gp_n'],
      dtype='object')

In [62]:
# Identify the primary species from the FORTYPCD
# calculate the fraction
# Identify the primary species from the Tree Table
spp_pr = (
    grid_tm.groupby('fortypcd_nm', as_index=False)
    .agg(
        count=('fortypcd_nm', 'size'),
        balive=('balive', 'sum'),
        batotal=('batotal', 'sum'),
        tpa_live=('tpa_live', 'sum'),
        tpa_ld=('tpa_ld', 'sum') # live+dead
    )
)
# calculate the fraction
spp_pr['fraction'] = spp_pr['count'] / spp_pr['count'].sum()
spp_pr = spp_pr.sort_values(by='fraction', ascending=False)
spp_pr['c_fraction'] = spp_pr['fraction'].cumsum()
top_species = spp_pr[spp_pr['c_fraction'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total: \n\n{top_species}\n\n")



Species making up 97% of the total: 

                         fortypcd_nm   count        balive       batotal  \
28                    Lodgepole pine  296549  3.912092e+10  4.408947e+10   
41                    Ponderosa pine  249946  9.378991e+10  1.014958e+11   
0                              Aspen  225163  3.566851e+09  4.195948e+09   
13                       Douglas-fir  170183  9.940877e+09  1.096679e+10   
40         Pinyon / juniper woodland  152073  1.973027e+09  2.267695e+09   
18  Engelmann spruce / subalpine fir  143273  1.033600e+10  1.191225e+10   
17                  Engelmann spruce   95547  1.060359e+09  1.360570e+09   
44            Rocky Mountain juniper   91335  6.010137e+08  6.423255e+08   
12            Deciduous oak woodland   82945  7.678383e+08  8.204062e+08   
46                     Subalpine fir   79942  1.498009e+09  1.872918e+09   
21                         Grand fir   73431  2.274365e+10  2.569649e+10   
53                         White fir   69260  5.

In [63]:
# Do the species groupings here as well
spp_grouping = {
    'pinyon|juniper': 'pinon-juniper',
    ' fir|spruce': 'spruce-fir',
    'ponderosa pine': 'ponderosa', # just shorten the name here
    'lodgepole pine': 'lodgepole',
    'aspen': 'quaking aspen',
    'Douglas': 'douglas-fir',
    'oak': 'oak-woodland'
}

spp_remap = {}
# Iterate over groups to create the species remap dictionary
for keywords, spp_group in spp_grouping.items():
    # Find species matching the keywords
    spp = grid_tm[grid_tm['fortypcd_nm'].str.contains(keywords, case=False, na=False)]
    # Add matching species to the remap dictionary
    spp_remap.update({name: spp_group for name in spp['fortypcd_nm'].unique()})

# Apply the remap to create a new grouped species column
grid_tm['fortyp_gp_n'] = grid_tm['fortypcd_nm'].map(spp_remap).fillna(grid_tm['fortypcd_nm'])
# Verify the updated species groups
print(grid_tm[['fortypcd_nm', 'fortyp_gp_n']].drop_duplicates().head(10))

                   fortypcd_nm             fortyp_gp_n
0                        Aspen           quaking aspen
1    Pinyon / juniper woodland           pinon-juniper
4             Juniper woodland           pinon-juniper
10      Other exotic hardwoods  Other exotic hardwoods
13                  Cottonwood              Cottonwood
16              Lodgepole pine               lodgepole
17           Mesquite woodland       Mesquite woodland
91              Ponderosa pine               ponderosa
96      Deciduous oak woodland            oak-woodland
271     Rocky Mountain juniper           pinon-juniper


In [65]:
# Identify the primary species from the FORTYPCD
# calculate the fraction
# Identify the primary species from the Tree Table
spp_pr = (
    grid_tm.groupby('fortyp_gp_n', as_index=False)
    .agg(
        count=('fortyp_gp_n', 'size'),
        balive=('balive', 'sum'),
        batotal=('batotal', 'sum'),
        tpa_live=('tpa_live', 'sum'),
        tpa_ld=('tpa_ld', 'sum') # live+dead
    )
)
# calculate the fraction
spp_pr['fraction'] = spp_pr['count'] / spp_pr['count'].sum()
spp_pr = spp_pr.sort_values(by='fraction', ascending=False)
spp_pr['c_fraction'] = spp_pr['fraction'].cumsum()
top_species = spp_pr[spp_pr['c_fraction'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total: \n\n{top_species}\n\n")



Species making up 97% of the total: 

                 fortyp_gp_n   count        balive       batotal  \
37                spruce-fir  481803  4.082800e+10  4.651675e+10   
34             pinon-juniper  320518  9.869371e+09  1.127526e+10   
32                 lodgepole  296549  3.912092e+10  4.408947e+10   
35                 ponderosa  249946  9.378991e+10  1.014958e+11   
36             quaking aspen  225163  3.566851e+09  4.195948e+09   
31               douglas-fir  170183  9.940877e+09  1.096679e+10   
33              oak-woodland   91500  1.652713e+09  1.866709e+09   
0   California mixed conifer   28043  1.081963e+10  1.254423e+10   
11               Limber pine   22055  2.618769e+08  3.018428e+08   
16                Nonstocked   18183  1.079291e+08  1.443018e+08   

        tpa_live        tpa_ld  fraction  c_fraction  
37  1.247964e+09  1.296867e+09  0.244772    0.244772  
34  2.614023e+08  2.713563e+08  0.162834    0.407605  
32  2.689754e+09  2.783447e+09  0.150657    0.

In [None]:
# Merge the FORTYPCD to the forest metrics grid (grid_tm)
print(len(grid_tm))
print(len(grid_fortypcd))

In [None]:
# calculate the landscape fraction (prevalence)
spp_pr = (
    grid_tm.groupby('species', as_index=False)
    .agg(
        balive=('balive','sum'),
        tpa_live=('tpa_live', 'sum')
    )
)
# calculate the fraction
spp_pr['balive_f'] = spp_pr['balive'] / spp_pr['balive'].sum()
spp_pr['tpa_f'] = spp_pr['tpa_live'] / spp_pr['tpa_live'].sum()

# sort values and plot
spp_pr = spp_pr.sort_values(by='balive_f', ascending=False)

# which species make up 97% of observations?
spp_pr['balive_f_csum'] = spp_pr['balive_f'].cumsum()
spp_pr['tpa_f_csum'] = spp_pr['tpa_f'].cumsum()
top_species = spp_pr[spp_pr['balive_f_csum'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total *metric*: \n\n{top_species}\n\n")

# plot it
plt.figure(figsize=(6, 4))
sns.barplot(data=top_species, x='species', y='balive_f', palette='viridis')
plt.xlabel('Species')
plt.ylabel('Fractional live basal area')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
spp_dom = (
    grid_trees.groupby(['grid_index', 'COMMON_NAME'])
    .size()
    .reset_index(name='frequency')  # Count occurrences of each species
    .sort_values(['grid_index', 'frequency'], ascending=[True, False])
    .drop_duplicates('grid_index')  # Keep only the top species per grid_index
)
print(spp_dom.head())

# calculate the landscape fraction (prevalence)
spp_dom = spp_dom.groupby('COMMON_NAME')['frequency'].sum().reset_index()
spp_dom['fraction'] = spp_dom['frequency'] / spp_dom['frequency'].sum()
spp_dom = spp_dom.sort_values(by='fraction', ascending=False)

# which species make up 97% of observations?
spp_dom['c_fraction'] = spp_dom['fraction'].cumsum()
top_species = spp_dom[spp_dom['c_fraction'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total: \n\n{top_species}\n\n")

# plot it
plt.figure(figsize=(6, 4))
sns.barplot(data=top_species, x='COMMON_NAME', y='fraction', palette='viridis')
plt.xlabel('Species')
plt.ylabel('Fractional (frequency)')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Prepare data for stacked bar plot
stacked_data = comparison_table.set_index('COMMON_NAME')[
    ['fraction_balive', 'fraction_abundance', 'weighted_frequency']
].T

# Plot the stacked bar chart
stacked_data.plot(
    kind='bar',
    stacked=True,
    figsize=(12, 6),
    colormap='viridis'
)

# Customize the plot
plt.xlabel('Metric', fontsize=12)
plt.ylabel('Proportion', fontsize=12)
plt.title('Species Contribution by Different Metrics', fontsize=14)
plt.legend(title='Species', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# do some species regrouping and renaming
spp_grouping = {
    'pinyon|juniper': 'pinon-juniper',
    ' fir|spruce': 'spruce-fir',
    'ponderosa pine': 'ponderosa',
    'lodgepole pine': 'lodgepole'
}

spp_remap = {}
# Iterate over groups to create the species remap dictionary
for keywords, spp_group in spp_grouping.items():
    # Find species matching the keywords
    spp = grid_trees[grid_trees['COMMON_NAME'].str.contains(keywords, case=False, na=False)]
    # Add matching species to the remap dictionary
    spp_remap.update({name: spp_group for name in spp['COMMON_NAME'].unique()})

# Apply the remap to create a new grouped species column
grid_trees['SpeciesGroup'] = grid_trees['COMMON_NAME'].map(spp_remap).fillna(grid_trees['COMMON_NAME'])

# Verify the updated species groups
print(grid_trees[['COMMON_NAME', 'SpeciesGroup']].drop_duplicates().head(10))

In [None]:
# merge the live basal area
grid_trees = grid_trees.merge(ba_by_species, on=['grid_index', 'tm_id', 'COMMON_NAME'], how='left')
grid_trees[['grid_index','tm_id','count','COMMON_NAME','balive']].head(10)

In [None]:
# Collapse the tree data to species
row_counts = (
    grid_trees.groupby(['grid_index', 'tm_id', 'COMMON_NAME'])
    .size()
    .reset_index(name='tree_count')  # Name the count column 'tree_count'
)
# Merge the tree_count back into the original grid_trees DataFrame
grid_trees_sp = grid_trees.merge(row_counts, on=['grid_index', 'tm_id', 'COMMON_NAME'], how='left')
grid_trees_sp = grid_trees_sp.drop_duplicates(subset=['grid_index', 'tm_id', 'COMMON_NAME'])
grid_trees_sp[['grid_index', 'tm_id', 'COMMON_NAME', 'SpeciesGroup', 'tree_count', 'balive']].head(10)

In [None]:
# identify the dominant forest species for each "tm_id"
spp_dominance = (
    grid_trees_sp.groupby(['tm_id', 'SpeciesGroup'])['balive']  # Sum tree counts (or use another metric)
    .sum()
    .reset_index()
    .sort_values(['tm_id', 'balive'], ascending=[True, False])
    .drop_duplicates('tm_id')  # Keep only the top species per tm_id
)

# join back to the tm_id summary
grid_tmid_spp = grid_tmid.merge(spp_dominance[['tm_id', 'SpeciesGroup']], on='tm_id', how='left')

# identify the landscape proportion of dominant species
spp_pr = (
    grid_tmid_spp.groupby('SpeciesGroup')['count']
    .sum()
    .reset_index()
    .rename(columns={'count': 'maj_spp_count'})
    .sort_values(by='maj_spp_count', ascending=False)
)

# calculate the fraction
spp_pr['fraction'] = spp_pr['maj_spp_count'] / spp_pr['maj_spp_count'].sum()
spp_pr = spp_pr.sort_values(by='fraction', ascending=False)

# Identify species contributing 97% of the burned area
spp_pr['c_fraction'] = spp_pr['fraction'].cumsum()
top_species = spp_pr[spp_pr['c_fraction'] <= 0.97]
print(f"\nSpecies contributing to 97% of the burned area:\n{top_species}\n")

# plot it
plt.figure(figsize=(6, 4))
sns.barplot(data=top_species, x='SpeciesGroup', y='fraction', palette='viridis')
plt.xlabel('Species')
plt.ylabel('Fractional cover')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Calculate the species gradient (proportions) based on Tree Table
grid_trees.columns

In [None]:
# Calculate abundance (count) and dominance (basal area) for each species in each tm_id
sp_abundance = (
    grid_trees.groupby(['tm_id', 'COMMON_NAME'])['TREE'] 
    .sum()
    .reset_index()
    .rename(columns={'TREE': 'abundance'})
)
sp_dominance = (
    grid_trees.groupby(['tm_id', 'COMMON_NAME'])['balive']
    .sum()
    .reset_index()
    .rename(columns={'balive': 'dominance'})
)

# join these together
composition = sp_abundance.merge(sp_dominance, on=['tm_id', 'COMMON_NAME'], how='outer').fillna(0)

# Normalize abundance/dominance within each tm_id (plot)
composition['abundance'] = (
    composition.groupby('tm_id')['abundance'].transform(lambda x: x / x.sum())
)
composition['dominance'] = (
    composition.groupby('tm_id')['dominance'].transform(lambda x: x / x.sum())
)

# check the results
composition.head()

In [None]:
ba_by_species[ba_by_species['tm_id'] == 3726]

In [None]:
composition[composition['tm_id'] == 3726]

In [None]:
grid_trees[grid_trees['tm_id'] == 3726]

In [None]:
# test tree table
tree_tbl[tree_tbl['tm_id'] == 21404][['tm_id','COMMON_NAME','DIA','HT','CR','TREE']]