In [1]:
"""
Summaries of USFS TreeMap linked to FIA plot data
Emphasis on 
    - Metrics of forest composition
    - Ecological gradients of species dominance
    - Forest structure (abundance, dominance, diversity, stand height)

Aggregate these statistics to FRP gridcells.

Author: maxwell.cook@colorado.edu
"""

import os, sys, time
import pandas as pd
import rioxarray as rxr
import xarray as xr
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.colors import to_rgba

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

proj = 'EPSG:5070'

maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")

Ready to go !


In [2]:
# load the aggregated FRP grid
fp = os.path.join(projdir,'data/spatial/mod/VIIRS/viirs_snpp_jpss1_afd_latlon_fires_pixar_gridstats.gpkg')
grid = gpd.read_file(fp)
grid.columns

Index(['grid_index', 'grid_area', 'afd_count', 'unique_days', 'overlap',
       'frp_csum', 'frp_max', 'frp_min', 'frp_mean', 'frp_p90', 'frp_p95',
       'frp_p97', 'frp_p99', 'frp_first', 'day_max_frp', 'dt_max_frp',
       'first_obs_date', 'last_obs_date', 't4_max', 't4_mean', 't5_max',
       't5_mean', 'day_count', 'night_count', 'frp_max_day', 'frp_max_night',
       'frp_csum_day', 'frp_csum_night', 'frp_mean_day', 'frp_mean_night',
       'frp_p90_day', 'frp_p90_night', 'frp_p95_day', 'frp_p95_night',
       'frp_p97_day', 'frp_p97_night', 'frp_p99_day', 'frp_p99_night',
       'frp_first_day', 'frp_first_night', 'Fire_ID', 'Fire_Name', 'geometry'],
      dtype='object')

In [3]:
# load and prepare our study region for cropping TreeMap
# Southern Rockies ecoregion bounds (buffered)
fp = os.path.join(projdir,'data/spatial/raw/boundaries/na_cec_eco_l3_srme.gpkg')
srm = gpd.read_file(fp)
# Crop the raster by the SRM bounds
srm['geometry'] = srm.geometry.buffer(10000)
bounds = srm.total_bounds # total bounds of ecoregion
bounds

array([-1193290.29502988,  1391628.00599962,  -683136.18714099,
        2253336.36986925])

In [4]:
# Load the TreeMap (ca. 2016)
# Pixel values here denote the FIA plot ID ("tm_id")
# see '04a_TreeMap_FIA-Prep.ipynb'
# fp = os.path.join(maindir,'data/landcover/USFS/RDS_TreeMap/TreeMap2016.tif') # tm_id band
fp = os.path.join(projdir, "data/spatial/mod/USFS/TreeMap_2016_TMID_FORTYPCD.tif") # multi-band
treemap_da = rxr.open_rasterio(fp, masked=True, cache=False).squeeze()
treemap_da = treemap_da.sel(band=1) # the "tm_id" band, dropping the FORTYP band

# Grab some raster metadata from one of the layers
shp, gt, wkt, nd = treemap_da.shape, treemap_da.spatial_ref.GeoTransform, treemap_da.rio.crs, treemap_da.rio.nodata
print(
    f"Shape: {shp}; \n"
    f"GeoTransform: {gt}; \n"
    f"WKT: {wkt}; \n"
    f"NoData Value: {nd}; \n"
    f"Data Type: {treemap_da[0].dtype}")

Shape: (28724, 17006); 
GeoTransform: -1193295.0 30.0 0.0 2253345.0 0.0 -30.0; 
WKT: EPSG:5070; 
NoData Value: nan; 
Data Type: float32


In [5]:
# create a forest/non-forest grid
forest_mask = xr.where(~np.isnan(treemap_da), 1, 0)

# Save as a GeoTIFF
out_fp = os.path.join(projdir, "data/spatial/mod/USFS/TreeMap_2016_forestmask.tif")
forest_mask.rio.to_raster(out_fp, compress='zstd', zstd_level=9, dtype='uint16', driver='GTiff')
print(f"Saved multi-band raster to: {out_fp}")

# calculate the total grid pixels
# see __functions.py 'compute_band_stats'
forest_pct = compute_band_stats(grid, forest_mask, 'grid_index', attr='constant')
forest_pct.rename(columns={
    'pct_cover': 'forest_pct',
    'total_pixels': 'forest_pix'
}, inplace=True)
forest_pct = forest_pct[forest_pct['constant'] == 1] # just forest pixels
forest_pct = forest_pct[['grid_index','forest_pix','forest_pct']]

del forest_mask
gc.collect() # clean up

forest_pct.head()

Saved multi-band raster to: /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/spatial/mod/USFS/TreeMap_2016_forestmask.tif


Unnamed: 0,grid_index,forest_pix,forest_pct
0,1955419,169,100.0
1,1955420,182,100.0
2,1955421,169,100.0
4,1955422,182,91.758242
6,1955423,169,73.372781


In [6]:
n_forest = len(forest_pct[forest_pct['forest_pct'] > 50])
print(f"[{round(n_forest/len(grid)*100,2)}%] predominantly forest grids.")

[84.76%] predominantly forest grids.


In [8]:
# get the count/proportion of unique "tm_id" from TreeMap in grids

t0 = time.time()

# see __functions.py
grid_tmid = compute_band_stats(grid, treemap_da, 'grid_index', attr='tm_id')

# tidy columns in the summary table
grid_tmid['count'] = grid_tmid['count'].astype(int)
grid_tmid['total_pixels'] = grid_tmid['total_pixels'].astype(int)
grid_tmid.rename(columns={
    'count': 'tmid_count',
    'total_pixels': 'total_pix',
    'pct_cover': 'tmid_pct' # percent of forest area
}, inplace=True)

# join in the forest_pct
grid_tmid = grid_tmid.merge(forest_pct, on='grid_index', how='left')
print(f"\n{grid_tmid.head(10)}")

t1 = (time.time() - t0) / 60
print(f"\nTotal elapsed time: {t1:.2f} minutes.")
print("\n~~~~~~~~~~\n")

del treemap_da # clean up 
gc.collect()


   grid_index  tm_id  tmid_count  total_pix   tmid_pct  forest_pix  forest_pct
0     1955419   5081           2        169   1.183432         169       100.0
1     1955419   5490          44        169  26.035503         169       100.0
2     1955419   5501           2        169   1.183432         169       100.0
3     1955419  12665           3        169   1.775148         169       100.0
4     1955419  20313           5        169   2.958580         169       100.0
5     1955419  45478           8        169   4.733728         169       100.0
6     1955419  46099           1        169   0.591716         169       100.0
7     1955419  51905           5        169   2.958580         169       100.0
8     1955419  53570           3        169   1.775148         169       100.0
9     1955419  53601           1        169   0.591716         169       100.0

Total elapsed time: 0.70 minutes.

~~~~~~~~~~



44

In [11]:
# load the TreeMap Tree Table
fp = os.path.join(maindir,'data/landcover/USFS/RDS_TreeMap/TreeMap2016_tree_table.csv')
tree_tbl = pd.read_csv(fp)
print(tree_tbl.columns)

# join the Tree Table to the grid data
grid_trees = grid_tmid.merge(tree_tbl, on='tm_id', how='left')
print(f"\n{grid_trees.head(3)}")

del tree_tbl
gc.collect()

Index(['tm_id', 'CN', 'STATUSCD', 'TPA_UNADJ', 'SPCD', 'COMMON_NAME',
       'SCIENTIFIC_NAME', 'SPECIES_SYMBOL', 'DIA', 'HT', 'ACTUALHT', 'CR',
       'SUBP', 'TREE', 'AGENTCD'],
      dtype='object')

   grid_index  tm_id  tmid_count  total_pix  tmid_pct  forest_pix  forest_pct  \
0     1955419   5081           2        169  1.183432         169       100.0   
1     1955419   5081           2        169  1.183432         169       100.0   
2     1955419   5081           2        169  1.183432         169       100.0   

             CN  STATUSCD  TPA_UNADJ  ...        COMMON_NAME  \
0  3.536933e+13       1.0   6.018046  ...  Arizona white oak   
1  3.536933e+13       1.0   6.018046  ...  Arizona white oak   
2  3.536933e+13       2.0   6.018046  ...  Arizona white oak   

     SCIENTIFIC_NAME SPECIES_SYMBOL   DIA    HT  ACTUALHT    CR  SUBP  TREE  \
0  Quercus arizonica           QUAR  10.9  15.0      15.0  70.0   1.0   1.0   
1  Quercus arizonica           QUAR   5.6  11.0      11.0

0

In [12]:
# Calculate the Shannon diversity index for each tm_id
# Using the full list of trees in the Tree Table
# Based on total trees per tm_id and proportion
grid_trees['total_trees'] = grid_trees.groupby('tm_id')['TREE'].transform('sum')
grid_trees['p_species'] = grid_trees['TREE'] / grid_trees['total_trees'] # proportion

# Calculate Shannon diversity (H') for each tree row
grid_trees['shannon_h'] = -grid_trees['p_species'] * np.log(grid_trees['p_species'])

# Aggregate to calculate H' at the tm_id level
shannon_tmid = (
    grid_trees.groupby('tm_id', as_index=False)
    .agg(shannon_h=('shannon_h', 'sum')))
# subset columns
shannon_tmid = shannon_tmid[['tm_id','shannon_h']]
print(shannon_tmid.head(3))

# merge back to the grid data
grid_trees.drop(columns=['shannon_h', 'total_trees', 'p_species'], inplace=True)
grid_trees = grid_trees.merge(shannon_tmid, on='tm_id', how='left')
print(f"\n{grid_trees.columns}")

   tm_id  shannon_h
0     32   5.706651
1    845   4.065466
2   1207   5.954289

Index(['grid_index', 'tm_id', 'tmid_count', 'total_pix', 'tmid_pct',
       'forest_pix', 'forest_pct', 'CN', 'STATUSCD', 'TPA_UNADJ', 'SPCD',
       'COMMON_NAME', 'SCIENTIFIC_NAME', 'SPECIES_SYMBOL', 'DIA', 'HT',
       'ACTUALHT', 'CR', 'SUBP', 'TREE', 'AGENTCD', 'shannon_h'],
      dtype='object')


In [13]:
# Calculate the Tree Table statistics across tm_id/grid_index combinations
#### Abundance (trees/acre, scaled to per-pixel)
#### Dominance (live basal area, scaled to per-pixel)
#### Tree height (average/species)

# force species name to lower case
grid_trees.rename(columns={'COMMON_NAME': 'species'}, inplace=True)
grid_trees['species'] = grid_trees['species'].str.lower()
# tidy the live/dead column
grid_trees['live_dead'] = (grid_trees['STATUSCD'] == 1).astype(int)
# calculate the trees per acre, scaling by the acre/pixel factor
grid_trees['tree_tpa'] = (grid_trees['TREE'] * grid_trees['TPA_UNADJ']) * 0.222395 # trees/acre

# calculate live basal area (balive) scaled by TPA
grid_trees['BA'] = np.pi * (grid_trees['DIA'] / 2) ** 2  # Basal area in square feet
grid_trees['BA_TPAsc'] = grid_trees['BA'] * grid_trees['tree_tpa']  # adjust based on tree abundance

# group by grid_index and plot identifier (tm_id)
# calculate statistics for live/dead seperately
# merge into a single dataframe

# live trees
live_metrics = (
    grid_trees[grid_trees['live_dead'] == 1]
    .groupby(['grid_index', 'tm_id', 'species'], as_index=False)
    .agg(
        tmid_pixels=('tmid_count','max'), # retains the tm_id pixel count
        tmid_pct=('tmid_pct','max'), # retain the percent of forested area
        forest_pct=('forest_pct','max'), # retain forest percent for the grid
        shannon_h=('shannon_h','max'), # retains the tm_id shannon index
        balive=('BA_TPAsc', 'sum'),    # total BALIVE
        tpa_live=('tree_tpa', 'sum'),  # total live tree count (trees/acre)
        tree_ht_live=('ACTUALHT','mean') # average tree height
    )
)

# dead trees
dead_metrics = (
    grid_trees[grid_trees['live_dead'] == 0]
    .groupby(['grid_index', 'tm_id', 'species'], as_index=False)
    .agg(
        badead=('BA_TPAsc', 'sum'),    # Live basal area
        tpa_dead=('tree_tpa', 'sum'),  # Total live tree count (trees/acre)
        tree_ht_dead=('ACTUALHT','mean') # average tree height
    )
)

# combine live/dead balive
tree_metrics = live_metrics.merge(
    dead_metrics, 
    on=['grid_index', 'tm_id', 'species'], 
    how='outer'
).fillna(0)
# total acres for the tm_id (pixels * conversion factor)
tree_metrics['tmid_acres'] =  tree_metrics['tmid_pixels'] * 0.222395 
# subset to the tree metric columns
tree_metrics = tree_metrics[['grid_index','tm_id','tmid_pixels','tmid_acres',
                             'tmid_pct','forest_pct','species','shannon_h',
                             'balive','badead','tpa_live','tpa_dead',
                             'tree_ht_live','tree_ht_dead']]

# Add a "combined TPA" (live + dead)
# this may be important for lodgepole and spruce-fir in particular (beetle-kill)
tree_metrics['tpa_ld'] = tree_metrics['tpa_live'] + tree_metrics['tpa_dead']
tree_metrics['ba_ld'] = tree_metrics['balive'] + tree_metrics['badead']

print(f"\n{tree_metrics.head(12)}")
del live_metrics, dead_metrics
gc.collect()


    grid_index  tm_id  tmid_pixels  tmid_acres   tmid_pct  forest_pct  \
0       183297   5447          7.0    1.556765   6.140351   67.455621   
1       183297   5447          7.0    1.556765   6.140351   67.455621   
2       183297  13033         45.0   10.007775  39.473684   67.455621   
3       183297  13033         45.0   10.007775  39.473684   67.455621   
4       183297  41520         11.0    2.446345   9.649123   67.455621   
5       183297  41609          5.0    1.111975   4.385965   67.455621   
6       183297  46609         39.0    8.673405  34.210526   67.455621   
7       183297  46609         39.0    8.673405  34.210526   67.455621   
8       183298  13198          1.0    0.222395   1.234568   44.505495   
9       183298  46609         65.0   14.455675  80.246914   44.505495   
10      183298  46609         65.0   14.455675  80.246914   44.505495   
11      183299   4505          2.0    0.444790   4.255319   27.810651   

                        species  shannon_h       

0

In [14]:
unique_spp = tree_metrics['species'].unique()
unique_spp

array(['common or two-needle pinyon', 'utah juniper', 'oneseed juniper',
       'velvet mesquite', 'honey mesquite', 'siberian elm', 'gambel oak',
       'alligator juniper', 'arizona pinyon pine', 'singleleaf pinyon',
       'ponderosa pine', 'western juniper', 'arizona white oak',
       'cherrybark oak', 'common persimmon', 'eastern hophornbeam',
       'eastern redbud', 'green ash', 'shagbark hickory', 'sugarberry',
       'swamp chestnut oak', 'sweetgum', 'emory oak',
       'mexican pinyon pine', 'blue spruce', 'douglas-fir',
       'quaking aspen', 'white fir', 'american hornbeam, musclewood',
       'red maple', 'river birch', 'swamp tupelo', 'sweetbay',
       'water oak', 'water tupelo', 'narrowleaf cottonwood',
       'plains cottonwood', 'rocky mountain juniper',
       'eastern cottonwood', 'fremont cottonwood',
       'southwestern white pine', 'jeffrey pine', 'loblolly pine',
       'pignut hickory', 'post oak', 'sassafras', 'southern red oak',
       'white oak', 'ameri

In [15]:
[s for s in unique_spp if 'pine' in str(s).lower()] # check pine species

['arizona pinyon pine',
 'ponderosa pine',
 'mexican pinyon pine',
 'southwestern white pine',
 'jeffrey pine',
 'loblolly pine',
 'longleaf pine',
 'slash pine',
 'arizona pine',
 'sugar pine',
 'knobcone pine',
 'lodgepole pine',
 'washoe pine',
 'subalpine fir',
 'whitebark pine',
 'gray or california foothill pine',
 'limber pine',
 'great basin bristlecone pine',
 'western white pine',
 'rocky mountain bristlecone pine',
 'jack pine',
 'eastern white pine',
 'sand pine',
 'shortleaf pine',
 'chihuahuan pine',
 'virginia pine',
 'spruce pine',
 'pond pine',
 'red pine']

In [16]:
# Identify the primary species from the Tree Table
# identify the species which make up 97% of landscape
spp_pr = (
    tree_metrics.groupby('species', as_index=False)
    .agg(
        balive=('balive', 'sum'),
        ba_ld=('ba_ld', 'sum'),
        tpa_live=('tpa_live', 'sum'),
        tpa_ld=('tpa_ld', 'sum') # live+dead
    )
)
# calculate the fractions for both BALIVE and TPA
spp_pr['balive_f'] = spp_pr['balive'] / spp_pr['balive'].sum()
spp_pr['ba_ld_f'] = spp_pr['ba_ld'] / spp_pr['ba_ld'].sum() # live+dead
spp_pr['tpa_live_f'] = spp_pr['tpa_live'] / spp_pr['tpa_live'].sum()
spp_pr['tpa_ld_f'] = spp_pr['tpa_ld'] / spp_pr['tpa_ld'].sum() # live+dead
# sort values and gather the primary species
spp_pr = spp_pr.sort_values(by='ba_ld_f', ascending=False)
# which species make up 97% of observations?
spp_pr['balive_f_csum'] = spp_pr['balive_f'].cumsum()
spp_pr['ba_ld_f_csum'] = spp_pr['ba_ld_f'].cumsum() # live+dead
spp_pr['tpa_live_f_csum'] = spp_pr['tpa_live_f'].cumsum()
spp_pr['tpa_ld_f_csum'] = spp_pr['tpa_ld_f'].cumsum() # live+dead

metric = 'ba_ld_f_csum'
top_species = spp_pr[spp_pr[metric] <= 0.97]
print(f"\nSpecies making up 97% of *{metric}*: \n\n{top_species[['species','ba_ld','tpa_ld']]}\n\n")


Species making up 97% of *ba_ld_f_csum*: 

                        species         ba_ld        tpa_ld
134              ponderosa pine  8.499388e+10  1.532846e+09
100              lodgepole pine  4.158066e+10  2.610325e+09
80                    grand fir  2.594398e+10  5.952783e+08
64                  douglas-fir  1.786395e+10  4.292833e+08
203                   white fir  1.048952e+10  3.140411e+08
93                 jeffrey pine  1.027366e+10  1.386313e+08
173               subalpine fir  8.368008e+09  5.478531e+08
198             western juniper  7.852138e+09  1.212141e+08
74             engelmann spruce  6.138088e+09  2.918200e+08
137               quaking aspen  3.315437e+09  1.616115e+08
199               western larch  3.314474e+09  1.001725e+08
90                incense-cedar  3.048827e+09  4.657282e+07
62   curlleaf mountain-mahogany  2.462039e+09  5.320397e+07
78                   gambel oak  1.289538e+09  2.520150e+08
188                utah juniper  8.019722e+08  1.567556e

In [20]:
# Aggregate species into forest groups
spp_grouping = {
    'pinyon|juniper|cedar': 'pinon-juniper',
    ' fir|spruce': 'spruce-fir',
    'limber|bristlecone': 'subalpine-pine',
    'ponderosa pine|Jeffrey': 'ponderosa', 
    'lodgepole pine': 'lodgepole',
    'aspen': 'quaking aspen',
    'Douglas': 'douglas-fir',
    'oak': 'oak-woodland',
    'cottonwood|willow': 'woody-riparian'
}

spp_remap = {} # dictionary to store the remap values
# Iterate over groups to create the species remap dictionary
for keywords, spp_group in spp_grouping.items():
    # Find species matching the keywords
    spp = tree_metrics[tree_metrics['species'].str.contains(keywords, case=False, na=False)]
    # Add matching species to the remap dictionary
    spp_remap.update({name: spp_group for name in spp['species'].unique()})

# Apply the remap to create a new grouped species column
tree_metrics['species_gp_n'] = tree_metrics['species'].map(spp_remap).fillna(tree_metrics['species'])
# Verify the updated species groups
print(tree_metrics[['species', 'species_gp_n']].drop_duplicates().head(10))

                        species     species_gp_n
0   common or two-needle pinyon    pinon-juniper
1                  utah juniper    pinon-juniper
7               oneseed juniper    pinon-juniper
12              velvet mesquite  velvet mesquite
19               honey mesquite   honey mesquite
20                 siberian elm     siberian elm
37                   gambel oak     oak-woodland
41            alligator juniper    pinon-juniper
42          arizona pinyon pine    pinon-juniper
44            singleleaf pinyon    pinon-juniper


In [34]:
# re-aggregate the TreeMap characteristics by FRP grid/species combinations
# summarizes forest metrics by individual species present in the grid cell
grid_tm = (
    tree_metrics.groupby(
        ['grid_index', 'species_gp_n'], as_index=False
    ).agg(
        tmid_n=('tm_id', 'nunique'), # number of unique tm_id contributing
        forest_pct=('forest_pct','max'), # retains the forest pixel count for the grid
        shannon_h=('shannon_h','mean'),
        balive=('balive', 'sum'),
        badead=('badead', 'sum'), 
        ba_ld=('ba_ld', 'sum'),
        tpa_live=('tpa_live', 'sum'),  
        tpa_dead=('tpa_dead', 'sum'),
        tpa_ld=('tpa_ld', 'sum'),
        tree_ht_live=('tree_ht_live', 'mean'),
        tree_ht_dead=('tree_ht_dead', 'mean')
    )
)

# Check results
print(f"\n{grid_tm.head(10)}\n")


   grid_index     species_gp_n  tmid_n  forest_pct  shannon_h         balive  \
0      183297    pinon-juniper       5   67.455621   6.045516   11841.398396   
1      183298    pinon-juniper       2   44.505495   4.670697   13703.381476   
2      183299   honey mesquite       1   27.810651   4.412579      31.797705   
3      183299    pinon-juniper       5   27.810651   5.330857   28807.804851   
4      183299     siberian elm       1   27.810651   4.412579    1253.933318   
5      183299  velvet mesquite       2   27.810651   4.718105     813.600795   
6      185547    pinon-juniper       8   65.384615   5.251518   19496.509407   
7      185548     oak-woodland       1   51.530612   8.300890     219.546075   
8      185548    pinon-juniper       8   51.530612   4.558589  483822.479251   
9      185549   honey mesquite       1   50.000000   4.412579      31.797705   

        badead          ba_ld     tpa_live   tpa_dead       tpa_ld  \
0  3273.429243   15114.827639   394.012649  25.4

In [35]:
# Re-assess the primary species after the regrouping
spp_pr = (
    grid_tm.groupby('species_gp_n', as_index=False)
    .agg(
        balive=('balive','sum'),
        tpa_live=('tpa_live', 'sum')
    )
)
# calculate the fraction
spp_pr['balive_f'] = spp_pr['balive'] / spp_pr['balive'].sum()
spp_pr['tpa_live_f'] = spp_pr['tpa_live'] / spp_pr['tpa_live'].sum()
# sort values and gather the primary species
spp_pr = spp_pr.sort_values(by='balive_f', ascending=False)
# which species make up 97% of observations?
spp_pr['balive_f_csum'] = spp_pr['balive_f'].cumsum()
spp_pr['tpa_live_f_csum'] = spp_pr['tpa_live_f'].cumsum()
top_species = spp_pr[spp_pr['tpa_live_f_csum'] <= 0.97]
print(
    f"\nSpecies making up 97% of the total *tpa_live_f_csum*: \n\n",
    f"{top_species[['species_gp_n','balive_f','tpa_live_f']]}\n\n")


Species making up 97% of the total *tpa_live_f_csum*: 

       species_gp_n  balive_f  tpa_live_f
75       ponderosa  0.420652    0.224648
102     spruce-fir  0.209676    0.236861
54       lodgepole  0.175468    0.348640
36     douglas-fir  0.075143    0.057792
72   pinon-juniper  0.055878    0.032680
62    oak-woodland  0.012929    0.039680
77   quaking aspen  0.012809    0.020704




In [36]:
# check on the aspen composition
spp = 'quaking aspen'
spp_idx = grid_tm[grid_tm['species_gp_n'] == spp]['grid_index'].unique() # list of grid indices
grid_spp = grid_tm[grid_tm['grid_index'].isin(spp_idx)]
# check how many aspen gridcells there are
n_spp_grids = len(grid_spp['grid_index'].unique())
print(f"{n_spp_grids} [{round(n_spp_grids/len(grid_tm['grid_index'].unique())*100,1)}%] grids w/ any aspen")

40717 [73.9%] grids w/ any aspen


In [37]:
# Identify dominant species for each grid cell by 'balive' or 'tpa_live'
# print these summaries for all species
metrics = ['tpa_ld', 'ba_ld']  # metrics to assess
for metric in metrics:
    print(f"\nDominance based on *{metric}*\n")
    dom_spp = (
        grid_tm.groupby('grid_index', as_index=False)
        .apply(lambda df: df.loc[df[metric].idxmax()])
        .reset_index(drop=True))
    # print the dominance for each major forest type
    for spp in top_species['species_gp_n'].unique():
        # Filter for grid cells where species is dominant
        spp_dominant = dom_spp[dom_spp['species_gp_n'] == spp]
        # Count the number of grid cells with aspen dominance
        n_dom = len(spp_dominant['grid_index'].unique())
        # Print the results
        total_grids = len(grid_tm['grid_index'].unique())
        spp_pct = round(n_dom / total_grids * 100, 1)
        print(f"\t~ {spp}: {n_dom} grids ({spp_pct}%)")
print("\n")
del dom_spp, spp_dominant
gc.collect()


Dominance based on *tpa_ld*

	~ ponderosa: 12785 grids (23.2%)
	~ spruce-fir: 15265 grids (27.7%)
	~ lodgepole: 18445 grids (33.5%)
	~ douglas-fir: 363 grids (0.7%)
	~ pinon-juniper: 2420 grids (4.4%)
	~ oak-woodland: 4379 grids (7.9%)
	~ quaking aspen: 1104 grids (2.0%)

Dominance based on *ba_ld*

	~ ponderosa: 24361 grids (44.2%)
	~ spruce-fir: 11969 grids (21.7%)
	~ lodgepole: 13116 grids (23.8%)
	~ douglas-fir: 386 grids (0.7%)
	~ pinon-juniper: 3500 grids (6.3%)
	~ oak-woodland: 412 grids (0.7%)
	~ quaking aspen: 890 grids (1.6%)




0

In [39]:
grid_tm.columns

Index(['grid_index', 'species_gp_n', 'tmid_n', 'forest_pct', 'shannon_h',
       'balive', 'badead', 'ba_ld', 'tpa_live', 'tpa_dead', 'tpa_ld',
       'tree_ht_live', 'tree_ht_dead'],
      dtype='object')

In [48]:
# filter to keep grids with our top/dominant species
grid_tm_dsp = grid_tm[grid_tm['species_gp_n'].isin(top_species['species_gp_n'].unique())]
# check how many this is compared to the total
n_grids = len(grid_tm['grid_index'].unique())
print(f"\n{round(len(grid_tm_dsp['grid_index'].unique()) / n_grids*100,3)}% grids w/ primary species\n")
# calculate how many grids are predominantly forested
n_forest = len(grid_tm_dsp[grid_tm_dsp['forest_pct'] > 50])
print(f"[{round(n_forest/len(grid_tm_dsp)*100,2)}%] predominantly forested grids.\n")

# Get the grid-level dominance and abundance proportions by species
# calculate the total balive and tpa for the grids
grid_totals = (
    grid_tm_dsp.groupby('grid_index', as_index=False)
    .agg(
        balive_total=('balive', 'sum'),
        badead_total=('badead', 'sum'),
        ba_ld_total=('ba_ld', 'sum'),
        tpa_live_total=('tpa_live', 'sum'),
        tpa_dead_total=('tpa_dead', 'sum'),
        tpa_ld_total=('tpa_ld', 'sum')
    )
)
# subset columns
grid_totals = grid_totals[['grid_index',
                           'balive_total','badead_total','ba_ld_total',
                           'tpa_live_total','tpa_dead_total','tpa_ld_total']]
# merge back to the grid data
grid_tm_dsp = grid_tm_dsp.merge(grid_totals, on='grid_index', how='left')

# calculate the species proportions (for live/dead)
# dominance (live/dead basal area)
grid_tm_dsp['sp_dominance_l'] = grid_tm_dsp['balive'] / grid_tm_dsp['balive_total']
grid_tm_dsp['sp_dominance_d'] = grid_tm_dsp['badead'] / grid_tm_dsp['badead_total']
grid_tm_dsp['sp_dominance_ld'] = grid_tm_dsp['ba_ld'] / grid_tm_dsp['ba_ld_total']

# abundance (live/dead TPA)
grid_tm_dsp['sp_abundance_l'] = grid_tm_dsp['tpa_live'] / grid_tm_dsp['tpa_live_total']
grid_tm_dsp['sp_abundance_d'] = grid_tm_dsp['tpa_live'] / grid_tm_dsp['tpa_live_total']
grid_tm_dsp['sp_abundance_ld'] = grid_tm_dsp['tpa_live'] / grid_tm_dsp['tpa_live_total']

# cheack on the results
grid_tm_dsp[['grid_index','species_gp_n',
             'sp_dominance_l','sp_dominance_d','sp_dominance_ld',
             'sp_abundance_l','sp_abundance_d','sp_abundance_ld'
            ]].head(20)


99.993% grids w/ primary species

[87.67%] predominantly forested grids.



Unnamed: 0,grid_index,species_gp_n,sp_dominance_l,sp_dominance_d,sp_dominance_ld,sp_abundance_l,sp_abundance_d,sp_abundance_ld
0,183297,pinon-juniper,1.0,1.0,1.0,1.0,1.0,1.0
1,183298,pinon-juniper,1.0,1.0,1.0,1.0,1.0,1.0
2,183299,pinon-juniper,1.0,1.0,1.0,1.0,1.0,1.0
3,185547,pinon-juniper,1.0,1.0,1.0,1.0,1.0,1.0
4,185548,oak-woodland,0.000454,0.0,0.000449,0.002568,0.002568,0.002568
5,185548,pinon-juniper,0.999546,1.0,0.999551,0.997432,0.997432,0.997432
6,185549,oak-woodland,0.000432,0.0,0.000424,0.002417,0.002417,0.002417
7,185549,pinon-juniper,0.999568,1.0,0.999576,0.997583,0.997583,0.997583
8,187795,pinon-juniper,0.914404,0.50939,0.859558,0.993015,0.993015,0.993015
9,187795,ponderosa,0.085596,0.49061,0.140442,0.006985,0.006985,0.006985


In [None]:
# load the lookup table for tm_id/fortypcd
fp = os.path.join(projdir,'data/spatial/mod/USFS/treemap_tmid_fortypcd_lookup.csv')
pixel_mapping = pd.read_csv(fp).drop(columns=['Unnamed: 0'])
pixel_mapping.head()

# join the algorithmic forest type code (FORTYPCD)
tree_metrics = tree_metrics.merge(pixel_mapping, on='tm_id', how='left')

# join in the forest type (FORTYPCD) name 
# load the TreeMap FORTYPCD species lookup table
fp = os.path.join(projdir,'data/tabular/mod/treemap_fortypcd_species_mapping.csv')
species_df = pd.read_csv(fp)
species_df = species_df[['FORTYPCD','SpeciesName']]
species_df.rename(columns={
    'FORTYPCD': 'fortypcd',
    'SpeciesName': 'fortypnm'
}, inplace=True)
species_df = species_df[species_df['fortypcd'].isin(tree_metrics['fortypcd'].unique())]
print(species_df.columns)

# join to the grid data
tree_metrics = tree_metrics.merge(species_df, on='fortypcd', how='left')
print(f"\n{tree_metrics.head(12)}\n")

In [None]:
grid_tm.columns

In [None]:
# Identify the primary species from the FORTYPCD
# calculate the fraction
# Identify the primary species from the Tree Table
spp_pr = (
    grid_tm.groupby('fortypcd_nm', as_index=False)
    .agg(
        count=('fortypcd_nm', 'size'),
        balive=('balive', 'sum'),
        batotal=('batotal', 'sum'),
        tpa_live=('tpa_live', 'sum'),
        tpa_ld=('tpa_ld', 'sum') # live+dead
    )
)
# calculate the fraction
spp_pr['fraction'] = spp_pr['count'] / spp_pr['count'].sum()
spp_pr = spp_pr.sort_values(by='fraction', ascending=False)
spp_pr['c_fraction'] = spp_pr['fraction'].cumsum()
top_species = spp_pr[spp_pr['c_fraction'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total: \n\n{top_species}\n\n")

In [None]:
# Do the species groupings here as well
spp_grouping = {
    'pinyon|juniper': 'pinon-juniper',
    ' fir|spruce': 'spruce-fir',
    'ponderosa pine': 'ponderosa', # just shorten the name here
    'lodgepole pine': 'lodgepole',
    'aspen': 'quaking aspen',
    'Douglas': 'douglas-fir',
    'oak': 'oak-woodland'
}

spp_remap = {}
# Iterate over groups to create the species remap dictionary
for keywords, spp_group in spp_grouping.items():
    # Find species matching the keywords
    spp = grid_tm[grid_tm['fortypcd_nm'].str.contains(keywords, case=False, na=False)]
    # Add matching species to the remap dictionary
    spp_remap.update({name: spp_group for name in spp['fortypcd_nm'].unique()})

# Apply the remap to create a new grouped species column
grid_tm['fortyp_gp_n'] = grid_tm['fortypcd_nm'].map(spp_remap).fillna(grid_tm['fortypcd_nm'])
# Verify the updated species groups
print(grid_tm[['fortypcd_nm', 'fortyp_gp_n']].drop_duplicates().head(10))

In [None]:
# Identify the primary species from the FORTYPCD
# calculate the fraction
# Identify the primary species from the Tree Table
spp_pr = (
    grid_tm.groupby('fortyp_gp_n', as_index=False)
    .agg(
        count=('fortyp_gp_n', 'size'),
        balive=('balive', 'sum'),
        batotal=('batotal', 'sum'),
        tpa_live=('tpa_live', 'sum'),
        tpa_ld=('tpa_ld', 'sum') # live+dead
    )
)
# calculate the fraction
spp_pr['fraction'] = spp_pr['count'] / spp_pr['count'].sum()
spp_pr = spp_pr.sort_values(by='fraction', ascending=False)
spp_pr['c_fraction'] = spp_pr['fraction'].cumsum()
top_species = spp_pr[spp_pr['c_fraction'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total: \n\n{top_species}\n\n")

In [None]:
# Merge the FORTYPCD to the forest metrics grid (grid_tm)
print(len(grid_tm))
print(len(grid_fortypcd))

In [None]:
# calculate the landscape fraction (prevalence)
spp_pr = (
    grid_tm.groupby('species', as_index=False)
    .agg(
        balive=('balive','sum'),
        tpa_live=('tpa_live', 'sum')
    )
)
# calculate the fraction
spp_pr['balive_f'] = spp_pr['balive'] / spp_pr['balive'].sum()
spp_pr['tpa_f'] = spp_pr['tpa_live'] / spp_pr['tpa_live'].sum()

# sort values and plot
spp_pr = spp_pr.sort_values(by='balive_f', ascending=False)

# which species make up 97% of observations?
spp_pr['balive_f_csum'] = spp_pr['balive_f'].cumsum()
spp_pr['tpa_f_csum'] = spp_pr['tpa_f'].cumsum()
top_species = spp_pr[spp_pr['balive_f_csum'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total *metric*: \n\n{top_species}\n\n")

# plot it
plt.figure(figsize=(6, 4))
sns.barplot(data=top_species, x='species', y='balive_f', palette='viridis')
plt.xlabel('Species')
plt.ylabel('Fractional live basal area')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
spp_dom = (
    grid_trees.groupby(['grid_index', 'COMMON_NAME'])
    .size()
    .reset_index(name='frequency')  # Count occurrences of each species
    .sort_values(['grid_index', 'frequency'], ascending=[True, False])
    .drop_duplicates('grid_index')  # Keep only the top species per grid_index
)
print(spp_dom.head())

# calculate the landscape fraction (prevalence)
spp_dom = spp_dom.groupby('COMMON_NAME')['frequency'].sum().reset_index()
spp_dom['fraction'] = spp_dom['frequency'] / spp_dom['frequency'].sum()
spp_dom = spp_dom.sort_values(by='fraction', ascending=False)

# which species make up 97% of observations?
spp_dom['c_fraction'] = spp_dom['fraction'].cumsum()
top_species = spp_dom[spp_dom['c_fraction'] <= 0.97]
print(f"\n\nSpecies making up 97% of the total: \n\n{top_species}\n\n")

# plot it
plt.figure(figsize=(6, 4))
sns.barplot(data=top_species, x='COMMON_NAME', y='fraction', palette='viridis')
plt.xlabel('Species')
plt.ylabel('Fractional (frequency)')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Prepare data for stacked bar plot
stacked_data = comparison_table.set_index('COMMON_NAME')[
    ['fraction_balive', 'fraction_abundance', 'weighted_frequency']
].T

# Plot the stacked bar chart
stacked_data.plot(
    kind='bar',
    stacked=True,
    figsize=(12, 6),
    colormap='viridis'
)

# Customize the plot
plt.xlabel('Metric', fontsize=12)
plt.ylabel('Proportion', fontsize=12)
plt.title('Species Contribution by Different Metrics', fontsize=14)
plt.legend(title='Species', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# do some species regrouping and renaming
spp_grouping = {
    'pinyon|juniper': 'pinon-juniper',
    ' fir|spruce': 'spruce-fir',
    'ponderosa pine': 'ponderosa',
    'lodgepole pine': 'lodgepole'
}

spp_remap = {}
# Iterate over groups to create the species remap dictionary
for keywords, spp_group in spp_grouping.items():
    # Find species matching the keywords
    spp = grid_trees[grid_trees['COMMON_NAME'].str.contains(keywords, case=False, na=False)]
    # Add matching species to the remap dictionary
    spp_remap.update({name: spp_group for name in spp['COMMON_NAME'].unique()})

# Apply the remap to create a new grouped species column
grid_trees['SpeciesGroup'] = grid_trees['COMMON_NAME'].map(spp_remap).fillna(grid_trees['COMMON_NAME'])

# Verify the updated species groups
print(grid_trees[['COMMON_NAME', 'SpeciesGroup']].drop_duplicates().head(10))

In [None]:
# merge the live basal area
grid_trees = grid_trees.merge(ba_by_species, on=['grid_index', 'tm_id', 'COMMON_NAME'], how='left')
grid_trees[['grid_index','tm_id','count','COMMON_NAME','balive']].head(10)

In [None]:
# Collapse the tree data to species
row_counts = (
    grid_trees.groupby(['grid_index', 'tm_id', 'COMMON_NAME'])
    .size()
    .reset_index(name='tree_count')  # Name the count column 'tree_count'
)
# Merge the tree_count back into the original grid_trees DataFrame
grid_trees_sp = grid_trees.merge(row_counts, on=['grid_index', 'tm_id', 'COMMON_NAME'], how='left')
grid_trees_sp = grid_trees_sp.drop_duplicates(subset=['grid_index', 'tm_id', 'COMMON_NAME'])
grid_trees_sp[['grid_index', 'tm_id', 'COMMON_NAME', 'SpeciesGroup', 'tree_count', 'balive']].head(10)

In [None]:
# identify the dominant forest species for each "tm_id"
spp_dominance = (
    grid_trees_sp.groupby(['tm_id', 'SpeciesGroup'])['balive']  # Sum tree counts (or use another metric)
    .sum()
    .reset_index()
    .sort_values(['tm_id', 'balive'], ascending=[True, False])
    .drop_duplicates('tm_id')  # Keep only the top species per tm_id
)

# join back to the tm_id summary
grid_tmid_spp = grid_tmid.merge(spp_dominance[['tm_id', 'SpeciesGroup']], on='tm_id', how='left')

# identify the landscape proportion of dominant species
spp_pr = (
    grid_tmid_spp.groupby('SpeciesGroup')['count']
    .sum()
    .reset_index()
    .rename(columns={'count': 'maj_spp_count'})
    .sort_values(by='maj_spp_count', ascending=False)
)

# calculate the fraction
spp_pr['fraction'] = spp_pr['maj_spp_count'] / spp_pr['maj_spp_count'].sum()
spp_pr = spp_pr.sort_values(by='fraction', ascending=False)

# Identify species contributing 97% of the burned area
spp_pr['c_fraction'] = spp_pr['fraction'].cumsum()
top_species = spp_pr[spp_pr['c_fraction'] <= 0.97]
print(f"\nSpecies contributing to 97% of the burned area:\n{top_species}\n")

# plot it
plt.figure(figsize=(6, 4))
sns.barplot(data=top_species, x='SpeciesGroup', y='fraction', palette='viridis')
plt.xlabel('Species')
plt.ylabel('Fractional cover')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Calculate the species gradient (proportions) based on Tree Table
grid_trees.columns

In [None]:
# Calculate abundance (count) and dominance (basal area) for each species in each tm_id
sp_abundance = (
    grid_trees.groupby(['tm_id', 'COMMON_NAME'])['TREE'] 
    .sum()
    .reset_index()
    .rename(columns={'TREE': 'abundance'})
)
sp_dominance = (
    grid_trees.groupby(['tm_id', 'COMMON_NAME'])['balive']
    .sum()
    .reset_index()
    .rename(columns={'balive': 'dominance'})
)

# join these together
composition = sp_abundance.merge(sp_dominance, on=['tm_id', 'COMMON_NAME'], how='outer').fillna(0)

# Normalize abundance/dominance within each tm_id (plot)
composition['abundance'] = (
    composition.groupby('tm_id')['abundance'].transform(lambda x: x / x.sum())
)
composition['dominance'] = (
    composition.groupby('tm_id')['dominance'].transform(lambda x: x / x.sum())
)

# check the results
composition.head()

In [None]:
ba_by_species[ba_by_species['tm_id'] == 3726]

In [None]:
composition[composition['tm_id'] == 3726]

In [None]:
grid_trees[grid_trees['tm_id'] == 3726]

In [None]:
# test tree table
tree_tbl[tree_tbl['tm_id'] == 21404][['tm_id','COMMON_NAME','DIA','HT','CR','TREE']]