In [None]:
""" 
Author: maxwell.cook@colorado.edu
"""

import os, sys, gc, time

# numpy / pandas are imported explicitly because the cells below use `np` and `pd`
# directly; previously they were only in scope via the star import further down.
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio as rio
import rioxarray as rxr
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from scipy.stats import pearsonr

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

# Coordinate reference system identifier (EPSG code string).
# It is not referenced by any of the cells visible in this notebook.
proj = 'EPSG:5070'

# Root data directory and project sub-directory used to build every input/output path below.
# NOTE(review): this is a hard-coded absolute path on one machine — anyone else running the
# notebook has to edit `maindir` first.
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Ready to go !")

In [None]:
# Read the gridcell forest-type table and discard the 'Unnamed: 0' column
# (the name pandas gives to a header-less first column, presumably a saved index).
fp = os.path.join(projdir,'data/tabular/mod/gridstats_fortypnm_gp_tm_ct.csv')
tm = pd.read_csv(fp).drop(columns=['Unnamed: 0'])
tm.columns

In [None]:
# Number of rows in the forest-type table loaded in the previous cell
len(tm)

In [None]:
# Load the gridcell CBIbc summary statistics and drop the two export bookkeeping columns
fp = os.path.join(projdir,'data/earth-engine/exports/gridstats_cbibc_forest.csv')
sev = pd.read_csv(fp).drop(columns=['system:index','.geo'])
# Build the merge key by concatenating Fire_ID and grid_index as strings
sev['grid_idx'] = sev['Fire_ID'].astype(str) + sev['grid_index'].astype(str)
print(sev.columns)

# Prefix the summary-statistic columns with 'CBIbc_' so they stay identifiable after the merge
# (columns missing from the table are silently left out by `rename`)
cols = ['mean', 'p90', 'p95', 'p99', 'stdDev']
sev = sev.rename(columns={stat: 'CBIbc_' + stat for stat in cols})
# The two ID parts are no longer needed once the combined key exists
sev = sev.drop(columns=['Fire_ID','grid_index'])
# Cast the concatenated key to integer before merging on it
sev['grid_idx'] = sev['grid_idx'].astype(int)

# Left-join the severity statistics onto the forest-type table (all `tm` rows are kept)
tm_sev = tm.merge(sev, on='grid_idx', how='left')
preview_cols = [
    'grid_idx','fortypnm_gp','fortyp_pct','species_gp_n','frp_max_day','CBIbc_mean',
    'CBIbc_p90','CBIbc_p95','tpp_ld_pr','ba_ld_pr','qmd_ld_pr',
]
tm_sev[preview_cols].head(10)

In [None]:
# Boxplot of 90th-percentile CBIbc for rows whose forest-type cover exceeds the threshold
# NOTE(review): 'fortyp_pct' is compared with 0.50 here, but later cells treat the same
# column as a 0-100 percentage (bins over 0-100, combined cover > 50) — confirm the units.
dom = tm_sev.loc[tm_sev['fortyp_pct'] > 0.50]

# Order the forest types by their median CBIbc_p90, highest first
med = (
    dom.groupby('fortypnm_gp')['CBIbc_p90']
    .median()
    .sort_values(ascending=False)
)
ordered = list(med.index)

# Draw the horizontal boxplot on a fresh figure
plt.figure(figsize=(6, 4))
box_ax = sns.boxplot(
    data=dom,
    x='CBIbc_p90',
    y='fortypnm_gp',
    order=ordered,
    palette='inferno',
    dodge=False,
)
box_ax.set_xlabel('Composite Burn Index')
box_ax.set_ylabel('Forest Type')
plt.tight_layout()

# Write the figure to disk before showing it
out_plot = os.path.join(projdir, 'figures/FigureX_FORTYPCD_DominantSpecies_CBIbc_p90.png')
plt.savefig(out_plot, dpi=300, bbox_inches='tight')

plt.show()
print(f"\nSaved to: {out_plot}\n")

In [None]:
# Inspect the column names of the merged forest-type / severity table
tm_sev.columns

In [None]:
# Row counts of the forest-type table, the severity table and the merged result, in that order
for frame in (tm, sev, tm_sev):
    print(len(frame))

In [None]:
# Count the missing values once for the three columns of interest, then report each
nan_counts = tm_sev[['CBIbc_p90', 'frp_max_day', 'fortypnm_gp']].isna().sum()
print(f"\nNaNs in CBI: {nan_counts['CBIbc_p90']}\n")
print(f"NaNs in FRP: {nan_counts['frp_max_day']}\n")
print(f"NaNs in FORTYP: {nan_counts['fortypnm_gp']}\n")

In [None]:
# Keep only the rows that have a CBIbc_p90 value, then report how many remain
tm_sev = tm_sev.dropna(subset=['CBIbc_p90'])
print(len(tm_sev))

In [None]:
# Show the columns of the merged table after the rows without CBIbc_p90 were removed
print(tm_sev.columns)

In [None]:
# save the table out
# `to_csv` is called without `index=False`, so the DataFrame index is written as the
# first (unnamed) column of the file.
out_fp = os.path.join(projdir,'data/tabular/mod/gridstats_fortypnm_gp_tm_ct_frp-cbi.csv')
tm_sev.to_csv(out_fp)
print(f"Saved to: {out_fp}")

In [None]:
# List the distinct values of the 'species_gp_n' column in the merged table
tm_sev['species_gp_n'].unique()

In [None]:
# plots.

In [None]:
# Pearson correlation between cumulative FRP ('frp_csum') and 90th-percentile CBIbc
# ('CBIbc_p90'), computed separately for each forest type group in `dom`.
df = dom.copy() # work with a copy
# keep strictly positive observations only; NaN rows are discarded too (NaN > 0 is False)
df = df[(df['frp_csum'] > 0) & (df['CBIbc_p90'] > 0)]

# by forest type (grouped)
cors = {}  # forest type -> (Pearson r, p-value); read again by the faceted regression plot
for tree_type, group in df.groupby('fortypnm_gp'):
    # pearsonr raises ValueError for fewer than two paired observations,
    # so such groups are reported and skipped instead of aborting the cell
    if len(group) < 2:
        print(f"Tree Type: {tree_type} - skipped (n={len(group)}, need at least 2 observations)")
        continue
    corr, p_value = pearsonr(group['frp_csum'], group['CBIbc_p90'])
    cors[tree_type] = (corr, p_value)
    print(f"Tree Type: {tree_type} - Pearson correlation: {corr:.2f}, p-value: {p_value:.4f}")

print("\n")
# one row per forest type with 'correlation' and 'p_value' columns
cor_df = pd.DataFrame(cors, index=['correlation', 'p_value']).T
print("\nCorrelation by Tree Type:\n")
cor_df = cor_df.sort_values('correlation', ascending=False)
cor_df.head(6)

In [None]:
# Faceted regression plots of CBIbc_p90 against cumulative FRP, one panel per forest type.
df = tm_sev.copy()
# keep strictly positive observations only; NaN rows are discarded too (NaN > 0 is False)
df = df[(df['frp_csum'] > 0) & (df['CBIbc_p90'] > 0)]

# keep only one row per gridcell and forest type
# NOTE(review): duplicates are keyed on 'grid_index' rather than the merged 'grid_idx' key —
# confirm that 'grid_index' alone is unique across fires.
df = df.drop_duplicates(subset=['grid_index','fortypnm_gp'])
print(len(df))

# Panels are ordered by the absolute correlation computed in the correlation cell.
# NOTE(review): `cors` was computed from `dom`, while the points drawn here come from all of
# `tm_sev` — confirm the r / p values in the titles are meant to describe a different subset.
sorted_species = sorted(cors.keys(), key=lambda x: abs(cors[x][0]), reverse=True)

g = sns.lmplot(
    data=df, x='frp_csum', y='CBIbc_p90', hue='fortypnm_gp', col='fortypnm_gp',
    col_wrap=3, height=2, aspect=1.2, scatter_kws={'alpha':0.4,'s': 15}, line_kws={'color':'black'},
    sharey=True, sharex=False, col_order=sorted_species, palette='viridis'
)

# Shared upper y-limit: the largest observed value plus a small margin (computed once)
y_max = df['CBIbc_p90'].max() + 0.1

# Replace the default "fortypnm_gp = <name>" titles with the name and its correlation
for ax in g.axes.flat:
    ax.set_ylim(0, y_max)

    species_name = ax.get_title().replace('fortypnm_gp = ', '')
    if species_name in cors:
        corr, p_value = cors[species_name]
        ax.set_title(f"{species_name}\n$r={corr:.2f}$, $p={p_value:.2f}$", fontsize=10)
    else:
        # no correlation available for this panel: formatting None with ':.2f' would raise
        ax.set_title(species_name, fontsize=10)

# Use figure-level axis labels instead of one label per panel
g.set_axis_labels('', '')
g.fig.text(0.5, -0.015, 'Fire Radiative Power (cumulative)', ha='center', va='center', fontsize=12)
# the plotted y variable is CBIbc_p90, so the label names the 90th percentile (it used to say "mean")
g.fig.text(0.015, 0.85, 'Composite Burn Index (90th percentile)', ha='center', va='center', rotation='vertical', fontsize=12)
g.fig.set_size_inches(7.5, 3)  # Set figure dimensions
plt.subplots_adjust(top=1.5)  # Adjust subplot spacing

plt.savefig(os.path.join(projdir,'figures/FigureX_FORTYPNMGP-FRP-CBIbc.png'), dpi=150, bbox_inches='tight')

plt.show()

In [None]:
# Write the per-forest-type correlation table (`cor_df` from the correlation cell) to CSV.
# The index (forest type names) is written as the first column.
out_fp = os.path.join(projdir, 'data/tabular/mod/gridstats_frp-cbi_pearson.csv')
cor_df.to_csv(out_fp)
print(f"Saved to: {out_fp}")

In [None]:
# Select every gridcell that has at least one 'Aspen' row (any aspen component),
# keeping all forest-type rows that belong to those gridcells.
# NOTE(review): gridcells are identified by 'grid_index' rather than the merged 'grid_idx'
# key — confirm that 'grid_index' alone is unique across fires.
aspen_idx = tm_sev.loc[tm_sev['fortypnm_gp'] == 'Aspen', 'grid_index'].unique() # array of grid indices
grid_aspen = tm_sev[tm_sev['grid_index'].isin(aspen_idx)]

# Report how many gridcells contain aspen, and their share of all gridcells
n_aspen = len(grid_aspen['grid_index'].unique())
n_total = len(tm_sev['grid_index'].unique())
pct_aspen = round(n_aspen / n_total * 100, 1) if n_total else 0.0  # avoid dividing by zero on an empty table
# the selection is "any aspen present", not dominance, so the message says so
print(f"There are a total of {n_aspen} [{pct_aspen}%] gridcells with an aspen component.")

In [None]:
# Tile ("waffle") heatmaps of aspen cover vs. each associate forest type's cover.
# Tile size encodes how many gridcells fall in a cover-bin pair; tile colour encodes mean CBIbc_p95.

# create bins for species composition
bins = np.linspace(0, 100, 6) # 20% intervals
bin_labels = range(len(bins) - 1)  # integer bin codes, reused below as tile coordinates
spps = [s for s in tm_sev['fortypnm_gp'].unique() if s != 'Aspen'] # exclude aspen from species list
print(f"Associate forest type: {spps}\n")

# plot the associations with aspen
# waffle chart version
df = grid_aspen.copy()

# Gridcell-level fire metrics; they do not depend on the species, so build them once
frp_df = grid_aspen[['grid_index','CBIbc_mean','CBIbc_p95','frp_max_day','frp_csum']].drop_duplicates()

# create grid data
tiles = []  # Store tile data
for sp in spps:
    sp_pct = f'{sp}_pct'
    sp_bin = f'{sp}_bin'

    # Rows for aspen and the current associate species only
    df_sp = df[df['fortypnm_gp'].isin(['Aspen', sp])]

    # Pivot to create columns for Aspen and the species.
    # `spps` comes from all of `tm_sev`, so a species may never occur in the aspen gridcells;
    # reindexing guarantees both columns exist (zero-filled) instead of raising KeyError below.
    cover_df = (
        df_sp.pivot_table(index='grid_index', columns='fortypnm_gp', values='fortyp_pct', fill_value=0)
        .reindex(columns=['Aspen', sp], fill_value=0)
        .reset_index()
        .rename(columns={'Aspen': 'aspen_pct', sp: sp_pct})
    )

    # keep gridcells where the two types together exceed 50 (in the units of 'fortyp_pct')
    cover_df['combined_pct'] = cover_df['aspen_pct'] + cover_df[sp_pct]
    cover_df = cover_df[cover_df['combined_pct'] > 50]

    # Attach the fire metrics
    cover_df = cover_df.merge(frp_df, on='grid_index', how='left')

    # Bin Aspen and species percent cover
    # (intervals are right-closed, so a cover of exactly 0 falls outside the first bin)
    cover_df['aspen_bin'] = pd.cut(cover_df['aspen_pct'], bins, labels=bin_labels)
    cover_df[sp_bin] = pd.cut(cover_df[sp_pct], bins, labels=bin_labels)

    # Group by bins and calculate statistics.
    # observed=False is spelled out so every bin pair is returned regardless of the pandas default.
    grouped = cover_df.groupby(['aspen_bin', sp_bin], observed=False)
    counts = grouped.size().reset_index(name='freq')  # Frequency for tile size
    mean_cbi = grouped['CBIbc_p95'].mean().reset_index(name='CBIbc')  # Mean CBIbc_p95 for color
    mean_frp = grouped['frp_max_day'].mean().reset_index(name='frp')  # Mean frp_max_day for color

    # Merge statistics into a single DataFrame
    tiled_data = pd.merge(counts, mean_frp, on=['aspen_bin', sp_bin])
    tiled_data = pd.merge(tiled_data, mean_cbi, on=['aspen_bin', sp_bin])
    tiled_data['species'] = sp
    tiles.append(tiled_data)

# Combine tile data
tiles_df = pd.concat(tiles, ignore_index=True)

# Normalize tile sizes: frequency relative to the maximum, raised to the power 0.2
# (a fifth-root scaling that keeps rarely-populated tiles visible)
tiles_df['tile_size'] = (tiles_df['freq'] / tiles_df['freq'].max()) ** 0.2

# Plotting the tile-scaled heatmap
n_cols = 3 # Number of columns in the facet grid
n_rows = int(np.ceil(len(spps) / n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(9,3.5), constrained_layout=True)
axes = axes.flatten()

# make the grids
for i, sp in enumerate(spps):
    sp_data = tiles_df[tiles_df['species'] == sp]
    ax = axes[i]

    # Colour scale is per species panel (each panel gets its own colourbar).
    # It is built once per panel rather than once per tile.
    sp_vmin = sp_data['CBIbc'].min()
    sp_vmax = sp_data['CBIbc'].max()
    norm = mcolors.Normalize(vmin=sp_vmin, vmax=sp_vmax)

    for _, row in sp_data.iterrows():
        if row['freq'] == 0:
            continue  # empty bin pair: its tile would have zero size and no colour value

        x = int(row[f'{sp}_bin'])
        y = int(row['aspen_bin'])
        size = row['tile_size'] * 0.9
        color = plt.cm.coolwarm(norm(row['CBIbc']))

        # square tile centred on the (species bin, aspen bin) coordinate
        ax.add_patch(
            plt.Rectangle(
                (x - size / 2, y - size / 2), size, size, color=color, ec='black', lw=0.5
            )
        )

    # Formatting the axes: the species name is drawn inside the panel instead of as a title
    ax.text(
        0.95, 0.95, sp,  # x, y position in axis coordinates (top-right corner)
        transform=ax.transAxes,  # Use axis coordinates (0,0 is bottom-left, 1,1 is top-right)
        ha='right', va='top',  # Align the text to the top-right
        fontsize=10, color='black', bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.2')
    )
    ax.set_xlabel('')
    ax.set_ylabel('Aspen Cover (%)', size=9)
    ax.set_xticks(range(len(bins) - 1))
    ax.set_yticks(range(len(bins) - 1))
    ax.set_xticklabels([f'{int(b)}%' for b in bins[:-1]], size=8)
    ax.set_yticklabels([f'{int(b)}%' for b in bins[:-1]], size=8)
    ax.set_xlim(-0.5, len(bins) - 1.5)
    ax.set_ylim(-0.5, len(bins) - 1.5)

    # Colourbar for this panel, using the panel's own normalisation
    sm = plt.cm.ScalarMappable(cmap=plt.cm.coolwarm, norm=norm)
    cbar = fig.colorbar(sm, ax=ax, orientation='vertical', fraction=0.50, pad=0.1)
    cbar.set_label('CBI')

# Remove unused axes (indexed by the number of species, not by the leaked loop variable)
for unused_ax in axes[len(spps):]:
    fig.delaxes(unused_ax)

del df

out_plot = os.path.join(projdir, 'figures/FigureX_CoOccurring_Heatmap_CBIbc_Scaled.png')
plt.savefig(out_plot, dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Density of percent cover for aspen and each associate forest type,
# restricted to gridcells where the pair together exceeds the cover threshold.
df = grid_aspen.copy() # work with a copy

kde_data = []
for sp in spps:
    # Rows for aspen and the current associate species only
    df_sp = df[df['fortypnm_gp'].isin(['Aspen', sp])]

    # Pivot table to create one column for Aspen and one for the species.
    # Reindexing guarantees both columns exist (zero-filled) even when the species never
    # occurs in the aspen gridcells, which would otherwise raise KeyError below.
    cover_df = (
        df_sp.pivot_table(index='grid_index', columns='fortypnm_gp', values='fortyp_pct', fill_value=0)
        .reindex(columns=['Aspen', sp], fill_value=0)
        .reset_index()
    )

    # Filter for co-occurrence
    # NOTE(review): the threshold here is 60, while the tile-heatmap cell uses 50 for the
    # same combined-cover filter — confirm which value is intended.
    cover_df['combined_pct'] = cover_df['Aspen'] + cover_df[sp]
    cover_df = cover_df[cover_df['combined_pct'] > 60]

    # Melt to long format: one row per gridcell and species with its percent cover
    melted = cover_df.melt(
        id_vars='grid_index',
        value_vars=['Aspen', sp],
        var_name='species',
        value_name='pct_cover'
    )
    melted['other_spp'] = sp  # Add metadata for faceting
    kde_data.append(melted)

# Combine data for all species
kde_data_df = pd.concat(kde_data, ignore_index=True)

# One facet per associate species (no explicit palette is passed; seaborn defaults apply)
g = sns.FacetGrid(kde_data_df, col="other_spp", col_wrap=3, height=2, sharex=True, sharey=False)

# Draw the filled densities, normalised separately for each species
g.map_dataframe(
    sns.kdeplot,
    x="pct_cover",
    hue="species",
    fill=True,
    alpha=0.6,
    common_norm=False,
)

# Adjust labels and titles
g.set_titles("{col_name}")
g.set_axis_labels("Percent Cover (%)", "Density")
g.tight_layout()

out_plot = os.path.join(projdir, 'figures/FigureX_CoOccurring_CoverDistribution.png')
plt.savefig(out_plot, dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Figure 5: one row per associate forest type with three panels —
# tile heatmap coloured by FRP, tile heatmap coloured by CBIbc, and the cover KDE.
# Relies on `tiles_df` and `kde_data_df` built by the two preceding cells.

def _draw_tile_panel(ax, sp_data, sp, value_col, cbar_label, bins, fig):
    """Draw one tile heatmap panel with its own colourbar.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    sp_data : rows of the tile table for one species; the columns read here are
        f'{sp}_bin', 'aspen_bin', 'tile_size' and `value_col`.
    sp : species name, used for the panel label, the x-axis label and the bin column name.
    value_col : column that drives the tile colour.
    cbar_label : label placed on the colourbar.
    bins : bin edges; all but the last edge are used as tick labels.
    fig : figure that owns `ax`, needed to attach the colourbar.
    """
    # colour scale spans this species' own value range
    norm = mcolors.Normalize(vmin=sp_data[value_col].min(), vmax=sp_data[value_col].max())

    for _, row in sp_data.iterrows():
        x = int(row[f'{sp}_bin'])
        y = int(row['aspen_bin'])
        size = row['tile_size'] * 0.9
        color = plt.cm.coolwarm(norm(row[value_col]))

        # square tile centred on the (species bin, aspen bin) coordinate
        ax.add_patch(
            plt.Rectangle(
                (x - size / 2, y - size / 2), size, size, color=color, ec='black', lw=0.5
            )
        )

    # species name in the top-right corner of the panel
    ax.text(
        0.95, 0.95, sp,
        transform=ax.transAxes,
        ha='right', va='top',
        fontsize=10, color='black', bbox=dict(facecolor='white', edgecolor='none', boxstyle='round,pad=0.2')
    )
    ax.set_xlabel(f'{sp} Cover (%)', size=9)
    ax.set_ylabel('Aspen Cover (%)', size=9)
    ax.set_xticks(range(len(bins) - 1))
    ax.set_yticks(range(len(bins) - 1))
    ax.set_xticklabels([f'{int(b)}%' for b in bins[:-1]], size=8)
    ax.set_yticklabels([f'{int(b)}%' for b in bins[:-1]], size=8)
    ax.set_xlim(-0.5, len(bins) - 1.5)
    ax.set_ylim(-0.5, len(bins) - 1.5)

    # colourbar tied to this panel's normalisation
    sm = plt.cm.ScalarMappable(cmap=plt.cm.coolwarm, norm=norm)
    cbar = fig.colorbar(sm, ax=ax, orientation='vertical', fraction=0.05, pad=0.1)
    cbar.set_label(cbar_label)


# create bins for species composition
bins = np.linspace(0, 100, 6) # 20% intervals
# The species list is fixed by hand and determines the row order of the figure.
# (A list derived from `grid_aspen` used to be computed here and was immediately overwritten.)
spps = ['Mixed-conifer', 'Piñon-juniper', 'Ponderosa', 'Spruce-fir', 'Lodgepole']
print(f"Associate forest type: {spps}\n")

# Number of rows based on species
n_rows = len(spps)  # One row per species
# squeeze=False keeps `axes` two-dimensional even if the list is cut down to one species
fig, axes = plt.subplots(n_rows, 3, figsize=(8, 9), constrained_layout=True, squeeze=False)

for i, sp in enumerate(spps):
    sp_data = tiles_df[tiles_df['species'] == sp]

    # ===== TILE PLOTS: FRP (left) and CBIbc (middle) =====
    _draw_tile_panel(axes[i, 0], sp_data, sp, 'frp', 'Maximum FRP', bins, fig)
    _draw_tile_panel(axes[i, 1], sp_data, sp, 'CBIbc', '95th CBIbc', bins, fig)

    # ===== KDE PLOT =====
    sp_kde_data = kde_data_df[kde_data_df["other_spp"] == sp]
    ax_kde = axes[i, 2]

    sns.kdeplot(data=sp_kde_data,
        x="pct_cover",
        hue="species",
        fill=True,
        alpha=0.6,
        common_norm=False,
        ax=ax_kde,
    )

    # Formatting KDE Plot
    ax_kde.set_xlabel('Percent Cover (%)', size=9)
    ax_kde.set_ylabel('Density', size=9)

    legend = ax_kde.legend_  # Get the legend object for the KDE plot
    if legend:
        legend.set_title("")  # Remove the legend title
        for text in legend.get_texts():  # Adjust the size of legend text
            text.set_fontsize(8)

out_plot = os.path.join(projdir, 'figures/Figure5_CoOccurring_Heatmap_CoverDistribution_FRP_CBIbc.png')
plt.savefig(out_plot, dpi=300, bbox_inches='tight')

plt.show()