### This notebook runs a time-varying fit on MTBS data in the USA and creates toy visualizations to show how the CCDF shape changes over time.

In [2]:
import os
import re
import geopandas as gpd
import rasterio
import rasterio.mask
import numpy as np
import pandas as pd
from collections import Counter
from scipy import stats
import pickle
import sys

sys.path.append("..")
from utils.temporal_analysis import *
from utils import wildfire_powerlaw as wfpl

# Location of the classified MTBS shapefile. Set the MTBS_SHP environment
# variable to point at a local copy; when it is unset, the original author's
# path is used so existing setups keep working unchanged.
in_shp = os.environ.get(
    "MTBS_SHP",
    "/Users/lukevonkapff/wildfires/MTBS/mtbs_classified.shp",
)
mtbs_classified = gpd.read_file(in_shp)

In [None]:
# Per-category fit results, keyed by the value of the "modis_cl_1" column.
overall_results = {}

for cover_class, fires in mtbs_classified.groupby("modis_cl_1"):
    areas = fires["area_km2"].values
    n_fires = len(areas)

    # Nothing to fit when a category has no rows.
    if not n_fires:
        continue

    print(f"\n=== {cover_class} (n={n_fires}) ===")

    # Bootstrap summary first, then the likelihood comparison; both use xmin=4.
    boot_params = wfpl.summarize_parameters_bootstrap(
        areas, R=150, xmin=4, random_state=42
    )
    lik_matrix, top_model = wfpl.likelihood_matrix_and_best(areas, xmin=4)

    overall_results[cover_class] = dict(
        params=boot_params,
        likelihood_matrix=lik_matrix,
        best_fit=top_model,
    )


=== Barren or sparsely vegetated (n=10) ===


  return (1.0 / self.sigma) * z ** (-(1.0 / self.xi) - 1.0)
  np.max(np.abs(fsim[0] - fsim[1:])) <= fatol):
  CDF = CDF/norm
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


there are 10 points above xmin 4.0 km^2

=== Closed shrublands (n=159) ===


  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


there are 159 points above xmin 4.0 km^2

=== Cropland/Natural vegetation mosaic (n=2) ===


  return (1.0 / self.sigma) * z ** (-(1.0 / self.xi) - 1.0)
  np.max(np.abs(fsim[0] - fsim[1:])) <= fatol):
  CDF = CDF/norm
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values

there are 2 points above xmin 4.0 km^2

=== Croplands (n=208) ===


  CDF = CDF/norm
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme

In [None]:
# Time-varying analysis over the per-category results, followed by a summary
# table built with mode="both".
timevary_results = analyze_time_varying_mle(
    mtbs_classified,
    overall_results,
    xmin=4,
    R_boot=150,
)
df_both = summarize_timevary_results_mode(
    timevary_results,
    mode="both",
)

In [None]:
# The previous cell can instead be run with mode="p1_only" or mode="p2_only"
# to hold the time component of one parameter fixed.
# The bare `df_both` on the last line makes the notebook display the table.
print("=== BOTH PARAMETERS ===")
df_both

In [None]:
# Visualize the summary table from the mode="both" run
# (presumably how each fitted CCDF changes over time, per the notebook title).
plot_distribution_evolution_ccdf(df_both)

In [None]:
# Optional: show the distribution of savanna fires.
# Can easily be tweaked to show where fires in other biomes occur.
plot_savanna_fires(mtbs_classified, biome = "both")