# Analyze Project Results


This notebook is intended to analyze and visualize the accuracy of the FMC models in an interactive setting.

Note: some function calls are commented out since they involve memory intensive map creation. Uncomment to generate the maps, but you may experience memory issues if you try to render all at once.

Note: some outputs are automatically generated in `report_materials.py`; this notebook is meant to complement that script and provide more granular control.

## Setup

In [None]:
import os
import os.path as osp
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import sys
import h5py
import re
from dateutil.relativedelta import relativedelta
sys.path.append('../src')
from utils import time_range, Dict, read_yml, read_pkl, print_dict_summary, str2time
from viz import plot_styles, plot_one, make_st_map_interactive

In [None]:
# Relative path of the directory that holds the forecast outputs read by this
# notebook; several figures are also saved into it.
ml_forecast_dir = "../outputs/forecast_outputs"

## All National stations

In [None]:
# Per-station data keyed by station ID; entries are read via their "loc" and "data" keys.
# NOTE(review): unpickling can execute arbitrary code — only load a pickle produced by this project.
ml_data = pd.read_pickle(osp.join(ml_forecast_dir, "ml_data.pkl"))

In [None]:
# Gather every station's "loc" record and stack them into one row per station.
locs = [ml_data[stid]["loc"] for stid in ml_data]
df = pd.DataFrame(locs)
print(f"Number of Stations in Study Region: {len(df)}")

In [None]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from cartopy.io.img_tiles import StadiaMapsTiles

# Basemap tiles from Stadia Maps. The API key is read from the environment so
# that no credential is stored in the notebook (a key that has been committed
# should be rotated).
_stadia_api_key = os.environ.get("STADIA_API_KEY")
if not _stadia_api_key:
    raise RuntimeError(
        "Set the STADIA_API_KEY environment variable to a Stadia Maps API key "
        "before creating the map tiles."
    )

tile_provider = StadiaMapsTiles(
    _stadia_api_key,
    style="stamen_terrain"
)

def plot_points(df, zoom=5, tiles=None):
    """Plot station locations as points on a tiled basemap.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "lon" and "lat" columns; they are plotted with a
        PlateCarree transform.
    zoom : int, default 5
        Tile zoom level passed to ``ax.add_image``.
    tiles : optional
        Image-tile source for the basemap. Defaults to the notebook-level
        ``tile_provider``.

    Returns
    -------
    tuple
        ``(fig, ax)`` of the newly created figure and map axes.

    Raises
    ------
    ValueError
        If ``df`` has no rows (the map extent cannot be computed).
    """
    import matplotlib.pyplot as plt
    import cartopy.crs as ccrs

    if len(df) == 0:
        raise ValueError("plot_points needs at least one point to set the map extent")
    if tiles is None:
        tiles = tile_provider

    proj = ccrs.LambertConformal(central_longitude=-110, central_latitude=40)
    fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={"projection": proj})

    # Bounding box from data + 10% padding; `or 0.5` covers a zero-width span
    # (e.g. a single point).
    lon_min, lon_max = df["lon"].min(), df["lon"].max()
    lat_min, lat_max = df["lat"].min(), df["lat"].max()
    pad_lon = (lon_max - lon_min) * 0.1 or 0.5
    pad_lat = (lat_max - lat_min) * 0.1 or 0.5

    ax.set_extent(
        [lon_min - pad_lon, lon_max + pad_lon,
         lat_min - pad_lat, lat_max + pad_lat],
        crs=ccrs.PlateCarree(),
    )

    ax.add_image(tiles, zoom, alpha=.7)

    # Scatter the points
    ax.scatter(
        df["lon"],
        df["lat"],
        s=25,
        transform=ccrs.PlateCarree(),
        edgecolor="black",
        linewidth=0.8,
        color="cyan"
    )

    # Axis labels only: keep bottom/left labels, hide the grid lines themselves
    gl = ax.gridlines(draw_labels=True, x_inline=False, y_inline=False)
    gl.top_labels = False
    gl.right_labels = False
    gl.xlines = False
    gl.ylines = False

    return fig, ax

In [None]:
# plot_points(df)
# plt.savefig("../outputs/st_map_carto.png")

In [None]:
# import plotly.io as pio
# pio.kaleido.scope.default_scale = 10
# import importlib; import viz; importlib.reload(viz); from viz import make_st_map_interactive
# make_st_map_interactive(df)

### Read Results


In [None]:
overall = pd.read_csv(osp.join(ml_forecast_dir, "overall.csv"))
by_dt = pd.read_csv(osp.join(ml_forecast_dir, "by_dt.csv"))
by_hod = pd.read_csv(osp.join(ml_forecast_dir, "by_hod.csv"))
by_st = pd.read_csv(osp.join(ml_forecast_dir, "by_stid.csv"))
sts = pd.read_csv(osp.join(ml_forecast_dir, "stid_locs.csv"))
rnn = pd.read_csv(osp.join(ml_forecast_dir, "rnn_preds.csv"))
var = pd.read_csv(osp.join(ml_forecast_dir, "all_variables_summary.csv"))

by_st = by_st.merge(sts, on="stid", how="left")

## Accuracy by Low-Med-High

In [None]:
# Half-open FMC classes: [0, 10), [10, 20), [20, inf)
bins = [0, 10, 20, np.inf]
labels = ['Low (0-10)', 'Med (10-20)', 'High (20+)']

rnn['fm_label'] = pd.cut(rnn['fm'], bins, right=False, labels=labels)

In [None]:
# Per-FMC-level accuracy: mean residual, RMSE, and mean absolute error
# relative to the mean observed FMC of the same rows.
by_level = rnn.groupby('fm_label', observed=False)

table = (
    pd.concat(
        {
            "Bias": by_level['residual'].mean(),
            "RMSE": np.sqrt(by_level['squared_error'].mean()),
            "Relative Error": by_level['abs_error'].mean() / by_level['fm'].mean(),
        },
        axis=1,
    )
    .rename_axis("FMC Level")
    .reset_index()
)

In [None]:
# Display the bias / RMSE / relative-error table by FMC level
table

## Loss Time Series Graphics

This graphic is meant to show observed vs predicted for a 48 hour stretch to visualize how a single loss calculation is performed in the RNN.

In [None]:
# Station and time window used for the loss illustration
st = "TT562"
start = pd.to_datetime(str2time("2024-01-01T00:00:00Z"))
end = pd.to_datetime(str2time("2024-01-02T23:00:00Z"))

# Parse timestamps on the full table (later cells filter on date_time as well)
rnn["date_time"] = pd.to_datetime(rnn["date_time"])

# Rows for this station inside the window (both ends inclusive) ...
in_window = rnn["date_time"].between(start, end)
df = rnn[(rnn["stid"] == st) & in_window]
# ... restricted to the first replication that appears
first_rep = df["rep"].unique()[0]
df = df[df["rep"] == first_rep]

In [None]:
import matplotlib.dates as mdates
# rcParams are read when artists are created, so set the font sizes before
# anything is drawn; otherwise they only take effect on a second run of the cell.
plt.rcParams.update({"axes.labelsize": 14, "xtick.labelsize": 12, "ytick.labelsize": 12, "legend.fontsize": 12})

plt.plot(df.date_time, df.preds, color="k", linestyle="dashed", label="Predicted")
plt.plot(df.date_time, df.fm, **plot_styles["fm"])
plt.legend(loc="upper right")
plt.grid()
plt.ylim(7.5, 20)
plt.ylabel("FMC (%)")
ax = plt.gca()

# Select tick times by POSITION: `df` is a filtered slice of `rnn`, so its index
# labels are the original row numbers, not 0..47. Positions beyond the available
# rows are skipped instead of raising.
tick_pos = [i for i in (0, 12, 24, 36, 47) if i < len(df)]
tick_times = df.date_time.iloc[tick_pos]
ax.set_xticks(tick_times)
ax.set_xticklabels(tick_times.dt.strftime("%Y-%m-%d\n%H:%M").tolist(), rotation=45)
plt.tight_layout()
plt.savefig(osp.join(ml_forecast_dir, "loss.png"), dpi=600)

This maps stations with binary colors to construct a train/test split visualization.

In [None]:
# np.random.seed(20250509) # date of first run
# sts["train"] = np.random.choice([1, 0], size=len(sts), p=[0.8, 0.2])

# make_st_map_interactive(sts, color="train", binary=True)

## Error by Location

Grouping by station, mapping error, histogram of error for high/low altitude stations (1800m threshold). A couple versions: just the map, just the histogram, then composed together

In [None]:
# RNN rows of the per-station table, plus a plot-friendly RMSE column and a
# flag for stations above the 1800 elevation threshold.
df = by_st[by_st["Model"] == "rnn"].assign(
    RMSE=lambda d: d["rmse_mean"],
    High=lambda d: d["elev"] > 1800,
)

In [None]:
# Basemap tiles from Stadia Maps. The API key is read from the environment so
# that no credential is stored in the notebook (a key that has been committed
# should be rotated).
_stadia_api_key = os.environ.get("STADIA_API_KEY")
if not _stadia_api_key:
    raise RuntimeError(
        "Set the STADIA_API_KEY environment variable to a Stadia Maps API key "
        "before creating the map tiles."
    )

tile_provider = StadiaMapsTiles(
    _stadia_api_key,
    style="stamen_terrain"
)

def plot_points(df, zoom=5, color=None, shape=None, fig=None, ax=None,
                vcenter=3.0, shape_labels=("High (>1800m)", "Low (<=1800m)"),
                tiles=None):
    """Plot station locations on a tiled basemap, optionally coloured and shaped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "lon" and "lat" columns; they are plotted with a
        PlateCarree transform.
    zoom : int, default 5
        Tile zoom level passed to ``ax.add_image``.
    color : str, optional
        Name of a column in ``df`` mapped onto the "RdBu_r" colormap; a
        colorbar labelled with the column name is added. Without it all
        points are cyan.
    shape : str, optional
        Name of a column in ``df`` that is cast to bool. Truthy rows are drawn
        as triangles, falsy rows as squares, and a legend is added.
    fig, ax : optional
        Existing figure / map axes to draw into. If ``ax`` is None a new
        figure and axes are created (``fig`` is then ignored). If only ``ax``
        is given, its parent figure is used.
    vcenter : float, default 3.0
        Value placed at the centre of the diverging colormap. When the data
        do not straddle it, a plain linear normalisation is used instead,
        because TwoSlopeNorm requires vmin < vcenter < vmax.
    shape_labels : tuple of str
        Legend labels for the (truthy, falsy) marker classes.
    tiles : optional
        Image-tile source for the basemap. Defaults to the notebook-level
        ``tile_provider``.

    Returns
    -------
    tuple
        ``(fig, ax)``.

    Raises
    ------
    ValueError
        If ``df`` has no rows (the map extent cannot be computed).
    """
    import matplotlib.pyplot as plt
    import matplotlib.lines as mlines
    import cartopy.crs as ccrs
    from matplotlib.colors import Normalize, TwoSlopeNorm

    if len(df) == 0:
        raise ValueError("plot_points needs at least one point to set the map extent")
    if tiles is None:
        tiles = tile_provider

    proj = ccrs.LambertConformal(central_longitude=-110, central_latitude=40)
    if ax is None:
        fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={"projection": proj})
    elif fig is None:
        # The colorbar below needs a figure; recover it from the supplied axes.
        fig = ax.figure

    # Bounding box from data + 10% padding; `or 0.5` covers a zero-width span.
    lon_min, lon_max = df["lon"].min(), df["lon"].max()
    lat_min, lat_max = df["lat"].min(), df["lat"].max()
    pad_lon = (lon_max - lon_min) * 0.1 or 0.5
    pad_lat = (lat_max - lat_min) * 0.1 or 0.5

    ax.set_extent(
        [lon_min - pad_lon, lon_max + pad_lon,
         lat_min - pad_lat, lat_max + pad_lat],
        crs=ccrs.PlateCarree(),
    )

    ax.add_image(tiles, zoom, alpha=.7)

    # Point colours: per-row RGBA from the colormap, or a single fallback colour
    norm = cmap = None
    point_colors = "cyan"
    if color is not None:
        values = df[color]
        vmin, vmax = values.min(), values.max()
        if vmin < vcenter < vmax:
            norm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)
        else:
            norm = Normalize(vmin=vmin, vmax=vmax)
        cmap = plt.colormaps.get_cmap("RdBu_r")
        point_colors = cmap(norm(values))

    marker_style = dict(
        transform=ccrs.PlateCarree(),
        s=35,
        edgecolor="black",
        linewidth=0.8,
        alpha=0.7,
    )

    if shape is not None:
        # Plain boolean array so it can index both the frame and the RGBA array
        is_true = df[shape].astype(bool).to_numpy()
        for mask, marker in ((is_true, "^"), (~is_true, "s")):
            subset = df[mask]
            subset_colors = point_colors[mask] if color is not None else point_colors
            ax.scatter(
                subset["lon"], subset["lat"],
                marker=marker,
                c=subset_colors,
                **marker_style,
            )
    else:
        ax.scatter(df["lon"], df["lat"], c=point_colors, **marker_style)

    # Colorbar in its own axes just to the right of the map
    if color is not None:
        sm = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
        sm.set_array([])

        map_box = ax.get_position()
        cax = fig.add_axes([
            map_box.x1 + 0.02,    # a little to the right
            map_box.y0,
            0.02,                 # bar width
            map_box.height
        ])

        cbar = fig.colorbar(sm, cax=cax, orientation="vertical")
        cbar.set_label(color)

    # Shape legend built from dummy handles (the scatter calls carry no labels)
    if shape is not None:
        tri_handle = mlines.Line2D(
            [], [], marker="^", color="black",
            markersize=8, linestyle="None"
        )
        sq_handle = mlines.Line2D(
            [], [], marker="s", color="black",
            markersize=8, linestyle="None"
        )
        ax.legend(
            [tri_handle, sq_handle],
            list(shape_labels),
            loc="lower right",
            frameon=True,
            framealpha=0.9,
            facecolor="white",
            edgecolor="black"
        )

    # Axis labels only: keep bottom/left labels, hide the grid lines themselves
    gl = ax.gridlines(draw_labels=True, x_inline=False, y_inline=False)
    gl.top_labels = False
    gl.right_labels = False
    gl.xlines = False
    gl.ylines = False

    return fig, ax

In [None]:
# Station RMSE map: colour encodes RMSE, marker shape encodes the elevation flag.
rmse_fig, rmse_ax = plot_points(df, color="RMSE", shape="High")
rmse_fig.savefig("../outputs/rmse_map_revised.png")

In [None]:
# Stacked RMSE histograms for high- and low-elevation stations on shared axes.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(4, 7), sharex=True, sharey=True)
params = {"bins": 40, "edgecolor": "black", "linewidth": 1.0}

# Top panel: stations flagged High
ax[0].hist(df.loc[df["High"], "RMSE"], **params)
ax[0].set_title("High Elevation (>1800)")
ax[0].tick_params(labelbottom=True)  # show x tick labels here despite sharex
ax[0].set_ylabel("Frequency")

# Bottom panel: the remaining stations
ax[1].hist(df.loc[~df["High"], "RMSE"], **params)
ax[1].set_title("Low Elevation (<=1800)")
ax[1].set_ylabel("Frequency")
ax[1].set_xlabel("RMSE (%)")

plt.savefig("../outputs/elev_hist.png")

In [None]:
# Together: RMSE map on the left, the two elevation histograms stacked on the right
import matplotlib.gridspec as gridspec
fig = plt.figure(figsize=(14, 6))    # wide enough for map + hists
gs = gridspec.GridSpec(2, 2, width_ratios=[2.5, 1])

# Same projection parameters that plot_points uses when it creates its own axes,
# so the composite map matches the standalone RMSE map.
map_proj = ccrs.LambertConformal(central_longitude=-110, central_latitude=40)
ax_map = fig.add_subplot(gs[:, 0], projection=map_proj)
ax_hist1 = fig.add_subplot(gs[0, 1])
# Share x with the top histogram: its x tick labels are hidden below, so both
# panels must use the same x scale for the bottom labels to apply to both.
ax_hist2 = fig.add_subplot(gs[1, 1], sharex=ax_hist1)
plot_points(df, color="RMSE", shape="High", fig=fig, ax=ax_map)

params = dict(bins=40, edgecolor='black', linewidth=1.0)

ax_hist1.hist(df[df.High].RMSE, **params)
ax_hist1.set_title("High Elevation (>1800)")
ax_hist1.set_ylabel("Frequency")
ax_hist1.tick_params(labelbottom=False)

ax_hist2.hist(df[~df.High].RMSE, **params)
ax_hist2.set_title("Low Elevation (<=1800)")
ax_hist2.set_xlabel("RMSE (%)")
ax_hist2.set_ylabel("Frequency")

# tight_layout is intentionally not used here
plt.savefig("../outputs/map_and_hists.png")



## RNN Errors

The residual plot is commented out below: it is highly memory intensive and we didn't end up using it in the paper. Keeping it here for completeness, as a record of the explorations we made.

Side-by-side histograms of predicted and observed FMC to show distributions.

In [None]:
# df2 = rnn[rnn.rep == 1]

# fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# # Residual Plot
# axs[0].scatter(rnn.preds, rnn.residual, marker="o", alpha=.7)
# axs[0].set_xlabel("Predicted FMC (%)")
# axs[0].set_ylabel("Residual (Observed - Predicted)")
# axs[0].grid(True)
# axs[0].axhline(y=0, linestyle="dashed", color="k")

# # Residual Histogram
# axs[1].hist(rnn.residual, bins=20, edgecolor="k")
# axs[1].set_xlabel("Residual (Observed - Predicted)")
# axs[1].set_ylabel("Frequency")
# axs[1].grid(True)

# plt.tight_layout()
# plt.savefig(osp.join(ml_forecast_dir, "residuals.png"))

In [None]:
# Sanity check on the average number of predictions per replication:
# 8784 hrs/yr * 16 sts/rep = 140,544 preds if nothing is missing all year. Expect less than this.

print(rnn.groupby("rep").size().mean())

In [None]:
# (rows, columns) of the RNN prediction table read from rnn_preds.csv
rnn.shape

In [None]:
# Larger fonts for the paper figure (set before the axes are created)
plt.rcParams.update({"axes.labelsize": 18, "xtick.labelsize": 16, "ytick.labelsize": 16})

fig, axs = plt.subplots(1, 2, figsize=(12, 4))

# (values, x label, extra hist kwargs) for the left and right panels;
# the predicted panel keeps matplotlib's default bar colour.
panels = [
    (rnn.fm, "Observed FMC (%)", {"color": "#468a29"}),
    (rnn.preds, "Predicted FMC (%)", {}),
]
for panel_ax, (values, xlabel, hist_kwargs) in zip(axs, panels):
    panel_ax.hist(values, bins=42, edgecolor="k", **hist_kwargs)
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel("Frequency")
    panel_ax.grid(True)
    panel_ax.set_xlim(0, 40)

plt.tight_layout()
plt.savefig(osp.join(ml_forecast_dir, "fm_hist.png"), dpi=400)

In [None]:
# Stratify by observed FMC level: Low [0, 10), Medium [10, 20), High [20, inf)
bins = [0, 10, 20, np.inf]
labels = ['Low (0-10)', 'Medium (10-20)', 'High (20+)']
rnn["fm_level"] = pd.cut(rnn["fm"], bins, right=False, labels=labels)

# Mean residual per (replication, level), then mean and std across replications
bias = rnn.groupby(["rep", "fm_level"], observed=True)["residual"].mean()
summary = (
    bias.groupby("fm_level", observed=True)
    .agg(["mean", "std"])
    .reset_index()
)
summary.columns = ["FMC Level", "Bias", "Std"]
summary

## Other Visualizations

These didn't make it into the paper, but may be useful graphics in a follow-up. Also keeping them for completeness.

Plotting some good and bad examples based on by station error.

In [None]:
st = "CPPC2"
start = pd.to_datetime('2024-09-19 00:00:00+00:00')
end = start + relativedelta(hours=48-1)   # 48 hourly steps, both ends inclusive

# Explicit copy: the conversion below must not write into a slice of `rnn`
df2 = rnn[rnn.stid == st].copy()
df2["date_time"] = pd.to_datetime(df2["date_time"])
df2 = df2[(df2.date_time >= start) & (df2.date_time <= end)]

# One MSE per replication over the window; report mean and range across replications
mse = df2.groupby("rep")["squared_error"].mean()
print(f"MSE for {st}:")
print(f"    {mse.mean()}, ({mse.min()} to {mse.max()})")

In [None]:
# Mean prediction and +/- 1 std band across replications at each timestamp
mean_pred = df2.groupby(["date_time"]).preds.mean()
std = df2.groupby(["date_time"]).preds.std()
high = mean_pred + std
low = mean_pred - std
# Take x from the grouped index so it is guaranteed to line up with low/high
# (unique() returns appearance order, which need not match the sorted groups).
x = mean_pred.index
plot_one(ml_data, st=st, m=mean_pred, start_time = start, end_time=end, title2=f"MSE: {mse.mean().round(2)}")
plt.fill_between(x, low, high, color="k", alpha=0.2)

In [None]:
st = "TT562"
start = pd.to_datetime('2024-01-01 00:00:00+00:00')
end = start + relativedelta(hours=48-1)   # 48 hourly steps, both ends inclusive

# Explicit copy: the conversion below must not write into a slice of `rnn`
df2 = rnn[rnn.stid == st].copy()
df2["date_time"] = pd.to_datetime(df2["date_time"])
df2 = df2[(df2.date_time >= start) & (df2.date_time <= end)]

# One MSE per replication over the window; report mean and range across replications
mse = df2.groupby("rep")["squared_error"].mean()
print(f"MSE for {st}:")
print(f"    {mse.mean()}, ({mse.min()} to {mse.max()})")

In [None]:
# Mean prediction and +/- 1 std band across replications at each timestamp
mean_pred = df2.groupby(["date_time"]).preds.mean()
std = df2.groupby(["date_time"]).preds.std()
high = mean_pred + std
low = mean_pred - std
# Take x from the grouped index so it is guaranteed to line up with low/high
# (unique() returns appearance order, which need not match the sorted groups).
x = mean_pred.index
plot_one(ml_data, st=st, m=mean_pred, start_time = start, end_time=end, title2=f"MSE: {mse.mean().round(2)}")
plt.fill_between(x, low, high, color="k", alpha=0.2, label=rf"$\pm$ 1 std. ({df2.rep.unique().shape[0]} reps)")
plt.legend(loc='upper left', bbox_to_anchor=(1, 0.5))

---

In [None]:
st = "RLAS2"
start = pd.to_datetime('2024-02-08 00:00:00+00:00')
end = start + relativedelta(hours=48-1)   # 48 hourly steps, both ends inclusive

# Explicit copy: the conversion below must not write into a slice of `rnn`
df2 = rnn[rnn.stid == st].copy()
df2["date_time"] = pd.to_datetime(df2["date_time"])
df2 = df2[(df2.date_time >= start) & (df2.date_time <= end)]

# One MSE per replication over the window; report mean and range across replications
mse = df2.groupby("rep")["squared_error"].mean()
print(f"MSE for {st}:")
print(f"    {mse.mean()}, ({mse.min()} to {mse.max()})")

In [None]:
# Mean prediction and +/- 1 std band across replications at each timestamp
mean_pred = df2.groupby(["date_time"]).preds.mean()
std = df2.groupby(["date_time"]).preds.std()
high = mean_pred + std
low = mean_pred - std
# Take x from the grouped index so it is guaranteed to line up with low/high
# (unique() returns appearance order, which need not match the sorted groups).
x = mean_pred.index
plot_one(ml_data, st=st, m=mean_pred, start_time = start, end_time=end, title2=f"MSE: {mse.mean().round(2)}")
plt.fill_between(x, low, high, color="k", alpha=0.2, label=rf"$\pm$ 1 std. ({df2.rep.unique().shape[0]} reps)")
plt.legend(loc='upper left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.savefig(osp.join(ml_forecast_dir, "RLAS2_Feb824.png"))

In [None]:
# Observed FMC per timestamp, taken from the grouped data so that it shares
# mean_pred's index and does not depend on one particular replication number
# containing this station.
# NOTE(review): assumes the observed `fm` at a given station/time is the same in every replication — confirm.
fm = df2.groupby(["date_time"]).fm.first()

# Offset between observation and mean prediction at the first timestep
delta0 = fm.iloc[0] - mean_pred.iloc[0]
print(f"MSE for mean prediction: {mean_squared_error(fm, mean_pred)}")
print(f"Error at t=0: {delta0}")
# MSE after shifting the whole mean prediction by the t=0 offset
print(f"MSE for translated mean prediction: {mean_squared_error(fm, mean_pred+delta0)}")

---

In [None]:
st = "C3SKI"
start = pd.to_datetime('2024-10-19 00:00:00+00:00')
end = start + relativedelta(hours=48-1)   # 48 hourly steps, both ends inclusive

# Explicit copy: the conversion below must not write into a slice of `rnn`
df2 = rnn[rnn.stid == st].copy()
df2["date_time"] = pd.to_datetime(df2["date_time"])
df2 = df2[(df2.date_time >= start) & (df2.date_time <= end)]

# One MSE per replication over the window; report mean and range across replications
mse = df2.groupby("rep")["squared_error"].mean()
print(f"MSE for {st}:")
print(f"    {mse.mean()}, ({mse.min()} to {mse.max()})")

In [None]:
# Mean prediction and +/- 1 std band across replications at each timestamp
mean_pred = df2.groupby(["date_time"]).preds.mean()
std = df2.groupby(["date_time"]).preds.std()
high = mean_pred + std
low = mean_pred - std

plot_one(ml_data, st=st, m=mean_pred, start_time = start, end_time=end, title2=f"MSE: {mse.mean().round(2)}")
# Take x from the grouped index so it is guaranteed to line up with low/high
# (unique() returns appearance order, which need not match the sorted groups).
x = mean_pred.index
plt.fill_between(x, low, high, color="k", alpha=0.2, label=rf"$\pm$ 1 std. ({df2.rep.unique().shape[0]} reps)")
plt.legend(loc='upper left', bbox_to_anchor=(1, 0.5))

## TS Plots for CV Graphic

Graphic visualizing spatiotemporal cross validation, composes a map of train/test split to show spatial split with a couple time series to show temporal ordering of train/test

In [None]:
# Two-year window for the cross-validation graphic (UTC timestamps)
start = pd.to_datetime('2023-01-01 00:00:00+00:00')
end = pd.to_datetime('2024-12-31 23:00:00+00:00')
# NOTE(review): presumably the sequence of timestamps from start to end — confirm against utils.time_range
x = time_range(start, end)
# First hour of 2024, i.e. the start of the second year of the window
h2 = pd.to_datetime('2024-01-01 00:00:00+00:00')

In [None]:
# Station IDs whose FMC time series are plotted and saved below (one figure each)
st_list = ["CHRC2", "TT689"]

In [None]:
def p(st):
    """Plot the observed FMC time series of one station and save it as a PNG.

    Parameters
    ----------
    st : str
        Station ID; must be a key of the notebook-level ``ml_data`` whose
        entry has a "data" frame with "date_time" and "fm" columns.

    The figure is written to ``<ml_forecast_dir>/<st>.png``.
    """
    # Work on a copy: converting and sorting in place would modify the shared
    # frame stored in `ml_data`.
    df2 = ml_data[st]["data"].copy()
    df2["date_time"] = pd.to_datetime(df2["date_time"])
    df2 = df2.sort_values("date_time")

    plt.figure(figsize=(12,4))
    plt.grid()
    plt.xticks(fontsize=16, rotation=90)
    plt.yticks(fontsize=16)
    plt.ylim(0,32)
    plt.ylabel("FMC (%)", fontsize=16)
    plt.plot(df2.date_time, df2.fm, color="#468a29")
    plt.tight_layout()
    plt.savefig(osp.join(ml_forecast_dir, f"{st}.png"))

for st in st_list:
    p(st)

## Errors over Year

Aggregate by month, plot over year. Also didn't make it into the paper, but interesting for future improvements

In [None]:
# Copy first: `df = by_dt` would only alias the frame, so the new columns would
# be written into `by_dt` itself.
df = by_dt.copy()
df["date_time"] = pd.to_datetime(df["date_time"])
df["month"] = df["date_time"].dt.month
# Average the numeric metric columns per model and calendar month.
# numeric_only=True keeps the aggregation from failing on non-numeric columns.
df = df.groupby(["Model", "month"]).mean(numeric_only=True).reset_index()

In [None]:
# One RMSE-by-month line per model, in the order the models appear in `df`
plt.figure()
for model, subset in df.groupby("Model", sort=False):
    plt.plot(subset["month"], subset["rmse_mean"], label=model)

plt.title("RMSE Mean by Month and Model")
plt.xlabel("Month")
plt.ylabel("RMSE Mean")
plt.grid(True)
plt.legend()
plt.show()

## Reps

Checking the number of replications each station gets included in. Theoretically this should be governed by repeated sampling with replacement, i.e. a binomial distribution for the number of times a station is selected, with mean equal to the number of draws times the probability of being selected in a single draw. This was a sanity check for the replication procedure; it is not included in the report, but someone might want to check it.

In [None]:
# Average number of distinct replications each station appears in
rnn.groupby("stid")["rep"].nunique().mean()

In [None]:
# Number of distinct stations in the RNN predictions (shown as a 1-tuple shape)
rnn.stid.unique().shape

In [None]:
# total individuals
N = 151

# sample size (10% of N, rounded up)
sample_size = int(np.ceil(0.10 * N))

# number of draws
draws = 500

# probability of selecting a given individual in one sample
# (named p_select so it does not overwrite the plotting function `p` defined earlier)
p_select = sample_size / N

# expected number of times one individual is selected (binomial mean: draws * p)
expected_times = draws * p_select

print(f"Expected number of times one individual is selected: {expected_times:.2f}")

In [None]:
# Number of distinct stations in each replication
rnn.groupby("rep")["stid"].nunique()