# Report Figures

Notebook to generate the tables and figures used in the final paper. Model outputs were generated on Alderaan and staged on OpenWFM.

All errors are kept as MSE, as output by the code. Converting to RMSE is left as a later option, since the result depends on whether the square root is taken before or after averaging over replications and locations.

Pandas DataFrames are converted to LaTeX with particular formatting and then manually copied into the LaTeX report.

In [None]:
import matplotlib.pyplot as plt
from datetime import datetime
import sys
import os.path as osp
import pandas as pd
import numpy as np
from scipy import stats
import itertools
sys.path.append("../src")
from utils import Dict, read_yml, str2time, print_dict_summary, read_pkl, retrieve_url
# from viz import map_var, create_gif

In [None]:
# Local directory used for the report materials handled by this notebook.
destination_dir = "../outputs/report_materials"

# Remote directory holding the staged analysis results (note the trailing slash).
base_url = "https://demo.openwfm.org/web/data/fmda/analysis_results/test_results/"

In [None]:
# Names of the two staged artifacts; each is requested from base_url and
# given a destination path of the same name under destination_dir.
errs_filename = "forecast_errs.csv"
ml_data_filename = "ml_data.pkl"

retrieve_url(
    url=osp.join(base_url, errs_filename),
    dest_path=osp.join(destination_dir, errs_filename),
)

retrieve_url(
    url=osp.join(base_url, ml_data_filename),
    dest_path=osp.join(destination_dir, ml_data_filename),
)

In [None]:
# Load both inputs from the local destination paths passed to retrieve_url,
# so the CSV is not fetched from the remote URL a second time and both files
# are read from the same place.
df = pd.read_csv(osp.join(destination_dir, "forecast_errs.csv"), index_col=0)
ml_data = read_pkl(osp.join(destination_dir, "ml_data.pkl"))

## Model Summary Tables

In [None]:
# Show the forecast-error table as the cell output.
df

## Overall Error

Averaged over forecast periods and locations.

In [None]:
# Column-wise mean and standard deviation of the error table.
means, stds = df.mean(axis=0), df.std(axis=0)

# One row per column of df, rounded to 3 decimals, with the former index
# exposed as a "Model" column for the LaTeX export.
overall_errs_df = (
    pd.DataFrame({"Mean Error": means, "(Std)": stds})
    .round(3)
    .reset_index(names="Model")
)
overall_errs_df

In [None]:
def _format_cell(value):
    """Format floats to 3 decimals without trailing zeros; return other values unchanged."""
    if not isinstance(value, float):
        return value
    # Strip padding zeros first, then a dangling decimal point (e.g. "2.000" -> "2").
    return f"{value:.3f}".rstrip("0").rstrip(".")


overall_errs_df = overall_errs_df.map(_format_cell)

print(overall_errs_df.to_latex(index=False))

## T Tests

Paired t-tests for significant differences in error between models, paired by forecast period. Compare the p-values to the Bonferroni-corrected thresholds.

In [None]:
# Every unordered pair of columns in the error table.
col_pairs = list(itertools.combinations(df.columns, 2))

# Paired-sample t-test result for each pair, keyed by the (col1, col2) tuple.
ttests = {
    pair: stats.ttest_rel(df[pair[0]], df[pair[1]])
    for pair in col_pairs
}

In [None]:
# Show every paired t-test result, keyed by column pair.
ttests

In [None]:
# Inspect the paired t-test result for the ODE vs. XGB pair.
ttests[('ODE', 'XGB')]

In [None]:
# Bonferroni correction: divide each significance level by the number of tests.
n_tests = len(col_pairs)
print(f"Number of ttests run: {n_tests}")
print("Bonferroni Corrected Thresholds:")
for alpha in (0.05, 0.01):
    print(f"    Threshold {alpha} :  Corrected {alpha/n_tests}")

In [None]:
# Take the p-value from each paired t-test already stored in `ttests`
# instead of re-running every test.
p_values = {pair: result.pvalue for pair, result in ttests.items()}

# Square table of strings indexed by df's columns on both axes. "-" marks the
# cells that never receive a p-value (the diagonal and the lower triangle).
pval_df = pd.DataFrame("-", index=df.columns, columns=df.columns, dtype=object)

# Fill upper triangle with p-values: fixed-point when p >= 0.001,
# scientific notation for smaller values.
for (col1, col2), pval in p_values.items():
    pval_df.loc[col1, col2] = f"{pval:.4f}" if pval >= 0.001 else f"{pval:.2e}"

In [None]:
# Show the formatted p-value table as the cell output.
pval_df

In [None]:
# Convert DataFrame to LaTeX with full grid cells
latex_table = pval_df.to_latex(index=True, escape=False, na_rep="-", column_format="|" + "c|" * (len(pval_df.columns) + 1))

# Adjust LaTeX formatting to ensure full grid lines
latex_table = latex_table.replace("\\toprule", "\\hline")
latex_table = latex_table.replace("\\midrule", "\\hline")
latex_table = latex_table.replace("\\bottomrule", "\\hline")
print(latex_table)

## Skill Scores

Based on overall error.

In [None]:
# Rebuild the mean/std summary, this time keeping the column names of df as
# the index so rows can be looked up by label.
means, stds = df.mean(axis=0), df.std(axis=0)
overall_errs_df = pd.DataFrame({"Mean Error": means, "(Std)": stds}).round(3)
overall_errs_df

In [None]:
# Look up each mean error by column label rather than by position, so the
# values stay correct if the column order of overall_errs_df changes.
err_model = overall_errs_df.loc["RNN", "Mean Error"]
err_baseline1 = overall_errs_df.loc["CLIMATOLOGY", "Mean Error"]
err_baseline2 = overall_errs_df.loc["ODE", "Mean Error"]
err_baseline3 = overall_errs_df.loc["XGB", "Mean Error"]

# Skill score = 1 - (model error / baseline error), printed once per baseline.
for baseline_label, err_baseline in (
    ("Clim", err_baseline1),
    ("Ode", err_baseline2),
    ("XGB", err_baseline3),
):
    print(f"Skill Score ({baseline_label} Baseline): ")
    print(f"    {1-err_model/err_baseline}")
    print()


In [None]:
# Show the RNN mean error via a label-based (row, column) lookup.
overall_errs_df.loc["RNN", "Mean Error"]

In [None]:
# NOTE(review): overall_errs_df is rounded to 3 decimals where it is built, so
# these skill scores are computed from rounded means; confirm that is intended.
error_rnn = overall_errs_df.loc["RNN", "Mean Error"]

# Mean error of every row except RNN; each one serves as a baseline.
is_baseline = overall_errs_df.index != "RNN"
error_baseline = overall_errs_df.loc[is_baseline, "Mean Error"]

# Skill score of RNN against each baseline: 1 - (RNN error / baseline error).
skill_score = 1 - (error_rnn / error_baseline)
result_df = (
    pd.DataFrame({"Skill Score": skill_score})
    .reset_index(names="Baseline")
    .round(3)
)
result_df

In [None]:
# Render floats as strings with at most 3 decimals, dropping trailing zeros
# and any dangling decimal point; non-float cells pass through unchanged.
result_df = result_df.map(
    lambda cell: f"{cell:.3f}".rstrip("0").rstrip(".")
    if isinstance(cell, float)
    else cell
)

skill_latex = result_df.to_latex(index=False)
print(skill_latex)

## Plots

### Basic example timeseries plot for one location

In [None]:
# Station key and the UTC time window (inclusive on both ends) to plot.
st = "CHAC2"
start_time = pd.Timestamp("2024-01-01", tz="UTC")
end_time = pd.Timestamp("2024-01-07", tz="UTC")

# Positions of the station's timestamps that fall inside the window.
timestamps = ml_data[st]["times"]
in_window = (timestamps >= start_time) & (timestamps <= end_time)
inds = np.where(in_window)[0]

In [None]:
# Times and observed "fm" values for the selected station, restricted to inds.
station = ml_data[st]
x = station["times"][inds]
y = station["data"]["fm"].to_numpy()[inds]

# NOTE(review): a label is set on the line but no legend is drawn in this
# cell; confirm whether plt.legend() is wanted.
plt.plot(x, y, label="FM Observed", c="#468a29", linestyle="-")
plt.title(f"Observed FMC at RAWS {st}")
plt.xlabel("Hour")
plt.ylabel("FMC (%)")
plt.xticks(rotation=90)  # vertical tick labels
plt.grid()

plt.savefig(osp.join(destination_dir, "ts.png"))