In [2]:
import sys
from pathlib import Path

# Parent of the notebook's working directory; put on the import path so the
# `src.*` imports used later in the notebook can resolve.
PROJECT_ROOT = Path.cwd().parent
# Insert only when absent: re-running this cell must not stack duplicate
# entries on sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))


In [17]:
import pandas as pd
import numpy as np

from sklearn.metrics import root_mean_squared_error
from statsmodels.tsa.ar_model import AutoReg

from src.data.dataloader import load_precipitation
from src.indices.spi import compute_spi
from src.splits.temporal import split_pre_post


In [15]:
# Read the gauge identifiers: one per line, surrounding whitespace removed,
# blank lines skipped.
with open("../data/curated/gauges.txt") as f:
    gauges = list(filter(None, map(str.strip, f)))

print(f"Number of gauges: {len(gauges)}")


Number of gauges: 18


In [9]:
def persistence_forecast(series):
    """Naive persistence forecast: predict each value with the previous one.

    Parameters
    ----------
    series : object exposing ``shift`` (a pandas Series in this notebook)
        Observed values in time order.

    Returns
    -------
    The input shifted forward by one position on the same index; the first
    entry has no predecessor and is therefore missing.
    """
    previous_values = series.shift(periods=1)
    return previous_values


In [None]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between observed and predicted values.

    Thin wrapper around scikit-learn's ``root_mean_squared_error`` so the
    rest of the notebook has a single short name for the metric.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    Whatever ``root_mean_squared_error`` returns for the pair.
    """
    error = root_mean_squared_error(y_true=y_true, y_pred=y_pred)
    return error


In [12]:
def extreme_rmse(y_true, y_pred, threshold=-1.5, min_samples=5):
    """RMSE restricted to extreme observations, i.e. where ``y_true < threshold``.

    Parameters
    ----------
    y_true : pandas.Series
        Observed values.
    y_pred : pandas.Series
        Predicted values sharing the index of ``y_true``.
    threshold : float, default -1.5
        Observations strictly below this value count as extreme.
    min_samples : int, default 5
        Minimum number of usable extreme pairs required to report a score.
        Previously hard-coded to 5; exposed so callers can tighten or relax
        the guard.

    Returns
    -------
    float
        ``rmse`` over the selected pairs, or ``np.nan`` when fewer than
        ``min_samples`` pairs qualify (too few points for a meaningful score).
    """
    is_extreme = y_true < threshold
    # Only score positions where both the observation and the forecast exist.
    both_present = y_true.notna() & y_pred.notna()
    mask = is_extreme & both_present

    if mask.sum() < min_samples:
        return np.nan
    return rmse(y_true[mask], y_pred[mask])


In [18]:
def _ar1_one_step(series, intercept, slope):
    """One-step-ahead AR(1) forecast ``intercept + slope * y[t-1]`` from observed lags.

    The first position is dropped because it has no observed predecessor
    inside ``series``; the result therefore lines up with ``series.iloc[1:]``.
    """
    return (intercept + slope * series.shift(1)).iloc[1:]


# One record per (gauge, SPI scale) with persistence and AR(1) error metrics.
results = []
# (gauge, exception) pairs, kept so failed gauges can be inspected afterwards.
failures = []

for gauge in gauges:
    try:
        precip = load_precipitation(gauge)

        # Compute SPI at both scales before evaluating either of them.
        spi_by_scale = {
            "SPI-3": compute_spi(precip, scale=3),
            "SPI-6": compute_spi(precip, scale=6),
        }

        for scale, spi in spi_by_scale.items():

            train, test = split_pre_post(spi, split_year=2000)

            # ---------- Persistence ----------
            p_train_pred = persistence_forecast(train)
            p_test_pred  = persistence_forecast(test)

            # Skip the first position: the shifted forecast is missing there.
            p_train_rmse = rmse(train.iloc[1:], p_train_pred.iloc[1:])
            p_test_rmse  = rmse(test.iloc[1:],  p_test_pred.iloc[1:])
            p_ext_rmse   = extreme_rmse(test.iloc[1:], p_test_pred.iloc[1:])

            # ---------- AR(1) ----------
            ar1 = AutoReg(train, lags=1, old_names=False).fit()

            # With AutoReg's default constant trend the fitted parameters
            # are ordered (intercept, lag-1 coefficient).
            intercept, slope = np.asarray(ar1.params)

            # Build one-step-ahead forecasts from the observed previous value.
            # Calling ``predict`` past the end of the fitted sample yields
            # recursive multi-step forecasts even with ``dynamic=False``,
            # which would not be comparable to the one-step persistence
            # baseline. Both models are now scored on the same targets
            # (``iloc[1:]`` of each split).
            ar_train_pred = _ar1_one_step(train, intercept, slope)
            ar_test_pred  = _ar1_one_step(test, intercept, slope)

            ar_train_rmse = rmse(train.iloc[1:], ar_train_pred)
            ar_test_rmse  = rmse(test.iloc[1:], ar_test_pred)
            ar_ext_rmse   = extreme_rmse(test.iloc[1:], ar_test_pred)

            results.append({
                "gauge": gauge,
                "scale": scale,

                "persistence_train_rmse": p_train_rmse,
                "persistence_test_rmse":  p_test_rmse,
                "persistence_extreme_rmse": p_ext_rmse,

                "ar1_train_rmse": ar_train_rmse,
                "ar1_test_rmse":  ar_test_rmse,
                "ar1_extreme_rmse": ar_ext_rmse,

                # Ratio of out-of-sample to in-sample error; undefined when
                # the in-sample error is zero.
                "ar1_rmse_inflation": (
                    ar_test_rmse / ar_train_rmse if ar_train_rmse > 0 else np.nan
                ),
            })

    except Exception as e:
        # Best effort per gauge: report, remember the error, and continue
        # with the remaining gauges.
        failures.append((gauge, e))
        print(f"Gauge {gauge} failed: {e}")

# A run in which nothing succeeded is a bug in the pipeline rather than a
# data problem with individual gauges; surface the first underlying error
# instead of continuing with an empty result list.
if failures and not results:
    first_gauge, first_error = failures[0]
    raise RuntimeError(
        f"All {len(failures)} gauges failed; first failure was gauge "
        f"{first_gauge}: {first_error!r}"
    ) from first_error


Gauge 03004 failed: got an unexpected keyword argument 'squared'
Gauge 03008 failed: got an unexpected keyword argument 'squared'
Gauge 03014 failed: got an unexpected keyword argument 'squared'
Gauge 03028 failed: got an unexpected keyword argument 'squared'
Gauge 03030 failed: got an unexpected keyword argument 'squared'
Gauge 03037 failed: got an unexpected keyword argument 'squared'
Gauge 03038 failed: got an unexpected keyword argument 'squared'
Gauge 03039 failed: got an unexpected keyword argument 'squared'
Gauge 03053 failed: got an unexpected keyword argument 'squared'
Gauge 03056 failed: got an unexpected keyword argument 'squared'
Gauge 03059 failed: got an unexpected keyword argument 'squared'
Gauge 03061 failed: got an unexpected keyword argument 'squared'
Gauge 03064 failed: got an unexpected keyword argument 'squared'
Gauge 03070 failed: got an unexpected keyword argument 'squared'
Gauge 03074 failed: got an unexpected keyword argument 'squared'
Gauge 03078 failed: got a