In [None]:
import polars as pl
from datetime import datetime

In [None]:
# Lazily scan the ensemble dataset, keeping only rows whose `local_power`
# value is present.
dataset_path = "data/windpower_ensemble_dataset.parquet"
data = pl.scan_parquet(dataset_path).filter(
    pl.col("local_power").is_not_null()
)

# Number of distinct (time_ref, sid, windpark_name) combinations per bidding area.
(
    data.select(["time_ref", "sid", "windpark_name", "bidding_area"])
    .unique()
    .group_by(["bidding_area"])
    .agg(pl.len().alias("n"))
    .collect()
)

In [None]:
# Per-windpark ensemble predictions (columns local_power_pred_00 … _14).
local_preds = pl.scan_parquet("data/local_power_pred.parquet")

# Observation-side columns needed for the area-level evaluation.
power_columns = [
    "time_ref",
    "time",
    "lt",
    "sid",
    "windpark_name",
    "local_power",
    "power",
    "operating_power_max",
]
power = pl.scan_parquet("data/windpower_ensemble_dataset.parquet").select(
    power_columns
)

# One aggregation per ensemble member: the per-park predictions are summed
# within each group and renamed from local_power_pred_XX to power_pred_XX.
member_sums = [
    pl.col(f"local_power_pred_{k:02d}").sum().alias(f"power_pred_{k:02d}")
    for k in range(15)
]

# Attach the observations to the predictions, then collapse the parks of each
# bidding area into one row per (bidding_area, time_ref, time, lt).
predictions_with_power = local_preds.join(
    power, on=["time_ref", "time", "sid", "windpark_name"]
)
df_area_eval = (
    predictions_with_power.group_by(
        "bidding_area", "time_ref", "time", "lt", maintain_order=True
    )
    .agg(
        # Takes the first `power` value in the group — presumably `power` is
        # already an area-level quantity repeated per park; TODO confirm.
        pl.col("power").first(),
        pl.col("operating_power_max").sum(),
        *member_sums,
    )
    .collect()
)
df_area_eval

In [None]:
import plotly.express as px

# Histogram of `power` divided by `operating_power_max`, with one overlaid
# series per bidding area.
df_relative_power = df_area_eval.with_columns(
    (pl.col("power") / pl.col("operating_power_max")).alias("relative_power")
)
px.histogram(
    df_relative_power,
    x="relative_power",
    color="bidding_area",
    barmode="overlay",
)

In [None]:
def ensemble_mean(variable, n_members=15):
    """Return an expression for the row-wise mean across ensemble members.

    Args:
        variable: Column-name prefix. Member columns are addressed as
            ``f"{variable}_{k:02d}"`` for ``k`` in ``range(n_members)``.
        n_members: Number of ensemble member columns to combine. Defaults to
            15, the value that was previously hard-coded.

    Returns:
        A polars expression aliased to ``f"{variable}_mean"``.

    Raises:
        ValueError: If ``n_members`` is smaller than 1.
    """
    if n_members < 1:
        raise ValueError(f"n_members must be >= 1, got {n_members}")
    member_columns = [f"{variable}_{k:02d}" for k in range(n_members)]
    return pl.mean_horizontal(pl.col(member_columns)).alias(f"{variable}_mean")


def ensemble_std(variable, n_members=15):
    """Return an expression for the row-wise standard deviation across members.

    The member columns are gathered into a list column and reduced with
    ``.list.std()`` using that method's default settings.

    Args:
        variable: Column-name prefix. Member columns are addressed as
            ``f"{variable}_{k:02d}"`` for ``k`` in ``range(n_members)``.
        n_members: Number of ensemble member columns to combine. Defaults to
            15, the value that was previously hard-coded.

    Returns:
        A polars expression aliased to ``f"{variable}_std"``.

    Raises:
        ValueError: If ``n_members`` is smaller than 1.
    """
    if n_members < 1:
        raise ValueError(f"n_members must be >= 1, got {n_members}")
    member_columns = [f"{variable}_{k:02d}" for k in range(n_members)]
    return (
        pl.concat_list(pl.col(member_columns))
        .list.std()
        .alias(f"{variable}_std")
    )


def ensemble_range(variable, n_members=15):
    """Return an expression for the row-wise spread (max - min) across members.

    Args:
        variable: Column-name prefix. Member columns are addressed as
            ``f"{variable}_{k:02d}"`` for ``k`` in ``range(n_members)``.
        n_members: Number of ensemble member columns to combine. Defaults to
            15, the value that was previously hard-coded.

    Returns:
        A polars expression aliased to ``f"{variable}_range"``.

    Raises:
        ValueError: If ``n_members`` is smaller than 1.
    """
    if n_members < 1:
        raise ValueError(f"n_members must be >= 1, got {n_members}")
    # Build the column list once and reuse it for both reductions.
    member_columns = [f"{variable}_{k:02d}" for k in range(n_members)]
    highest = pl.max_horizontal(pl.col(member_columns))
    lowest = pl.min_horizontal(pl.col(member_columns))
    return (highest - lowest).alias(f"{variable}_range")


# Bidding area whose aggregated forecast is modelled in the following cells.
bidding_area = "ELSPOT NO3"

# Rows of the selected area, reduced to the observed `power` and three
# ensemble summaries of the power_pred_XX columns. Rows containing any null
# are dropped.
df = (
    df_area_eval.filter(pl.col("bidding_area") == bidding_area)
    .select(
        "time_ref",
        "time",
        "lt",
        "power",
        ensemble_mean("power_pred"),
        ensemble_std("power_pred"),
        ensemble_range("power_pred"),
    )
    .drop_nulls()
)

train_cutoff = datetime(2024, 1, 1)
val_cutoff = datetime(2025, 1, 1)

# Training window is [train_cutoff, val_cutoff); validation is everything from
# val_cutoff onwards. The lower bound previously read `<= train_cutoff`, which
# made the `< val_cutoff` predicate redundant and selected only runs up to
# train_cutoff.
df_train = df_area_eval.filter(
    pl.col("time_ref") >= train_cutoff, pl.col("time_ref") < val_cutoff
)
df_val = df_area_eval.filter(pl.col("time_ref") >= val_cutoff)
# NOTE(review): df_train / df_val are cut from df_area_eval (all bidding areas,
# raw member columns), while the arrays below come from the full `df` —
# confirm whether the split was meant to be applied to `df` instead.

# Model inputs as NumPy arrays, row-aligned with `df`.
t = df["power_pred_mean"].clip(0, None).to_numpy()  # ensemble mean, floored at 0
s = df["power_pred_std"].to_numpy()  # ensemble standard deviation
T = df["power"].to_numpy()  # observed power (regression target)

In [None]:
import numpy as np
import pymc as pm
import arviz as az

# Seed shared by every stochastic step in this cell so that a fresh
# "Restart & Run All" reproduces the same fit and the same predictive draws.
RANDOM_SEED = 42

# Summary statistics of the two predictors; the standard deviations set the
# scale of the intercept priors below.
t_mu, t_sd = t.mean(), t.std()
s_mu, s_sd = s.mean(), s.std()
# Standardised predictors.
# NOTE(review): these are not used by the model below, which is fitted on the
# raw `t` and `s` — confirm whether that is intended.
t_std = (t - t_mu) / t_sd
s_std = (s - s_mu) / s_sd

with pm.Model() as agg_norm:
    # Mean model: T ~ beta0 + beta1 * t, with a non-negative slope.
    beta0 = pm.Normal("beta0", 0, t_sd)
    beta1 = pm.HalfNormal("beta1", 1 / 0.8)
    # Scale model: sigma = log(1 + exp(gamma0 + gamma1 * s)), which keeps
    # sigma positive; the slope is again constrained to be non-negative.
    gamma0 = pm.Normal("gamma0", 0, s_sd)
    gamma1 = pm.HalfNormal("gamma1", 1 / 0.8)

    μ = pm.Deterministic("mu", beta0 + beta1 * t)
    sigma = pm.Deterministic("sigma", pm.math.log1pexp(gamma0 + gamma1 * s))
    y = pm.Normal("T", mu=μ, sigma=sigma, observed=T)

    # Fast inference (works very well for linear-Gaussian): fit ADVI, then
    # draw 2000 samples from the fitted approximation.
    approx = pm.fit(n=20_000, method="advi", random_seed=RANDOM_SEED)
    idata = approx.sample(2_000, random_seed=RANDOM_SEED)
    # Posterior predictive draws of T, plus mu and sigma, for each draw above.
    ppc = pm.sample_posterior_predictive(
        idata, var_names=["T", "mu", "sigma"], random_seed=RANDOM_SEED
    )


In [None]:
# Run NUTS on the same model and keep the result in its own InferenceData.
# `idata.extend(...)` was used here before, but with its default join="left"
# it skips every group that `idata` already has: the NUTS `posterior` was
# silently discarded, and only NUTS `sample_stats` was attached next to the
# ADVI posterior draws.
with agg_norm:
    idata_nuts = pm.sample(1000, tune=2000)

# `idata` (the ADVI draws) is left untouched, so cells that reference it keep
# seeing the same posterior as before.
idata_nuts

In [None]:
# Trace plot restricted to the two mean-model parameters.
az.plot_trace(
    idata,
    var_names=["beta0", "beta1"],
)

In [None]:
# Posterior predictive draws of the observed variable "T".
da = ppc.posterior_predictive["T"]
sample_dims = ("chain", "draw")

# Point forecast: average over every chain and draw.
T_point = da.mean(sample_dims)

# Central 95 % interval: the 2.5 % and 97.5 % quantiles over the same dims.
interval_95 = da.quantile([0.025, 0.975], dim=sample_dims)
lo95 = interval_95.isel(quantile=0)
hi95 = interval_95.isel(quantile=1)

In [None]:
# Attach the posterior predictive summary to the feature frame. The arrays are
# matched to `df` by row position.
df_with_interval = df.with_columns(
    power_pred=T_point.values,
    lower_bound=lo95.values,
    upper_bound=hi95.values,
)

# Keep rows with `lt` below 24 and order them by `time` for plotting.
# To inspect a single forecast run, add e.g.
#   .filter(pl.col("time_ref") == datetime(2024, 10, 20, 9))
df_plot = df_with_interval.filter(pl.col("lt") < 24).sort("time")
df_plot

In [None]:
import plotly.graph_objs as go

def _line_trace(name, column, color):
    """Build a visible line trace of `df_plot[column]` against `df_plot["time"]`."""
    return go.Scatter(
        name=name,
        x=df_plot["time"],
        y=df_plot[column],
        mode="lines",
        line={"color": color},
    )


def _bound_trace(name, column, **fill_options):
    """Build a zero-width, legend-less trace used as an edge of the shaded band."""
    return go.Scatter(
        name=name,
        x=df_plot["time"],
        y=df_plot[column],
        mode="lines",
        marker={"color": "#444"},
        line={"width": 0},
        showlegend=False,
        **fill_options,
    )


# Trace order matters: the lower bound uses fill="tonexty", which shades the
# area up to the trace added immediately before it (the upper bound).
traces = [
    _line_trace("y_true", "power", "rgb(237, 55, 31)"),
    _line_trace("y_pred", "power_pred", "rgb(31, 119, 180)"),
    _line_trace("y_pred_uncalibrated", "power_pred_mean", "rgb(27, 148, 9)"),
    _bound_trace("Upper Bound", "upper_bound"),
    _bound_trace(
        "Lower Bound",
        "lower_bound",
        fillcolor="rgba(68, 68, 68, 0.3)",
        fill="tonexty",
    ),
]

fig = go.Figure(traces)
fig.update_layout(
    title={"text": "Continuous, variable value error bars"},
    yaxis={"title": {"text": "Power"}},
    hovermode="x",
)
fig.show()