In [None]:
# Switch to the project root so that the relative "data/" and "runs" paths used
# further down resolve.
%cd ~/projects/wind/

In [None]:
from datetime import datetime, timedelta

import numpy as np
import plotly.express as px
import polars as pl
import torch

In [None]:
def fill_gaps(df: pl.LazyFrame) -> pl.LazyFrame:
    """Re-index ``df`` onto an hourly ``time`` axis and forward-fill nulls.

    An hourly range from the smallest to the largest ``time`` value of ``df``
    is generated and left-joined with ``df``. Every null in the result,
    including those in rows introduced by the join, is then forward-filled.

    Args:
        df: Lazy frame containing a ``time`` column.

    Returns:
        Lazy frame with one row per generated hourly timestamp (more if
        ``df`` holds duplicate timestamps, since the join keeps all matches).
    """
    hourly_axis = df.select(
        # Name the generated column explicitly so the join key below does not
        # depend on how polars names the output of ``datetime_range``.
        pl.datetime_range(pl.col("time").min(), pl.col("time").max(), "1h").alias(
            "time"
        )
    )
    return hourly_axis.join(df, on="time", how="left").fill_null(strategy="forward")


# Window geometry, expressed in rows of the hourly, gap-filled series.
lookback = 48  # input rows per sample, ending at the reference row
forecast_lead_time = 39  # rows between the reference row and the first target row
forecast_window = 24  # target rows per sample
bidding_areas = [
    "ELSPOT NO1",
    "ELSPOT NO2",
    "ELSPOT NO3",
    "ELSPOT NO4",
]
windpower = (
    pl.scan_parquet("data/wind_power_per_bidzone.parquet")
    .rename({"__index_level_0__": "time"})
    .filter(pl.col("time") >= datetime(2021, 1, 1))
    .sort("time")
    .pipe(fill_gaps)
    .with_columns(
        # Row-shifted timestamps: for each reference row, the first lookback
        # row and the first / last row of the forecast window. Rows too close
        # to either end of the series get nulls here.
        lookback_start=pl.col("time").shift(lookback - 1),
        window_start=pl.col("time").shift(-forecast_lead_time),
        window_stop=pl.col("time").shift(-(forecast_lead_time + forecast_window - 1)),
    )
)
# Per-area maximum over the loaded period, used as the normalisation constant.
# Columns are taken from ``bidding_areas`` so the list is the single source of
# truth for which areas are processed.
capacity = (
    windpower.select(pl.col(area).max() for area in bidding_areas)
    .collect()
    .to_numpy()[0]
)
windpower = windpower.with_columns(
    pl.col(area).forward_fill() / area_capacity
    for area, area_capacity in zip(bidding_areas, capacity)
).collect()

windpower.drop_nulls()

## Auto-Regressive

In [None]:
# Time-ordered 2024 values of a single bidding area as a NumPy array.
bidding_area = "ELSPOT NO3"
ts = (
    windpower.filter(pl.col("time").dt.year() == 2024)
    .sort("time")
    .select(bidding_area)
    .to_series()
    .to_numpy()
)
ts

In [None]:
from statsmodels.tsa.stattools import adfuller

# Run the ADF test on the series; the first two entries of the result are
# reported as the test statistic and the p-value.
adf_test = adfuller(ts)
adf_statistic, adf_pvalue = adf_test[:2]
print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % adf_pvalue)

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import numpy as np
# (Partial) autocorrelation of the series for lags from the forecast lead time
# up to 119 hours; shorter lags are not available at forecast time.
acf_lags = np.arange(forecast_lead_time, 120)
# plot_acf / plot_pacf each return the Figure they draw on; adjust that
# figure's axes directly instead of relying on the pyplot "current axes".
acf_fig = plot_acf(ts, lags=acf_lags)
acf_fig.axes[0].set_ylim(-0.1, 0.5)
pacf_fig = plot_pacf(ts, lags=acf_lags)
pacf_fig.axes[0].set_ylim(-0.05, 0.05)
plt.show()

In [None]:
from statsmodels.tsa.ar_model import AutoReg
import seaborn as sns
# Fit an AR(2) model on a short slice of the series and forecast the slice
# that immediately follows it.
start = 1000
n_train = 500
n_val = 100
t_train = np.arange(n_train)
t_val = np.arange(n_train, n_train + n_val)
train = ts[start:start + n_train]
val = ts[start + n_train:start + n_train + n_val]
ar_model = AutoReg(train, lags=2).fit()

out = 'AIC: {0:0.3f}, HQIC: {1:0.3f}, BIC: {2:0.3f}'
print(out.format(ar_model.aic, ar_model.hqic, ar_model.bic))

# Out-of-sample forecast for exactly the validation steps. ``end`` is
# inclusive, so the range is [n_train, n_train + n_val - 1] and lines up with
# ``t_val``.
pred = ar_model.get_prediction(start=n_train, end=n_train + n_val - 1)
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 4))
sns.lineplot(x=t_val, y=val, marker='o', label='test', color='grey', ax=ax)
sns.lineplot(x=t_train, y=train, marker='o', label='train', ax=ax)
# Plot the forecast itself; ``se_mean`` is only its standard error.
sns.lineplot(x=t_val, y=pred.predicted_mean, marker='o', label='pred', ax=ax)
ax.set_title('Sample Time Series')
plt.tight_layout()
plt.show()

In [None]:
# Standard error of the forecast mean over the validation horizon.
# ``end`` is inclusive, hence the ``- 1`` to get exactly ``n_val`` steps.
res = ar_model.get_prediction(start=n_train, end=n_train + n_val - 1)
res.se_mean

In [None]:
import pymc as pm

# AR(2) process with an intercept, i.e. three coefficients in total.
with pm.Model() as AR2:
    # coefs[0] is the constant term, followed by one coefficient per lag.
    coefs = pm.Normal("coefs", mu=0, size=3)
    # One initial value is needed per lag, hence size=2.
    init = pm.Normal.dist(mu=5, size=2)
    ar2 = pm.AR(
        "ar2",
        coefs,
        sigma=1.0,
        init_dist=init,
        constant=True,
        steps=500,
    )

In [None]:
## Series the AR model is fitted to.
## NOTE(review): bound to the AutoReg training slice so that this cell runs on a
## fresh kernel -- confirm this is the intended series. The prior locations
## below (10 / 9) look far from a series that was divided by its maximum;
## rescale them before trusting the fit.
ar1_data = train

## Set up a dictionary for the specification of our priors
## We set up the dictionary to specify size of the AR coefficients in
## case we want to vary the AR lags.
priors = {
    "coefs": {"mu": [10, 0.2], "sigma": [0.1, 0.1], "size": 2},
    "sigma": 8,
    "init": {"mu": 9, "sigma": 0.1, "size": 1},
}

## Initialise the model
with pm.Model() as AR:
    pass

## Define the time interval for fitting the data
t_data = list(range(len(ar1_data)))
## Add the time interval as a mutable coordinate to the model to allow for future predictions
AR.add_coord("obs_id", t_data, mutable=True)

with AR:
    ## Data containers to enable prediction
    t = pm.MutableData("t", t_data, dims="obs_id")
    y = pm.MutableData("y", ar1_data, dims="obs_id")

    # The first coefficient is the constant term; every coefficient of the AR
    # process gets its own prior location and scale.
    coefs = pm.Normal("coefs", priors["coefs"]["mu"], priors["coefs"]["sigma"])
    sigma = pm.HalfNormal("sigma", priors["sigma"])
    # We need one init variable for each lag, hence size is variable too
    init = pm.Normal.dist(
        priors["init"]["mu"], priors["init"]["sigma"], size=priors["init"]["size"]
    )
    # Steps of the AR model minus the lags required
    ar1 = pm.AR(
        "ar",
        coefs,
        sigma=sigma,
        init_dist=init,
        constant=True,
        steps=t.shape[0] - (priors["coefs"]["size"] - 1),
        dims="obs_id",
    )

    # The Likelihood
    outcome = pm.Normal("likelihood", mu=ar1, sigma=sigma, observed=y, dims="obs_id")
    ## Sampling: prior predictive, posterior, then posterior predictive, all
    ## collected in the same inference-data object.
    idata_ar = pm.sample_prior_predictive()
    idata_ar.extend(pm.sample(2000, random_seed=100, target_accept=0.95))
    idata_ar.extend(pm.sample_posterior_predictive(idata_ar))

## PyTorch

In [None]:
# One sample per day, anchored at the 09:00 row. Rows whose lookback or
# forecast window would run past the ends of the series are dropped.
times = (
    windpower.filter(pl.col("time").dt.hour() == 9)
    .select("time", "lookback_start", "window_start", "window_stop")
    .drop_nulls()
)
N = len(times)
n_areas = len(bidding_areas)
# Layout: (sample, bidding area, time step).
X = torch.zeros((N, n_areas, lookback), dtype=torch.float32)
y = torch.zeros((N, n_areas, forecast_window), dtype=torch.float32)
for i, (time, lookback_start, window_start, window_stop) in enumerate(
    times.iter_rows()
):
    # ``is_between`` is inclusive on both ends by default. The selected rows
    # come out as (time step, area) and are transposed to (area, time step).
    X[i] = (
        windpower.filter(pl.col("time").is_between(lookback_start, time))
        .select(bidding_areas)
        .to_torch()
        .t_()
    )

    y[i] = (
        windpower.filter(pl.col("time").is_between(window_start, window_stop))
        .select(bidding_areas)
        .to_torch()
        .t_()
    )

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs end to end.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Chronological split: the first ``train_frac`` of the samples for training,
# the tail for validation.
val_frac = 0.25
train_frac = 1 - val_frac
n_train = int(N * train_frac)
# Leave a gap between the two splits.
# NOTE(review): the gap is counted in hourly steps, but there is only one
# sample per day (09:00 rows), so this skips that many *days* -- confirm the
# intended size.
val_start = n_train + (forecast_lead_time + forecast_window - 1)

X_train = X[:n_train].to(device)
y_train = y[:n_train].to(device)

X_val = X[val_start:].to(device)
y_val = y[val_start:].to(device)
n_val = X_val.shape[0]

In [None]:
# Number of samples in the training and validation splits.
n_train, n_val

In [None]:
# Per-area capacity as a tensor of shape (1, n_areas, 1), so it broadcasts
# against (sample, area, time step) tensors on the training device.
loss_scale = torch.Tensor(capacity)[None, :, None].to(device)

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class LinearBlock(nn.Module):
    """Square fully connected layer followed by a sigmoid and dropout."""

    def __init__(self, width, dropout):
        """Create a ``width`` -> ``width`` linear layer and a dropout layer.

        Args:
            width: Number of input and output features.
            dropout: Dropout probability applied after the activation.
        """
        super().__init__()
        self.linear = nn.Linear(width, width)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``dropout(sigmoid(linear(x)))``."""
        activated = self.linear(x).sigmoid()
        return self.dropout(activated)


class TSModel(nn.Module):
    """MLP mapping a flattened multi-series window to per-series outputs in (0, 1).

    The input is flattened over all non-batch axes, passed through a sigmoid
    stem, ``depth`` ``LinearBlock`` layers and a sigmoid head, then reshaped
    to ``(batch, n_series, out_dims // n_series)``.

    Args:
        in_dim: Number of features per sample after flattening.
        out_dims: Total number of outputs per sample.
        width: Hidden width of the stem and of every block.
        depth: Number of ``LinearBlock`` layers after the stem; 0 for none.
        dropout: Dropout probability used in the stem and in the blocks.
        n_series: Size of the second output axis. Defaults to 4.

    Raises:
        ValueError: If ``out_dims`` is not a multiple of ``n_series``.
    """

    def __init__(self, in_dim, out_dims, width=32, depth=4, dropout=0.0, n_series=4):
        super().__init__()
        if out_dims % n_series != 0:
            raise ValueError(
                f"out_dims ({out_dims}) must be a multiple of n_series ({n_series})"
            )
        self.in_dim = in_dim
        self.out_dims = out_dims
        self.depth = depth
        self.n_series = n_series

        self.stem = nn.Sequential(
            nn.Linear(in_dim, width),
            nn.Sigmoid(),
            nn.Dropout(dropout),
        )
        if self.depth > 0:
            self.layers = nn.Sequential(
                *(LinearBlock(width, dropout) for _ in range(depth))
            )

        # The sigmoid keeps every output in (0, 1).
        self.head = nn.Sequential(nn.Linear(width, out_dims), nn.Sigmoid())

    def forward(self, x):
        """Return outputs of shape ``(batch, n_series, out_dims // n_series)``."""
        batch = x.shape[0]
        # Tensor methods are used so this class does not need the ``torch``
        # module name to be in scope.
        z = self.stem(x.flatten(1))
        if self.depth > 0:
            z = self.layers(z)
        z = self.head(z)
        return z.view(batch, self.n_series, -1)


class LogitRegressor(nn.Module):
    """Single linear layer plus sigmoid over the flattened input.

    Args:
        in_dim: Number of features per sample after flattening.
        out_dims: Total number of outputs per sample.
        n_series: Size of the second output axis. Defaults to 4.

    Raises:
        ValueError: If ``out_dims`` is not a multiple of ``n_series``.
    """

    def __init__(self, in_dim, out_dims, n_series=4):
        super().__init__()
        if out_dims % n_series != 0:
            raise ValueError(
                f"out_dims ({out_dims}) must be a multiple of n_series ({n_series})"
            )
        self.n_series = n_series
        self.weights = nn.Sequential(nn.Linear(in_dim, out_dims), nn.Sigmoid())

    def forward(self, x):
        """Return outputs of shape ``(batch, n_series, out_dims // n_series)``."""
        batch = x.shape[0]
        # Tensor methods are used so this class does not need the ``torch``
        # module name to be in scope.
        z = self.weights(x.flatten(1))
        return z.view(batch, self.n_series, -1)

In [None]:
from torch.utils.tensorboard import SummaryWriter
import os

# Hyper-parameters.
batch_size = 32
epochs = 500
lr = 1e-4
patience = 20  # epochs without improvement before stopping
width = 128
depth = 0
dropout = 0.0
# Smallest decrease in validation MSE that counts as an improvement. It is
# expressed per element and corresponds to a 1e-4 decrease of the summed
# squared error of one sample.
min_delta = 1e-4 / (len(bidding_areas) * forecast_window)

os.makedirs("runs", exist_ok=True)
run_name = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}__ts"
writer = SummaryWriter(log_dir=os.path.join("runs", run_name))

# LogitRegressor(4 * lookback, 4 * forecast_window) takes the same first two
# arguments and can be swapped in here as a linear baseline.
model = TSModel(
    4 * lookback, 4 * forecast_window, width=width, depth=depth, dropout=dropout
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

best_val = float("inf")
best_state = None
pat_since_improve = 0
for epoch in range(1, epochs + 1):
    model.train()
    train_sse = torch.tensor(0.0, device=device)
    for i in range(0, n_train, batch_size):
        xb, yb = X_train[i : i + batch_size], y_train[i : i + batch_size]

        preds = model(xb)
        loss = nn.functional.mse_loss(preds, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Mean loss times element count = summed squared error of the batch,
        # which keeps the epoch average exact for a short final batch.
        train_sse += loss.detach().float() * yb.numel()

    # Divide by the number of target *elements* so the value is a true MSE.
    avg_train_loss = train_sse.item() / y_train.numel()

    model.eval()
    val_sse = torch.tensor(0.0, device=device)
    with torch.no_grad():
        for i in range(0, n_val, batch_size):
            xb, yb = X_val[i : i + batch_size], y_val[i : i + batch_size]
            preds = model(xb)
            val_sse += nn.functional.mse_loss(preds, yb, reduction="sum")

    avg_val_loss = val_sse.item() / y_val.numel()
    val_rmse = avg_val_loss**0.5
    print(
        f"Epoch {epoch:02d} | train MSE: {avg_train_loss:.4f} | val MSE: {avg_val_loss:.4f} | val RMSE: {val_rmse:.3f}"
    )
    writer.add_scalar("Loss/train_MSE", avg_train_loss, epoch)
    writer.add_scalar("Loss/val_MSE", avg_val_loss, epoch)
    writer.add_scalar("Metrics/val_RMSE", val_rmse, epoch)
    writer.flush()

    if avg_val_loss < best_val - min_delta:
        best_val = avg_val_loss
        pat_since_improve = 0
        # Snapshot on the CPU so the copy is independent of further updates.
        best_state = {
            k: v.detach().cpu().clone() for k, v in model.state_dict().items()
        }
    else:
        pat_since_improve += 1
        if pat_since_improve >= patience:
            print(f"Early stop at epoch {epoch} (best val={best_val:.4f})")
            break

# Continue with the best weights seen on the validation split rather than
# those of the last epoch, and leave the model in eval mode.
if best_state is not None:
    model.load_state_dict(best_state)
model.eval()
writer.close()

In [None]:
# Predict on the whole validation split. Set eval mode explicitly so the
# result does not depend on which phase the training cell was left in.
model.eval()
with torch.no_grad():
    pred = model(X_val)

In [None]:
lead_times = [
    str(lt) for lt in range(forecast_lead_time, forecast_lead_time + forecast_window)
]
# Reference time of every validation sample. The offset must be ``val_start``
# (the same one used to slice X_val / y_val), otherwise the lengths differ.
val_time_ref = times["time"][val_start:]


def _to_long(values, value_name):
    """Reshape a (sample, area, lead time) tensor into a long polars frame.

    Args:
        values: Tensor whose second axis follows ``bidding_areas`` and whose
            last axis follows ``lead_times``.
        value_name: Name of the value column in the result.

    Returns:
        Frame with columns ``bidding_area``, ``time_ref``, ``lt`` (integer
        lead time in hours), ``value_name`` and ``time`` (``time_ref`` plus
        ``lt`` hours).
    """
    per_area = [
        pl.DataFrame(
            values[:, k, :].detach().cpu().numpy(), schema=lead_times, orient="row"
        ).with_columns(
            bidding_area=pl.lit(area),
            time_ref=val_time_ref,
        )
        for k, area in enumerate(bidding_areas)
    ]
    return (
        pl.concat(per_area)
        .unpivot(
            index=["bidding_area", "time_ref"],
            variable_name="lt",
            value_name=value_name,
        )
        # Cast in its own step: expressions in one ``with_columns`` call all
        # see the input frame, where ``lt`` is still a string.
        .with_columns(lt=pl.col("lt").cast(int))
        .with_columns(time=pl.col("time_ref") + pl.duration(hours=pl.col("lt")))
    )


df_pred = _to_long(pred, "y_pred")
df_true = _to_long(y_val, "y_true")

df = df_true.join(df_pred, on=["time_ref", "time", "lt", "bidding_area"])
df

In [None]:
# Line plot of every value column of ``df`` over time for one bidding area.
bidding_area = "ELSPOT NO3"
area_long = (
    df.filter(pl.col("bidding_area") == bidding_area)
    .unpivot(index=["time_ref", "time", "lt", "bidding_area"])
    .sort("time")
)
px.line(area_long, x="time", y="value", color="variable")

## Bayesian Neural Network

In [None]:
import numpy as np
import pytensor
import pytensor.tensor as pt
import pymc as pm

# Seeded NumPy generator used for the initial network weights.
RANDOM_SEED = 9927
rng = np.random.default_rng(RANDOM_SEED)
# Float dtype configured in pytensor; initial weights are cast to it.
floatX = pytensor.config.floatX

In [None]:
bidding_area_index = 0
# Slice from the full tensors ``X`` / ``y`` (created without a device argument)
# instead of from ``X_train`` etc., so re-running this cell does not re-slice
# its own output.
# NOTE: these names shadow the torch tensors used by the training cell; re-run
# the train/validation split cell before training the torch model again.
X_train = X[:n_train, bidding_area_index, :].contiguous().numpy()
y_train = y[:n_train, bidding_area_index, :].contiguous().numpy()
X_val = X[val_start:, bidding_area_index, :].contiguous().numpy()
y_val = y[val_start:, bidding_area_index, :].contiguous().numpy()

X_train.shape, y_train.shape

In [None]:
def relu(x):
    """Return ``x`` with every negative entry replaced by 0."""
    is_negative = pt.lt(x, 0)
    return pt.switch(is_negative, 0, x)


def construct_nn(batch_size=50, n_hidden=5):
    """Build a two-hidden-layer Bayesian network with a Normal likelihood.

    Reads the module-level ``X_train`` / ``y_train`` arrays, ``rng`` and
    ``floatX``. Two output heads share the second hidden layer: one gives the
    mean (sigmoid, so it lies in (0, 1)) and one the standard deviation
    (exponential, so it is positive) of every target column.

    Args:
        batch_size: Minibatch size used for the observed data.
        n_hidden: Number of units in each of the two hidden layers.

    Returns:
        The ``pm.Model`` holding the network.
    """
    n_features = X_train.shape[1]
    n_targets = y_train.shape[1]

    # Random initial values for each weight matrix.
    init_1 = rng.standard_normal(size=(n_features, n_hidden)).astype(floatX)
    init_2 = rng.standard_normal(size=(n_hidden, n_hidden)).astype(floatX)
    init_out_mu = rng.standard_normal(size=(n_hidden, n_targets)).astype(floatX)
    init_out_sig = rng.standard_normal(size=(n_hidden, n_targets)).astype(floatX)

    coords = {
        "hidden_layer_1": np.arange(n_hidden),
        "hidden_layer_2": np.arange(n_hidden),
        "train_cols": np.arange(n_features),
        "obs_id": np.arange(X_train.shape[0]),
        "target_cols": np.arange(n_targets),
    }

    with pm.Model(coords=coords) as neural_network:
        # Full training data registered on the model ...
        X_data = pm.Data("X_data", X_train, dims=("obs_id", "train_cols"))
        Y_data = pm.Data("Y_data", y_train, dims=("obs_id", "target_cols"))

        # ... and the minibatch views the network is actually built on.
        ann_input, ann_output = pm.Minibatch(X_data, Y_data, batch_size=batch_size)

        # Weights from input to 1st hidden layer
        weights_in_1 = pm.Normal(
            "w_in_1", 0, sigma=1, initval=init_1, dims=("train_cols", "hidden_layer_1")
        )

        # Weights from 1st to 2nd hidden layer
        weights_1_2 = pm.Normal(
            "w_1_2",
            0,
            sigma=1,
            initval=init_2,
            dims=("hidden_layer_1", "hidden_layer_2"),
        )

        # Weights from 2nd hidden layer to the mean / standard-deviation heads
        weights_2_out_mu = pm.Normal(
            "w_2_out_mu",
            0,
            sigma=1,
            initval=init_out_mu,
            dims=("hidden_layer_2", "target_cols"),
        )
        weights_2_out_sig = pm.Normal(
            "w_2_out_sig",
            0,
            sigma=1,
            initval=init_out_sig,
            dims=("hidden_layer_2", "target_cols"),
        )

        # Hidden layers and the mean head use sigmoid activations; the
        # standard-deviation head uses exp to stay positive.
        act_1 = pt.sigmoid(pt.dot(ann_input, weights_in_1))
        act_2 = pt.sigmoid(pt.dot(act_1, weights_1_2))
        act_out_mu = pt.sigmoid(pt.dot(act_2, weights_2_out_mu))
        act_out_sig = pt.exp(pt.dot(act_2, weights_2_out_sig))
        # No separate noise variable is declared: the scale comes from the
        # network head above. (A positional ``pm.HalfNormal(name, 0, 0.1)``
        # would set both ``sigma`` and ``tau``.)

        # Regression -> Normal likelihood with input-dependent mean and scale
        out = pm.Normal(
            "out",
            mu=act_out_mu,
            sigma=act_out_sig,
            observed=ann_output,
            total_size=X_train.shape[0],  # IMPORTANT for minibatches
        )
    return neural_network


# Build the network once with the default minibatch size; the fitting cell
# below enters this model as its context.
neural_network = construct_nn()

In [None]:
%%time

with neural_network:
    approx = pm.fit(n=10_000)

In [None]:
import matplotlib.pyplot as plt

# Per-iteration history recorded by the fit, drawn on explicit axes.
elbo_fig, elbo_ax = plt.subplots()
elbo_ax.plot(approx.hist, alpha=0.3)
elbo_ax.set_ylabel("ELBO")
elbo_ax.set_xlabel("iteration")

In [None]:
# Draw from the fitted approximation, seeded for repeatable draws.
trace = approx.sample(draws=5000, random_seed=RANDOM_SEED)

In [None]:
def sample_posterior_predictive(X_test, Y_test, trace, n_hidden=5):
    """Sample the posterior predictive of the fitted network on new inputs.

    Rebuilds the network of ``construct_nn`` on the full ``X_test`` (no
    minibatching). Weight names, dims, activations and the likelihood match
    that model, so the draws in ``trace`` can be plugged in by name.

    Args:
        X_test: Array of shape (n_obs, n_features).
        Y_test: Array of shape (n_obs, n_targets), used as the observed value
            of the likelihood.
        trace: Posterior draws containing ``w_in_1``, ``w_1_2``,
            ``w_2_out_mu`` and ``w_2_out_sig``.
        n_hidden: Units per hidden layer; must match the fitted model.

    Returns:
        The result of ``pm.sample_posterior_predictive(trace)``.
    """
    coords = {
        "hidden_layer_1": np.arange(n_hidden),
        "hidden_layer_2": np.arange(n_hidden),
        "train_cols": np.arange(X_test.shape[1]),
        "obs_id": np.arange(X_test.shape[0]),
        "target_cols": np.arange(Y_test.shape[1]),
    }
    with pm.Model(coords=coords):
        # Flat placeholders: their values are taken from ``trace``.
        weights_in_1 = pm.Flat("w_in_1", dims=("train_cols", "hidden_layer_1"))
        weights_1_2 = pm.Flat("w_1_2", dims=("hidden_layer_1", "hidden_layer_2"))
        weights_2_out_mu = pm.Flat(
            "w_2_out_mu", dims=("hidden_layer_2", "target_cols")
        )
        weights_2_out_sig = pm.Flat(
            "w_2_out_sig", dims=("hidden_layer_2", "target_cols")
        )

        # Same forward pass as in ``construct_nn``: sigmoid hidden layers, a
        # sigmoid mean head and an exponential standard-deviation head.
        act_1 = pm.math.sigmoid(pm.math.dot(X_test, weights_in_1))
        act_2 = pm.math.sigmoid(pm.math.dot(act_1, weights_1_2))
        act_out_mu = pm.math.sigmoid(pm.math.dot(act_2, weights_2_out_mu))
        act_out_sig = pm.math.exp(pm.math.dot(act_2, weights_2_out_sig))

        # Regression -> Normal likelihood, as in the fitted model
        pm.Normal("out", mu=act_out_mu, sigma=act_out_sig, observed=Y_test)
        return pm.sample_posterior_predictive(trace)


# Evaluate on the validation arrays of the selected bidding area.
ppc = sample_posterior_predictive(X_val, y_val, trace)