In [17]:
import numpy as np
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn

import plotly.graph_objects as go


In [5]:

N_LAGS = 7
HORIZON = 1
BATCH_SIZE = 10

mvtseries = pd.read_csv(
    "../assets/daily_multivariate_timeseries.csv",
    parse_dates=["datetime"],
    index_col="datetime",
)

num_vars = mvtseries.shape[1] + 1


def create_training_set(
    data: pd.DataFrame,
    n_lags: int,
    horizon: int,
    test_size: float = 0.2,
    batch_size: int = 16,
):
    data["target"] = data["Incoming Solar"]
    data["time_index"] = np.arange(len(data))
    data["group_id"] = 0  # Assuming a single group for simplicity

    time_indices = data["time_index"].values

    train_indices, _ = train_test_split(
        time_indices, test_size=test_size, shuffle=False
    )

    train_indices, _ = train_test_split(train_indices, test_size=0.1, shuffle=False)

    train_df = data.loc[data["time_index"].isin(train_indices)]
    train_df_mod = train_df.copy()

    target_scaler = StandardScaler()
    target_scaler.fit(train_df_mod[["target"]])
    train_df_mod["target"] = target_scaler.transform(train_df_mod[["target"]])
    train_df_mod = train_df_mod.drop("Incoming Solar", axis=1)

    feature_names = [
        col for col in data.columns if col != "target" and col != "Incoming Solar"
    ]

    training_dataset = TimeSeriesDataSet(
        train_df_mod,
        time_idx="time_index",
        target="target",
        group_ids=["group_id"],
        max_encoder_length=n_lags,
        max_prediction_length=horizon,
        time_varying_unknown_reals=feature_names,
        scalers={name: StandardScaler() for name in feature_names},
    )

    loader = training_dataset.to_dataloader(batch_size=batch_size, shuffle=False)

    return loader

In [6]:
class LinearRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, X):
        X = X.view(X.size(0), -1)
        return self.linear(X)

In [29]:
data_loader = create_training_set(
    data=mvtseries, n_lags=N_LAGS, horizon=HORIZON, batch_size=BATCH_SIZE, test_size=0.3
)

model = LinearRegressionModel(N_LAGS * num_vars, HORIZON)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 3) Training loop
num_epochs = 40
for epoch in range(num_epochs):
    for batch in data_loader:
        x, y = batch

        X = x["encoder_cont"].squeeze(-1)
        y_pred = model(X)
        y_pred = y_pred.squeeze(1)

        y_actual = y[0].squeeze(1)

        loss = criterion(y_pred, y_actual)

        loss.backward()
        optimizer.step()

        optimizer.zero_grad()

    print(f"epoch: {epoch + 1}, loss = {loss.item():.4f}")

epoch: 1, loss = 0.6125
epoch: 2, loss = 0.7414
epoch: 3, loss = 0.7448
epoch: 4, loss = 0.7453
epoch: 5, loss = 0.7494
epoch: 6, loss = 0.7542
epoch: 7, loss = 0.7584
epoch: 8, loss = 0.7619
epoch: 9, loss = 0.7647
epoch: 10, loss = 0.7671
epoch: 11, loss = 0.7692
epoch: 12, loss = 0.7710
epoch: 13, loss = 0.7727
epoch: 14, loss = 0.7743
epoch: 15, loss = 0.7758
epoch: 16, loss = 0.7772
epoch: 17, loss = 0.7786
epoch: 18, loss = 0.7799
epoch: 19, loss = 0.7811
epoch: 20, loss = 0.7822
epoch: 21, loss = 0.7834
epoch: 22, loss = 0.7844
epoch: 23, loss = 0.7854
epoch: 24, loss = 0.7864
epoch: 25, loss = 0.7873
epoch: 26, loss = 0.7882
epoch: 27, loss = 0.7890
epoch: 28, loss = 0.7899
epoch: 29, loss = 0.7906
epoch: 30, loss = 0.7914
epoch: 31, loss = 0.7921
epoch: 32, loss = 0.7928
epoch: 33, loss = 0.7934
epoch: 34, loss = 0.7940
epoch: 35, loss = 0.7946
epoch: 36, loss = 0.7952
epoch: 37, loss = 0.7958
epoch: 38, loss = 0.7963
epoch: 39, loss = 0.7968
epoch: 40, loss = 0.7973


In [35]:
for batch in data_loader:
    x, y = batch
    print(x)
    # print(X.shape, y.shape)
    break

{'encoder_cat': tensor([], size=(10, 7, 0), dtype=torch.int64), 'encoder_cont': tensor([[[ 2.1183, -0.6983, -0.5650,  0.7197,  0.9889,  0.6426,  0.7891,
          -0.2699, -1.7308,  0.0000],
         [ 0.4179, -0.6983, -0.6152,  0.3485, -0.4228,  0.2042,  0.0807,
           0.2655, -1.7283,  0.0000],
         [ 1.8399, -0.6983,  0.3647,  0.5985, -0.4452,  0.4924, -0.2064,
          -0.1265, -1.7258,  0.0000],
         [ 0.9808, -0.6983, -0.6403,  0.3384, -0.4452,  0.1926,  0.2337,
          -0.1934, -1.7233,  0.0000],
         [ 1.3767, -0.6983,  0.5908, -0.0236, -0.4452, -0.1999,  0.4925,
          -0.6714, -1.7208,  0.0000],
         [ 1.5962, -0.6983,  0.6159, -0.4183, -0.4452, -0.5635,  0.1374,
          -0.4324, -1.7182,  0.0000],
         [ 1.4352, -0.6983, -0.3137,  0.4087, -0.4452,  0.2841,  0.2337,
           0.1890, -1.7157,  0.0000]],

        [[ 0.4179, -0.6983, -0.6152,  0.3485, -0.4228,  0.2042,  0.0807,
           0.2655, -1.7283,  0.0000],
         [ 1.8399, -0.6983,  0

In [39]:
x["encoder_cont"].shape

torch.Size([10, 7, 10])

In [14]:
fig = go.Figure()
for col in mvtseries.columns:
    if col not in ["time_index", "group_id"]:
        fig.add_trace(go.Scatter(x=mvtseries.index, y=mvtseries[col], name=col))
fig.layout.showlegend = True
fig.show()

In [28]:
fig = go.Figure()
index = mvtseries.index
for col in mvtseries.columns:
    if col not in ["time_index", "group_id"]:
        scaler = MinMaxScaler()
        values = scaler.fit_transform(mvtseries[col].values.reshape(-1, 1)).reshape(-1)
        fig.add_trace(go.Scatter(x=mvtseries.index, y=values, name=f"{col}: [{scaler.data_min_[0]}, {scaler.data_max_[0]}]"))
fig.layout.showlegend = True
fig.show()