In [1]:

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping
import pandas as pd
import torch

from pytorch_forecasting import Baseline, NBeats, TimeSeriesDataSet
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.data.examples import generate_ar_data
from pytorch_forecasting.metrics import SMAPE

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=100, seed=42)
data["static"] = 2
data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D")
data


Unnamed: 0,series,time_idx,value,static,date
0,0,0,-0.000000,2,2020-01-01
1,0,1,-0.046501,2,2020-01-02
2,0,2,-0.097796,2,2020-01-03
3,0,3,-0.144397,2,2020-01-04
4,0,4,-0.177954,2,2020-01-05
...,...,...,...,...,...
39995,99,395,-5.587069,2,2021-01-30
39996,99,396,-4.986342,2,2021-01-31
39997,99,397,-5.630228,2,2021-02-01
39998,99,398,-5.745145,2,2021-02-02


In [35]:
import os 
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "7"
import torch
from pytorch_forecasting import TimeSeriesDataSet, NBeats, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE
from pytorch_forecasting.models import NBeatsModel
from pytorch_forecasting.data import NaNLabelEncoder
import pytorch_lightning as pl
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load and preprocess data
df = pd.read_csv('electricity.csv')
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

# Fill missing values
df.fillna(method='ffill', inplace=True)

# Normalize the data
scaler = StandardScaler()
scaled_df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

# Split the data into training and test sets
train_size = int(len(scaled_df) * 0.8)
train_data, test_data = scaled_df[:train_size], scaled_df[train_size:]

# Define the dataset parameters
max_encoder_length = 168  # use 7 days of history
max_prediction_length = 24  # predict 24 hours into the future

# Create the TimeSeriesDataSet for training
training = TimeSeriesDataSet(
    train_data.reset_index(),
    time_idx="date",
    target="OT",  # replace with the appropriate target column name
    group_ids=["date"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=[],
    static_reals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=list(train_data.columns),
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["OT"],  # replace with the appropriate target column name
    target_normalizer=GroupNormalizer(groups=["date"], transformation="softplus"),
)

# Create the DataLoader for training
train_dataloader = training.to_dataloader(train=True, batch_size=128, num_workers=4)

# Define the N-BEATS model
nbeats = NBeatsModel.from_dataset(
    training,
    learning_rate=3e-3,
    log_interval=10,
    log_val_interval=1,
    weight_decay=1e-2,
    widths=[512, 512, 512, 512],
    backcast_loss_ratio=0.1,
)

# Train the model
trainer = pl.Trainer(gpus=0, max_epochs=30, gradient_clip_val=0.1)
trainer.fit(
    nbeats,
    train_dataloaders=train_dataloader,
)

# Prepare test data similarly
test = TimeSeriesDataSet.from_dataset(training, test_data.reset_index(), predict=True, stop_randomization=True)

# Create DataLoader for test data
test_dataloader = test.to_dataloader(train=False, batch_size=128, num_workers=4)

# Make predictions
predictions = trainer.predict(nbeats, test_dataloaders=test_dataloader)

# Inverse transform the predictions
scaled_predictions = scaler.inverse_transform(predictions.numpy())

# Evaluate the model
rmse = RMSE()(predictions, test_data["OT"].values)

print(f"RMSE: {rmse}")


Unnamed: 0,series,time_idx,value,static,date
0,0,0,-0.000000,2,2020-01-01
1,0,1,-0.046501,2,2020-01-02
2,0,2,-0.097796,2,2020-01-03
3,0,3,-0.144397,2,2020-01-04
4,0,4,-0.177954,2,2020-01-05
...,...,...,...,...,...
397,0,397,2.537928,2,2021-02-01
398,0,398,2.354053,2,2021-02-02
399,0,399,2.323488,2,2021-02-03
400,1,0,-0.000000,2,2020-01-01


In [3]:
# create dataset and dataloaders
max_encoder_length = 60
max_prediction_length = 20

training_cutoff = data["time_idx"].max() - max_prediction_length

context_length = max_encoder_length
prediction_length = max_prediction_length

training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="OT",
    categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
    group_ids=["series"],
    # only unknown variable is "value" - and N-Beats can also not take any additional variables
    time_varying_unknown_reals=["value"],
    max_encoder_length=context_length,
    max_prediction_length=prediction_length,
)

validation = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx=training_cutoff + 1)
batch_size = 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [5]:
training_cutoff

379

In [50]:
import os 
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "7"
from transformers import AutoTokenizer, AutoformerForPrediction, AutoformerConfig,AutoformerPreTrainedModel
import torch
import electricity.elec_nbeats.nbeats as nbeats
from pytorch_forecasting import TimeSeriesDataSet
import pandas as pd
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
elec = pd.read_csv("electricity/electricity.csv",index_col=0)

class DataSet:
    """
    Preprocessing.
    """
    def __init__(self, horizon, back_horizon):
        self.horizon = horizon
        self.back_horizon = back_horizon
    
    def preprocessing(self, y, date, train_size=0.7, val_size=0.2):
        
        y = y.copy().astype('float')

        train = y[:int(train_size*len(y))]
        val = y[int(train_size*len(y))-self.back_horizon:int((train_size+val_size)*len(y))]
        test = y[int((train_size+val_size)*len(y))-self.back_horizon:]
        train_date = date[:int(train_size*len(y))]
        val_date = date[int(train_size*len(y))-self.back_horizon:int((train_size+val_size)*len(y))]
        test_date = date[int((train_size+val_size)*len(y))-self.back_horizon:]

        # Training set
        self.X_train, self.y_train, self.train_date = self.create_sequences(train, 
                                                                            train, 
                                                                            train_date,
                                                                            self.horizon, 
                                                                            self.back_horizon)
        # Validation set
        self.X_val, self.y_val, self.val_date = self.create_sequences(val,
                                                                      val,
                                                                      val_date,
                                                                      self.horizon,
                                                                      self.back_horizon)
        # Testing set
        self.X_test, self.y_test, self.test_date = self.create_sequences(test,
                                                                         test,
                                                                         test_date,
                                                                         self.horizon,
                                                                         self.back_horizon)

        # training on all database
        self.X_train_all, self.y_train_all, self.train_all_date = self.create_sequences(y, 
                                                                                        y,
                                                                                        date,
                                                                                        self.horizon,
                                                                                        self.back_horizon)
            
    @staticmethod
    def create_sequences(X, y, d, horizon, time_steps):
        Xs, ys, ds = [], [], []
        for col in range(X.shape[1]):
            for i in range(0, len(X)-time_steps-horizon, 1):
                Xs.append(X[i:(i+time_steps), col])
                ys.append(y[(i+time_steps):(i+time_steps+horizon), col])
                ds.append(d[(i+time_steps):(i+time_steps+horizon)])

        return np.array(Xs), np.array(ys), np.array(ds)
    
back_horizon = 3 * 120
horizon = 120
datasets = DataSet(horizon,  back_horizon)
datasets.preprocessing(elec.values, elec.index, train_size=0.7, val_size=0.2)

In [56]:
datasets.X_train
datasets.X_test

array([[  60.,   77.,   75., ...,   79.,   71.,   62.],
       [  77.,   75.,   60., ...,   71.,   62.,   64.],
       [  75.,   60.,   60., ...,   62.,   64.,   62.],
       ...,
       [2938., 2874., 2751., ..., 3755., 3609., 3413.],
       [2874., 2751., 2682., ..., 3609., 3413., 3115.],
       [2751., 2682., 2779., ..., 3413., 3115., 3054.]])

In [49]:
data.values.reshape(26304,-1)

array([[  14.,   69.,  234., ..., 1558.,  182., 2162.],
       [  18.,   92.,  312., ..., 2177.,  253., 2835.],
       [  21.,   96.,  312., ..., 2193.,  218., 2764.],
       ...,
       [  12.,   93.,    8., ..., 1864.,  621., 2650.],
       [  10.,   92.,    8., ..., 2623.,  783., 2719.],
       [  11.,   88.,    8., ..., 2706.,  647., 2640.]])

In [39]:
data.shape

(26304, 322)