# Moirai Transformer architecture implementation

In [8]:
%load_ext autoreload
%autoreload 2
import torch
import pandas as pd
import numpy as np
import utils

from datasets import load_dataset
from gluonts.dataset.pandas import PandasDataset
from huggingface_hub import hf_hub_download

from uni2ts.model.moirai import MoiraiForecast


# Column-role configuration for a forecasting dataset.
# NOTE(review): none of these column names ("Date", "visits", CPI, ...) appear
# among the columns of the final_data.csv frame displayed further down in this
# notebook - confirm whether this configuration is still needed here.
TIME_COL = "Date"
TARGET = "visits"
DYNAMIC_COV = ["CPI", "Inflation_Rate", "GDP"]
# Seasonal covariate names month_1 ... month_12
# (presumably one indicator column per calendar month - TODO confirm).
SEAS_COV = [f"month_{month}" for month in range(1, 13)]
FORECAST_HORIZON = 8  # months
FREQ = "M"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


ModuleNotFoundError: No module named 'utils'

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import os
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset

In [2]:
def select_device(requested_device=None):
    """
    Pick the PyTorch device to run computations on.

    When `requested_device` is given, that exact backend is returned if it is
    one of 'cuda', 'mps' or 'cpu' and (for the GPU backends) reports itself as
    available. When it is omitted, the first available backend in the order
    CUDA -> MPS -> CPU is returned.

    Parameters:
    - requested_device (str, optional): 'cuda', 'mps' or 'cpu'.

    Returns:
    - torch.device: The selected PyTorch device.
    - str: Human-readable description of the selected device.

    Raises:
    - ValueError: If a device was requested but is unknown or unavailable.
    """
    # (backend name, availability probe, description), in preference order.
    # Probes are wrapped in lambdas so each backend is only queried when it
    # is actually being considered.
    candidates = (
        ('cuda', lambda: torch.cuda.is_available(), "NVIDIA GPU"),
        ('mps', lambda: torch.backends.mps.is_available(), "Apple Silicon GPU"),
        ('cpu', lambda: True, "CPU"),
    )

    if requested_device:
        # Explicit request: only the matching backend may be returned.
        for backend, is_available, description in candidates:
            if requested_device == backend and is_available():
                return torch.device(backend), description
        raise ValueError(f"Requested device '{requested_device}' is not available or not recognized.")

    # No request: the first available backend wins; the CPU entry always matches.
    for backend, is_available, description in candidates:
        if is_available():
            return torch.device(backend), description

# Select the best available device. No device is requested explicitly, so
# select_device() falls back to its CUDA -> MPS -> CPU preference order.
device, device_name = select_device() 
# Report which backend was picked so the cell output documents the run.
print(f"Using {device_name} for computation.")


Using Apple Silicon GPU for computation.


## Load input data

In [3]:
# Import the data 
def load_and_prepare_data(file_path):
    """
    Read a CSV file that contains a 'date' column and return it as a
    chronologically ordered DataFrame.

    The 'date' column is parsed as datetime, rows are sorted by it in
    ascending order, and it becomes the index of the returned frame.

    Parameters:
    - file_path: Path of the CSV file to read.

    Returns:
    - pandas.DataFrame: The file contents indexed by 'date'.
    """
    raw = pd.read_csv(file_path, parse_dates=['date'])
    # Re-apply the datetime conversion in case read_csv left the column unparsed.
    with_dates = raw.assign(date=pd.to_datetime(raw['date']))
    ordered = with_dates.sort_values('date').set_index('date')
    return pd.DataFrame(ordered)

In [4]:
# Load the dataset. The path is relative, so it resolves against the
# current working directory of the kernel.
df = load_and_prepare_data('../../data/Final_data/final_data.csv')

In [5]:
# Display the loaded frame for a quick visual check of its columns and date range.
df

Unnamed: 0_level_0,Day_ahead_price,BEV_vehicles,Biomass (GWh),Hard Coal (GWh),Hydro (GWh),Lignite (GWh),Natural Gas (GWh),Nuclear (GWh),Oil_price (EUR),Other (GWh),Pumped storage generation (GWh),Solar (GWh),Solar_radiation,TTF_gas_price (EUR/MWh),Temperature,Wind offshore (GWh),Wind onshore (GWh),Wind_speed,net_total_export_import (GWh)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2012-01-01,18.1896,6,98.605,108.454,51.011,325.337,188.811,250.979,98.99,54.040,19.314,6.263,14.75,21.1000,8.39,3.404,235.467,4.95,54.662
2012-01-02,33.8188,6,98.605,222.656,51.862,343.168,229.293,258.671,99.39,54.166,28.892,6.312,15.12,20.0000,7.41,3.350,231.772,5.00,-64.477
2012-01-03,35.0263,6,98.605,162.204,48.851,336.773,241.297,271.495,99.79,53.518,21.072,24.226,31.88,20.9000,5.23,7.292,504.484,7.77,-35.078
2012-01-04,32.1633,6,98.605,189.633,47.101,323.976,252.289,270.613,102.48,52.194,28.300,14.157,25.21,21.4000,4.78,7.828,541.528,8.04,22.924
2012-01-05,20.3500,6,98.605,175.733,45.854,327.502,259.018,287.555,103.24,52.179,31.887,4.728,13.46,21.3000,4.23,8.280,572.819,9.98,35.618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-25,61.0938,947,123.683,48.501,58.671,193.697,255.124,0.000,74.73,60.627,13.565,169.216,97.58,23.7625,4.52,29.879,278.787,3.43,-36.930
2024-02-26,66.2683,947,124.810,69.146,58.444,281.177,289.764,0.000,74.60,59.810,12.231,110.504,73.25,23.9000,4.96,62.336,239.555,3.12,-198.686
2024-02-27,73.8362,947,124.989,103.379,59.181,351.355,354.042,0.000,75.61,67.170,23.753,85.584,58.12,24.8300,4.53,16.951,131.761,3.11,-209.332
2024-02-28,71.8213,947,125.068,93.416,58.160,350.348,338.216,0.000,76.07,65.375,19.042,106.330,66.00,24.8000,3.69,68.585,76.355,2.46,-206.956


## Start implementation 


In [6]:
import torch
import matplotlib.pyplot as plt
import pandas as pd
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from huggingface_hub import hf_hub_download

from uni2ts.eval_util.plot import plot_single
from uni2ts.model.moirai import MoiraiForecast, MoiraiModule



ModuleNotFoundError: No module named 'gluonts.dataset.pandas'

In [None]:

# Configuration for the Moirai forecasting run below.
SIZE = "small"  # model size: choose from {'small', 'base', 'large'}
PDT = 20  # prediction length: any positive integer
CTX = 200  # context length: any positive integer
PSZ = "auto"  # patch size: choose from {"auto", 8, 16, 32, 64, 128}
BSZ = 32  # batch size: any positive integer
TEST = 100  # test set length: any positive integer

# Read data into pandas DataFrame
# NOTE(review): this rebinds `df`, replacing the frame loaded from
# final_data.csv earlier in the notebook, so everything below runs on this
# remote demo CSV rather than on that dataset - confirm this is intended.
url = (
    "https://gist.githubusercontent.com/rsnirwan/c8c8654a98350fadd229b00167174ec4"
    "/raw/a42101c7786d4bc7695228a0f2c8cea41340e18f/ts_wide.csv"
)
# The first CSV column is used as the index and parsed as dates.
df = pd.read_csv(url, index_col=0, parse_dates=True)

# Convert into GluonTS dataset
# dict(df) maps each column name to its Series
# (presumably each column is then treated as a separate series - TODO confirm).
ds = PandasDataset(dict(df))

# Split into train/test set
train, test_template = split(
    ds, offset=-TEST
)  # assign last TEST time steps as test set

# Construct rolling window evaluation
# With TEST = 100 and PDT = 20 this requests 100 // 20 = 5 windows.
test_data = test_template.generate_instances(
    prediction_length=PDT,  # number of time steps for each prediction
    windows=TEST // PDT,  # number of windows in rolling window evaluation
    distance=PDT,  # number of time steps between each window - distance=PDT for non-overlapping windows
)

# Prepare pre-trained model by downloading model weights from huggingface hub
# The checkpoint id is built from SIZE, e.g. "Salesforce/moirai-1.0-R-small"
# when SIZE is "small".
model = MoiraiForecast(
    module=MoiraiModule.from_pretrained(f"Salesforce/moirai-1.0-R-{SIZE}"),
    prediction_length=PDT,
    context_length=CTX,
    patch_size=PSZ,
    num_samples=100,  # presumably the number of sample paths drawn per forecast - TODO confirm
    target_dim=1,  # presumably one target variable per series - TODO confirm
    # Covariate dimensions are taken from the dataset itself rather than hard-coded.
    feat_dynamic_real_dim=ds.num_feat_dynamic_real,
    past_feat_dynamic_real_dim=ds.num_past_feat_dynamic_real,
)

# Build a predictor with batch size BSZ and forecast the input part of
# every rolling-window instance.
predictor = model.create_predictor(batch_size=BSZ)
forecasts = predictor.predict(test_data.input)

# Walk the rolling-window inputs, their ground-truth labels and the forecasts
# in parallel. This assumes forecasts are yielded in the same order as
# test_data.input - TODO confirm.
input_it = iter(test_data.input)
label_it = iter(test_data.label)
forecast_it = iter(forecasts)

# Take the first window only; the iterators stay available for later windows.
inp = next(input_it)
label = next(label_it)
forecast = next(forecast_it)

# Plot the first window: input, label and forecast.
plot_single(
    inp,
    label,
    forecast,
    # Use the CTX constant the model was built with instead of a hard-coded
    # 200, so the plot stays in sync if CTX is changed.
    context_length=CTX,
    name="pred",
    show_label=True,
)
plt.show()