### Config

In [2]:
import os
import warnings

# Silence UserWarnings before the heavier libraries below are imported.
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
import pandas as pd
import torch
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from huggingface_hub import hf_hub_download
from uni2ts.eval_util.plot import plot_single, plot_next_multi
from uni2ts.model.moirai import MoiraiForecast

pd.set_option('display.max_columns', None)

In [3]:
# Moirai checkpoint variant; choose from {'small', 'base', 'large'}.
SIZE: str = "base"
# Patch size handed to the model; choose from {"auto", 8, 16, 32, 64, 128}.
PSZ = "auto"
# Batch size; any positive integer.
BSZ: int = 32

In [4]:
# Location of the pre-processed M3 yearly data. The default is the original
# author's local path; set the M3_YEARLY_PATH environment variable to run the
# notebook on another machine without editing this cell.
M3_YEARLY_PATH = os.environ.get(
    "M3_YEARLY_PATH",
    '/Users/tomaltenborg/Documents/Master/Master thesis/Notebooks/M3 Data/M3_yearly_processed.parquet',
)
year = pd.read_parquet(M3_YEARLY_PATH)

In [5]:
def _training_frame(series_rows):
    """Build the training frame for a single series.

    Parameters
    ----------
    series_rows : pd.DataFrame
        All rows of `year` belonging to one series. Must contain the columns
        'Series', 'Date', 'Value', 'N' and 'NF'.

    Returns
    -------
    tuple
        ``(train, n_obs, horizon)`` where ``train`` holds the first
        ``n_obs - horizon`` rows with columns 'Series' and 'Value', indexed by
        'Date' shifted to the year end, and ``n_obs`` / ``horizon`` are the
        series' 'N' and 'NF' values as plain Python ints.
    """
    n_obs = int(series_rows['N'].iloc[0])
    horizon = int(series_rows['NF'].iloc[0])

    # Copy the slice so the column assignment below works on an independent
    # frame instead of a view of `year` (avoids chained-assignment warnings).
    train = series_rows.iloc[:n_obs - horizon][['Series', 'Date', 'Value']].copy()

    # IMPORTANT: the dates have to be moved to the year end, not the beginning.
    train['Date'] = pd.to_datetime(train['Date']) + pd.offsets.YearEnd()
    return train.set_index('Date'), n_obs, horizon


def _forecast_to_frame(forecast):
    """Collapse one sample forecast into a frame of per-period medians.

    Parameters
    ----------
    forecast
        Forecast object exposing `samples`, `item_id` and `start_date`.
        `samples` is reduced along axis 0 and its second dimension is used as
        the forecast horizon.

    Returns
    -------
    pd.DataFrame
        Columns 'Date' (periods starting at the forecast start), 'Series'
        and 'Median_Forecast'.
    """
    samples = forecast.samples
    start = pd.Period(forecast.start_date, freq=forecast.start_date.freq)
    return pd.DataFrame({
        'Date': pd.period_range(start=start, periods=samples.shape[1], freq=start.freq),
        'Series': forecast.item_id,
        'Median_Forecast': np.median(samples, axis=0),
    })


# The checkpoint file and the target device do not depend on the series, so
# resolve them once instead of on every iteration.
checkpoint_path = hf_hub_download(
    repo_id=f"Salesforce/moirai-1.0-R-{SIZE}", filename="model.ckpt"
)
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Per-forecast frames are collected in a list and concatenated once at the end;
# concatenating inside the loop re-copies the accumulated frame every time.
forecast_frames = []

for n_done, series in enumerate(year['Series'].unique(), start=1):
    train_df, N, NF = _training_frame(year[year['Series'] == series])

    # Convert into a GluonTS dataset
    ds = PandasDataset.from_long_dataframe(train_df, item_id='Series', target='Value')

    # The model is rebuilt per series because prediction_length and
    # context_length are taken from that series' NF and N.
    # NOTE(review): context_length is N although only N-NF observations are
    # passed in as history -- confirm this is intended.
    model = MoiraiForecast.load_from_checkpoint(
        checkpoint_path=checkpoint_path,
        prediction_length=NF,
        context_length=N,
        patch_size=PSZ,
        num_samples=20,  # draw 20 samples for each forecast time point
        target_dim=1,
        feat_dynamic_real_dim=ds.num_feat_dynamic_real,
        past_feat_dynamic_real_dim=ds.num_past_feat_dynamic_real,
        map_location=device,
    )

    predictor = model.create_predictor(batch_size=BSZ)
    forecast_frames.extend(
        _forecast_to_frame(forecast) for forecast in predictor.predict(ds)
    )

    # Progress report after every 100 processed series.
    if n_done % 100 == 0:
        print(f'Finished {n_done} forecasts')

# An empty input yields an empty frame, matching the previous behaviour
# (pd.concat raises on an empty list).
complete_df = (
    pd.concat(forecast_frames, ignore_index=True)
    if forecast_frames
    else pd.DataFrame()
)

TypeError: MoiraiForecast.__init__() missing 2 required positional arguments: 'feat_dynamic_real_dim' and 'past_feat_dynamic_real_dim'

In [5]:
# Shift every forecast period back by one so the dates line up with the
# TimeGPT and Chronos forecasts used for comparison, then turn the periods
# back into proper timestamps (start of the year).
shifted_periods = complete_df['Date'] - 1
shifted_timestamps = pd.to_datetime(shifted_periods, format='%Y')
complete_df['Date'] = shifted_timestamps.dt.to_period('Y').dt.start_time

complete_df

Unnamed: 0,Date,Series,Median_Forecast
0,1989-01-01,1,5075.040039
1,1990-01-01,1,5983.503906
2,1991-01-01,1,5960.694336
3,1992-01-01,1,6046.625977
4,1993-01-01,1,7204.881348
...,...,...,...
3865,1987-01-01,645,5653.820312
3866,1988-01-01,645,6361.310547
3867,1989-01-01,645,6166.613281
3868,1990-01-01,645,5600.120117


In [6]:
# Persist the median forecasts as a snappy-compressed parquet file (no index column).
complete_df.to_parquet(
    'M3_moirai_year_forecasts.parquet',
    engine='pyarrow',
    compression='snappy',
    index=False,
)

### Short Documentation
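The model does not return a single point forecast. For each series it returns an array of samples drawn from the forecast distribution. It is currently configured (`num_samples=20`) to draw 20 samples for each forecast time point, and the notebook reduces them to a point forecast by taking the median at each time point.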