### Config

In [1]:
import os
import warnings
from pathlib import Path

# Installed before the heavy third-party imports so their import-time UserWarnings stay hidden.
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
import pandas as pd
import torch
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from huggingface_hub import hf_hub_download
from uni2ts.eval_util.plot import plot_single, plot_next_multi
from uni2ts.model.moirai import MoiraiForecast

pd.set_option('display.max_columns', None)

In [2]:
# Run configuration for the Moirai forecasts:
#   SIZE - checkpoint variant, interpolated into the Hugging Face repo id
#   PSZ  - patch size handed to the model
#   BSZ  - batch size handed to the predictor
SIZE, PSZ, BSZ = "base", "auto", 32

### Data

In [9]:
# Directory holding the processed M3 files. Set the M3_DATA_DIR environment
# variable to run the notebook on another machine; the default is the original
# location, so existing runs behave exactly as before.
M3_DATA_DIR = Path(os.environ.get(
    "M3_DATA_DIR",
    '/Users/tomaltenborg/Documents/Master/Master thesis/Notebooks/M3 Data',
))

# Monthly M3 series in long format (one row per series and date).
month = pd.read_parquet(M3_DATA_DIR / 'M3_month_processed.parquet')

# Fail early, with a clear message, if a column the forecasting loop reads is absent.
_missing_columns = {'Series', 'Date', 'Value', 'N', 'NF'} - set(month.columns)
if _missing_columns:
    raise KeyError(f"M3 monthly data is missing columns: {sorted(_missing_columns)}")

## Produce and store forecasts

In [10]:
# Forecast every series in `month` with Moirai and collect the per-step median
# of the sampled paths in `complete_df` (columns: Date, Series, Median_Forecast).
#
# For each series the first N-NF rows are used as history and NF steps are
# forecast, where N and NF are read from the series' own rows.

# Seed torch so the sampled forecast paths (and therefore the medians) are
# repeatable across runs.
torch.manual_seed(42)

# Loop-invariant work: resolve the checkpoint file and the device once instead
# of once per series.
checkpoint_path = hf_hub_download(
    repo_id=f"Salesforce/moirai-1.0-R-{SIZE}", filename="model.ckpt"
)
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Per-series forecast frames; concatenated once after the loop, because growing
# a DataFrame with pd.concat inside the loop copies all previous rows each time.
forecast_frames = []

for counter, series in enumerate(month['Series'].unique(), start=1):

    filtered_temp = month[month['Series'] == series]
    # Plain ints so slicing and the model hyper-parameters do not carry numpy scalars.
    N = int(filtered_temp['N'].iloc[0])
    NF = int(filtered_temp['NF'].iloc[0])

    # Prepare the dataset, keeping only the first N-NF records for training.
    # .copy() detaches the slice from `month` so the column assignment below is
    # not a chained assignment on a view.
    filtered_df = filtered_temp.iloc[:N - NF][['Series', 'Date', 'Value']].copy()

    # Move every date to the end of its month. IMPORTANT: it has to be the month
    # end, it cannot be the beginning. MonthEnd(0) leaves dates that already are
    # a month end untouched, whereas MonthEnd() would push them to the end of
    # the following month.
    filtered_df['Date'] = pd.to_datetime(filtered_df['Date']) + pd.offsets.MonthEnd(0)

    filtered_df = filtered_df.set_index('Date')

    # Convert into a GluonTS dataset
    ds = PandasDataset.from_long_dataframe(filtered_df, item_id='Series', target='Value')

    # Load and prepare the model. It is rebuilt per series because the
    # prediction and context lengths depend on the series.
    # NOTE(review): context_length is N although only N-NF observations are
    # passed in - confirm this is intended.
    model = MoiraiForecast.load_from_checkpoint(
        checkpoint_path=checkpoint_path,
        prediction_length=NF,
        context_length=N,
        patch_size=PSZ,
        num_samples=20,  # draw 20 sample paths per series
        target_dim=1,
        feat_dynamic_real_dim=ds.num_feat_dynamic_real,
        past_feat_dynamic_real_dim=ds.num_past_feat_dynamic_real,
        map_location=device,
    )

    predictor = model.create_predictor(batch_size=BSZ)
    forecasts = predictor.predict(ds)

    # Handle forecast output
    for forecast in forecasts:
        samples = forecast.samples
        item_id = forecast.item_id
        start_date = pd.Period(forecast.start_date, freq=forecast.start_date.freq)
        # Axis 1 of `samples` is the forecast horizon; axis 0 indexes the sample paths.
        forecast_horizon = samples.shape[1]

        # Create a date range for the forecasts
        date_range = pd.period_range(start=start_date, periods=forecast_horizon, freq=start_date.freq)

        # Median across the sample paths at each time point
        medians = np.median(samples, axis=0)

        forecast_frames.append(pd.DataFrame({
            'Date': date_range,
            'Series': item_id,
            'Median_Forecast': medians
        }))

    # Simple progress marker, printed after every 100th series
    # (the previous `counter > 100` check only fired after 101).
    if counter % 100 == 0:
        print('Finished 100 forecasts')

# Single concatenation; ignore_index=True already yields a clean 0..n-1 index.
# pd.concat raises on an empty list, so fall back to an empty frame with the
# expected columns when there was nothing to forecast.
complete_df = (
    pd.concat(forecast_frames, ignore_index=True)
    if forecast_frames
    else pd.DataFrame(columns=['Date', 'Series', 'Median_Forecast'])
)

Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts
Finished 100 forecasts


In [11]:
# Turn the forecast dates into actual timestamps instead of periods.
# The dtype guard makes the cell safe to re-run: once the column is datetime,
# calling .dt.to_timestamp() again would raise.
if isinstance(complete_df["Date"].dtype, pd.PeriodDtype):
    # to_timestamp() maps each period to its start and already returns
    # datetime64 values, so no extra pd.to_datetime() wrapper is needed.
    complete_df["Date"] = complete_df["Date"].dt.to_timestamp()
complete_df

Unnamed: 0,Date,Series,Median_Forecast
0,1994-03-01,1402,2941.182373
1,1994-04-01,1402,3555.800781
2,1994-05-01,1402,3705.567383
3,1994-06-01,1402,3240.018066
4,1994-07-01,1402,3443.771240
...,...,...,...
25699,1988-07-01,2829,1307.136963
25700,1988-08-01,2829,1468.140381
25701,1988-09-01,2829,1361.496338
25702,1988-10-01,2829,1473.072998


In [12]:
# Write the median forecasts to a snappy-compressed parquet file in the working
# directory; the row index is not stored.
complete_df.to_parquet(
    'M3_moirai_month_forecasts.parquet',
    index=False,
    compression='snappy',
    engine='pyarrow',
)