### Config

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
from huggingface_hub import hf_hub_download
import torch
from uni2ts.eval_util.plot import plot_single, plot_next_multi
from uni2ts.model.moirai import MoiraiForecast
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split

  Referenced from: <2BD1B165-EC09-3F68-BCE4-8FE4E70CA7E2> /Users/tomaltenborg/anaconda3/lib/python3.11/site-packages/torchvision/image.so
  warn(


In [2]:
# Moirai checkpoint size; one of {'small', 'base', 'large'}
SIZE = "base"
# Patch size; one of {"auto", 8, 16, 32, 64, 128}
PSZ = "auto"
# Batch size handed to the predictor; any positive integer
BSZ = 32

### Data

In [64]:
# Long-format M3 quarterly data. Later cells read the 'Series', 'N', 'NF',
# 'Date' and 'Value' columns from this frame.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
quarter_parquet_path = '/Users/tomaltenborg/Documents/Master/Master thesis/Notebooks/M3 Data/M3_quarter_processed.parquet'
quarter = pd.read_parquet(quarter_parquet_path)

## Produce and store forecasts

In [61]:
# Forecast every series in `quarter` with Moirai and collect the per-step
# median of the sampled forecasts in one long DataFrame, `complete_df`, with
# columns 'Date', 'Series' and 'Median_Forecast'.

# The checkpoint file and the target device do not depend on the series, so
# resolve them once instead of on every loop iteration.
checkpoint_path = hf_hub_download(
    repo_id=f"Salesforce/moirai-1.0-R-{SIZE}", filename="model.ckpt"
)
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Per-series forecast frames. They are concatenated once after the loop;
# calling pd.concat inside the loop re-copies all earlier rows each time.
forecast_frames = []

counter = 0

for series in quarter['Series'].unique():
    # Filter and prepare data for the model
    filtered_temp = quarter[quarter['Series'] == series]
    N = filtered_temp['N'].iloc[0]
    NF = filtered_temp['NF'].iloc[0]

    # Keep only the first N-NF records for training. The explicit copy makes
    # the column assignment below operate on an independent frame rather than
    # on a slice of `quarter` (avoids SettingWithCopyWarning).
    filtered_df = filtered_temp.iloc[:N-NF][['Series', 'Date', 'Value']].copy()

    # Move every date to a quarter end.
    # IMPORTANT: has to be the end, cannot be the beginning.
    filtered_df['Date'] = pd.to_datetime(filtered_df['Date']) + pd.offsets.QuarterEnd()
    filtered_df = filtered_df.set_index('Date')

    # Convert into a GluonTS dataset
    ds = PandasDataset.from_long_dataframe(filtered_df, item_id='Series', target='Value')

    # The model is rebuilt per series because prediction_length and
    # context_length are taken from that series' NF and N.
    # NOTE(review): context_length is N although only N - NF observations are
    # passed in above — confirm this is intended.
    model = MoiraiForecast.load_from_checkpoint(
        checkpoint_path=checkpoint_path,
        prediction_length=NF,
        context_length=N,
        patch_size=PSZ,
        num_samples=20,  # number of sample paths drawn per forecast
        target_dim=1,
        feat_dynamic_real_dim=ds.num_feat_dynamic_real,
        past_feat_dynamic_real_dim=ds.num_past_feat_dynamic_real,
        map_location=device,
    )

    predictor = model.create_predictor(batch_size=BSZ)
    forecasts = predictor.predict(ds)

    counter += 1
    # Handle forecast output
    for forecast in forecasts:
        samples = forecast.samples
        item_id = forecast.item_id
        start_date = pd.Period(forecast.start_date, freq=forecast.start_date.freq)
        # Axis 1 of `samples` is used as the forecast horizon; axis 0 is
        # reduced by the median below.
        forecast_horizon = samples.shape[1]

        # Create a date range for the forecasts
        date_range = pd.period_range(start=start_date, periods=forecast_horizon, freq=start_date.freq)

        # Calculate the median of the samples at each time point
        medians = np.median(samples, axis=0)

        # One frame per forecast; merged after the loop
        forecast_frames.append(pd.DataFrame({
            'Date': date_range,
            'Series': item_id,
            'Median_Forecast': medians
        }))

    # Simple progress message after every 100 series. `>=` is used because
    # `> 100` only fired after 101 series.
    if counter >= 100:
        print('Finished 100 forecasts')
        counter = 0

# ignore_index=True already yields a clean 0..n-1 index. With no series at
# all, fall back to an empty frame, as the incremental version did.
complete_df = (
    pd.concat(forecast_frames, ignore_index=True)
    if forecast_frames
    else pd.DataFrame()
)

36
44
40


In [62]:
def quarter_to_date(quarter_str):
    """Return the first day of the quarter named by ``quarter_str``.

    The first four characters are read as the year and the last character
    as the quarter number, e.g. ``"1993Q2"`` -> ``Timestamp("1993-04-01")``.
    """
    year = int(quarter_str[:4])
    quarter_number = int(quarter_str[-1])
    # Quarters 1, 2, 3, 4 start in months 1, 4, 7, 10
    return pd.Timestamp(year=year, month=3 * quarter_number - 2, day=1)

# Convert the forecast periods to timestamps only if that has not happened
# yet. Re-running this cell on already-converted data would stringify the
# timestamps as "YYYY-MM-01"; quarter_to_date would then read the trailing
# "1" as the quarter and silently collapse every date onto Q1.
if not pd.api.types.is_datetime64_any_dtype(complete_df['Date']):
    # Quarterly Period values stringify as e.g. "1993Q1", which is the
    # format quarter_to_date parses.
    quarter_starts = complete_df['Date'].astype(str).apply(quarter_to_date)
    # Ensure it is the same date format as TimeGPT for easier evaluation
    complete_df['Date'] = pd.to_datetime(quarter_starts) - pd.DateOffset(months=3)

In [60]:
# Write the forecasts to a snappy-compressed parquet file in the working
# directory, without the DataFrame index.
complete_df.to_parquet(
    'M3_moirai_quarter_forecasts.parquet',
    index=False,
    engine='pyarrow',
    compression='snappy',
)

Unnamed: 0,Date,Series,Median_Forecast
0,1993-01-01,646,5523.25293
1,1993-04-01,646,5586.301758
2,1993-07-01,646,5551.783203
3,1993-10-01,646,5605.341797
4,1994-01-01,646,5482.976074
5,1994-04-01,646,5525.736328
6,1994-07-01,646,5495.833008
7,1994-10-01,646,5704.464355
