### Config

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from chronos_mlx import ChronosPipeline
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [2]:
# Directory holding the preprocessed M3 data. Set the M3_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the original
# local folder is used, so existing runs behave exactly as before.
M3_DATA_DIR = Path(os.environ.get(
    'M3_DATA_DIR',
    '/Users/tomaltenborg/Documents/Master/Master thesis/Notebooks/M3 Data',
))

# Monthly M3 observations. The forecasting cell reads the 'Series', 'Date',
# 'Value', 'N' and 'NF' columns of this frame.
month = pd.read_parquet(M3_DATA_DIR / 'M3_month_processed.parquet')

## Loop and Produce Forecasts for All Monthly Series

In [3]:
# Load the pretrained Chronos model once; the same pipeline is reused for every series.
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-base",
    dtype="bfloat16",
)

# Quantile levels reported as the lower bound, median and upper bound of each forecast.
QUANTILE_LEVELS = [0.1, 0.5, 0.9]

unique_series = month['Series'].unique()

forecasts = {}       # series_id -> {'Date', 'low', 'median', 'high'}
failed_series = []   # ids of series that were skipped or whose prediction raised

counter = 0          # number of series forecast successfully so far

# groupby(sort=False) yields each series once, in order of first appearance (the
# same order as unique_series), without re-scanning the whole frame per series.
for series_id, series_data in month.groupby('Series', sort=False):

    # Ensure the observations are in chronological order.
    series_data = series_data.sort_values('Date')

    # N and NF are read from the first row of the series. The code treats NF as the
    # forecast horizon and N - NF as the number of leading observations used as context.
    N = int(series_data['N'].iloc[0])
    NF = int(series_data['NF'].iloc[0])
    n_train = N - NF

    # Guard against inconsistent metadata: a non-positive n_train would turn the
    # iloc lookups below into negative indexing and silently pick the wrong rows.
    if NF < 1 or n_train < 1 or len(series_data) < n_train:
        print(f"Skipping series {series_id}: N={N}, NF={NF}, rows={len(series_data)}")
        failed_series.append(series_id)
        continue

    # Model input: the first N - NF values of the series.
    training_data = series_data['Value'].iloc[:n_train].to_numpy()

    # Forecast dates are the NF month starts that follow the last training
    # observation. Starting from the next month start (instead of generating NF + 1
    # dates and dropping the first) stays correct when the last training date is
    # not itself the first day of a month.
    last_training_date = pd.Timestamp(series_data['Date'].iloc[n_train - 1])
    forecast_dates = pd.date_range(
        start=last_training_date + pd.offsets.MonthBegin(1),
        periods=NF,
        freq='MS',
    )

    # Draw sample paths for the next NF steps.
    # NOTE(review): num_samples is tied to the context length (N - NF), so the number
    # of sample paths differs per series; kept as-is to preserve existing results --
    # confirm whether a fixed sample count was intended.
    # NOTE(review): no random seed is set here, so reruns presumably give slightly
    # different quantiles -- confirm.
    try:
        model_forecasts = pipeline.predict(
            context=training_data,
            prediction_length=NF,
            num_samples=n_train,
        )
    except Exception as e:
        # Best effort: report the failure, remember the series and move on.
        print(f"Failed to predict for series {series_id} with error: {e}")
        failed_series.append(series_id)
        continue

    # Progress message after every 100 successful forecasts.
    counter += 1
    if counter % 100 == 0:
        print('100 series forecasted')

    # Quantiles across axis 0 of the first forecast entry give the uncertainty bounds
    # (presumably model_forecasts[0] is shaped (num_samples, prediction_length) -- TODO confirm).
    low, median, high = np.quantile(model_forecasts[0], QUANTILE_LEVELS, axis=0)
    forecasts[series_id] = {
        'Date': forecast_dates,
        'low': low,
        'median': median,
        'high': high
    }

# Build one long-format DataFrame. Collecting the per-series frames and concatenating
# once avoids re-copying the growing frame on every iteration.
forecast_frames = [
    pd.DataFrame({
        'Date': data['Date'],
        'Series_ID': series_id,
        'Low': data['low'],
        'Median': data['median'],
        'High': data['high']
    })
    for series_id, data in forecasts.items()
]
if forecast_frames:
    forecast_df = pd.concat(forecast_frames, ignore_index=True)
else:
    # Nothing was forecast: keep the expected columns so later cells still work.
    forecast_df = pd.DataFrame(columns=['Date', 'Series_ID', 'Low', 'Median', 'High'])

if failed_series:
    print(f"{len(failed_series)} series could not be forecast: {failed_series}")
print('Forecasting completed for all series.')

100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
100 series forecasted
Forecasting completed for all series.


In [4]:
# Preview the first 18 rows of the forecast table.
forecast_df.head(n=18)

Unnamed: 0,Date,Series_ID,Low,Median,High
0,1994-03-01,1402,1945.055718,2778.651026,4234.134897
1,1994-04-01,1402,1802.153666,2606.639296,4387.622287
2,1994-05-01,1402,1693.653959,2910.967742,5115.364223
3,1994-06-01,1402,1825.970674,3215.296188,5104.778886
4,1994-07-01,1402,2053.555425,2725.72434,5305.900293
5,1994-08-01,1402,1812.739003,2791.882698,4268.537243
6,1994-09-01,1402,1945.055718,2963.894428,4578.158358
7,1994-10-01,1402,1958.28739,2844.809384,5340.302639
8,1994-11-01,1402,1336.398827,2778.651026,4591.390029
9,1994-12-01,1402,1683.068622,2672.797654,4853.377126


In [5]:
# Write the forecast table to the current working directory as gzip-compressed
# Parquet via pyarrow; the row index is not stored.
forecast_df.to_parquet(
    'M3_Chronos_month_forecast.parquet',
    engine='pyarrow',
    compression='gzip',
    index=False,
)