### Config

In [1]:
# Third-party scientific stack
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Chronos forecasting pipeline (MLX build)
from chronos_mlx import ChronosPipeline

# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

In [2]:
# Read the preprocessed M3 monthly data from parquet.
# The rendered output below shows the columns:
# Series, N, NF, Category, Measurement, Value, Date.
# NOTE(review): absolute, machine-specific path — this cell only runs on the author's machine.
month = pd.read_parquet(
    '/Users/tomaltenborg/Documents/Master/Master thesis/Notebooks/M-Competitions/M3-Competition/Month/M3_month_processed.parquet'
)

# Last expression of the cell: display the loaded frame
month

Unnamed: 0,Series,N,NF,Category,Measurement,Value,Date
0,1402,68,18,MICRO,1,2640.0,1990-01-01
1,1402,68,18,MICRO,2,2640.0,1990-02-01
2,1402,68,18,MICRO,3,2160.0,1990-03-01
3,1402,68,18,MICRO,4,4200.0,1990-04-01
4,1402,68,18,MICRO,5,3360.0,1990-05-01
...,...,...,...,...,...,...,...
167557,2829,71,18,OTHER,67,1282.5,1988-07-01
167558,2829,71,18,OTHER,68,1261.3,1988-08-01
167559,2829,71,18,OTHER,69,1263.4,1988-09-01
167560,2829,71,18,OTHER,70,1257.1,1988-10-01


## Loop and Produce Forecasts for All Monthly Series

In [3]:
# Load the pretrained Chronos model once; the same pipeline is reused for every series.
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    dtype="bfloat16",
)

unique_series = month['Series'].unique()

# series_id -> {'Date', 'low', 'median', 'high'} for every series that was forecast
forecasts = {}
# ids of series that were skipped or whose prediction raised
failed_series = []

# One pass over the frame instead of a full boolean-mask scan per series.
# sort=False keeps the groups in order of first appearance, i.e. the same
# order as month['Series'].unique().
for series_id, series_data in month.groupby('Series', sort=False, observed=True):
    # Ensure the observations are in chronological order
    series_data = series_data.sort_values('Date')

    # N is used as the total series length and NF as the forecast horizon:
    # the last NF observations are held out and the first N - NF are the context.
    N = int(series_data['N'].iloc[0])
    NF = int(series_data['NF'].iloc[0])
    n_train = N - NF

    # Guard against inconsistent metadata; without it the iloc lookups below
    # would either pick the wrong row (negative index) or raise outside the try.
    if n_train <= 0 or n_train > len(series_data):
        print(
            f"Skipping series {series_id}: cannot take {n_train} training "
            f"observations from {len(series_data)} rows"
        )
        failed_series.append(series_id)
        continue

    # Date of the last observation used as context
    last_training_date = series_data['Date'].iloc[n_train - 1]

    # Model input: everything except the held-out tail
    training_data = series_data['Value'].iloc[:n_train].values

    # NF monthly (month-start) timestamps beginning the month after the last
    # training observation. The previous freq='YS' produced yearly stamps.
    first_forecast_date = pd.Timestamp(last_training_date) + pd.offsets.MonthBegin(1)
    forecast_dates = pd.date_range(start=first_forecast_date, periods=NF, freq='MS')

    # Predict the next NF values
    try:
        model_forecasts = pipeline.predict(
            context=training_data,
            prediction_length=NF,
            # NOTE(review): the number of sample paths is tied to the context
            # length; kept as in the original, confirm this is intended.
            num_samples=n_train,
            temperature=1.0,
            top_k=50,
            top_p=1.0
        )
    except Exception as e:
        print(f"Failed to predict for series {series_id} with error: {e}")
        failed_series.append(series_id)
        continue

    # 10% / 50% / 90% quantiles, taken along axis 0 of the first entry of the output
    # (presumably the sample axis of a single-series batch — TODO confirm output shape)
    low, median, high = np.quantile(model_forecasts[0], [0.1, 0.5, 0.9], axis=0)
    forecasts[series_id] = {
        'Date': forecast_dates,
        'low': low,
        'median': median,
        'high': high
    }

# Build one frame per series and concatenate once; growing a DataFrame with
# pd.concat inside a loop is quadratic in the number of series.
forecast_frames = [
    pd.DataFrame({
        'Date': data['Date'],
        'Series_ID': forecast_id,
        'Low': data['low'],
        'Median': data['median'],
        'High': data['high']
    })
    for forecast_id, data in forecasts.items()
]

if forecast_frames:
    forecast_df = pd.concat(forecast_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep a well-formed empty result instead
    forecast_df = pd.DataFrame(columns=['Date', 'Series_ID', 'Low', 'Median', 'High'])

if failed_series:
    print(
        f"Forecasting completed for {len(forecasts)} of {len(unique_series)} series; "
        f"{len(failed_series)} skipped or failed: {failed_series}"
    )
else:
    print('Forecasting completed for all series.')

In [None]:
# Write the forecast table to a gzip-compressed parquet file in the current
# working directory, without the row index.
forecast_df.to_parquet(
    'M3_Chronos_month_forecast.parquet',
    engine='pyarrow',
    compression='gzip',
    index=False,
)