### Config

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from chronos_mlx import ChronosPipeline
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [2]:
# Location of the processed M3 yearly data. The default is the original,
# machine-specific path; set the M3_YEARLY_PARQUET environment variable to point
# at the file elsewhere without editing the notebook.
M3_YEARLY_PARQUET = os.environ.get(
    'M3_YEARLY_PARQUET',
    '/Users/tomaltenborg/Documents/Master/Master thesis/Notebooks/M-Competitions/M3-Competition/Year/M3_yearly_processed.parquet',
)

# Long-format frame: one row per (Series, Date) observation.
year = pd.read_parquet(M3_YEARLY_PARQUET)

In [6]:
# Spot-check the raw observations of the first series.
year[year['Series'].eq(1)]

Unnamed: 0,Series,N,NF,Category,Value,Date
0,1,20,6,MICRO,940.66,1975-01-01
1,1,20,6,MICRO,1084.86,1976-01-01
2,1,20,6,MICRO,1244.98,1977-01-01
3,1,20,6,MICRO,1445.02,1978-01-01
4,1,20,6,MICRO,1683.17,1979-01-01
5,1,20,6,MICRO,2038.15,1980-01-01
6,1,20,6,MICRO,2342.52,1981-01-01
7,1,20,6,MICRO,2602.45,1982-01-01
8,1,20,6,MICRO,2927.87,1983-01-01
9,1,20,6,MICRO,3103.96,1984-01-01


## Loop and Produce Forecasts for All Yearly Series

In [4]:
# Load the pretrained Chronos model once; it is reused for every series below.
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    dtype="bfloat16",
)

unique_series = year['Series'].unique()

forecasts = {}      # series_id -> forecast dates plus low/median/high quantile paths
failed_series = []  # ids skipped because of unusable metadata or a prediction error

# groupby(sort=False) yields the series in order of first appearance (the same
# order as `unique_series`) and splits the frame in a single pass, instead of
# re-filtering the whole frame with a boolean mask once per series.
for series_id, series_data in year.groupby('Series', sort=False):
    # Ensure the data is sorted by date
    series_data = series_data.sort_values('Date')

    # The first N - NF rows are used as model context; the following NF steps
    # are forecast. Cast to plain ints so slicing and the model call never
    # receive numpy/float scalars.
    N = int(series_data['N'].iloc[0])
    NF = int(series_data['NF'].iloc[0])
    n_train = N - NF

    # Without this guard a non-positive n_train would make the positional
    # lookup of the last training date wrap around to the wrong row, and a
    # series with fewer rows than n_train would raise outside the try block.
    if not 0 < n_train <= len(series_data):
        print(f"Failed to predict for series {series_id} with error: "
              f"invalid training length {n_train} for {len(series_data)} rows")
        failed_series.append(series_id)
        continue

    # Prepare the input data for the model (excludes the last NF values)
    training_rows = series_data.iloc[:n_train]
    training_data = training_rows['Value'].values
    last_training_date = training_rows['Date'].iloc[-1]

    # Forecast dates assuming yearly data: generate NF + 1 year starts from the
    # last training date and drop the first, which is the training date itself.
    forecast_dates = pd.date_range(start=last_training_date, periods=NF + 1, freq='YS')[1:]

    # Predict the next NF values.
    # NOTE(review): num_samples is tied to the training length, so short series
    # get few sample paths for the quantiles below — confirm this is intended.
    # NOTE(review): sampling looks stochastic and no seed is set in this cell, so
    # reruns may differ — confirm how chronos_mlx should be seeded.
    try:
        model_forecasts = pipeline.predict(
            context=training_data,
            prediction_length=NF,
            num_samples=n_train,
            temperature=1.0,
            top_k=50,
            top_p=1.0
        )
    except Exception as e:
        # Best-effort: report and remember the failure, then move on.
        print(f"Failed to predict for series {series_id} with error: {e}")
        failed_series.append(series_id)
        continue

    # Quantiles for uncertainty bounds, taken across axis 0 of the first batch
    # item (assumes that item is (num_samples, prediction_length) — TODO confirm).
    low, median, high = np.quantile(model_forecasts[0], [0.1, 0.5, 0.9], axis=0)
    forecasts[series_id] = {
        'Date': forecast_dates,
        'low': low,
        'median': median,
        'high': high
    }

# Convert forecasts dictionary to a DataFrame for easier manipulation.
# Build all per-series frames first and concatenate once; concatenating inside
# the loop re-copies the accumulated frame on every iteration.
forecast_frames = [
    pd.DataFrame({
        'Date': data['Date'],
        'Series_ID': series_id,
        'Low': data['low'],
        'Median': data['median'],
        'High': data['high']
    })
    for series_id, data in forecasts.items()
]
if forecast_frames:
    forecast_df = pd.concat(forecast_frames, ignore_index=True)
else:
    # Keep the expected columns so downstream cells still work with no forecasts.
    forecast_df = pd.DataFrame(columns=['Date', 'Series_ID', 'Low', 'Median', 'High'])

if failed_series:
    print(f"Forecasting completed; {len(failed_series)} of {len(unique_series)} "
          f"series were skipped: {failed_series}")
else:
    print('Forecasting completed for all series.')

Forecasting completed for all series.


In [7]:
# Spot-check the forecast rows produced for the first series.
forecast_df[forecast_df['Series_ID'].eq(1)]

Unnamed: 0,Date,Series_ID,Low,Median,High
0,1989-01-01,1,5061.795964,5340.081923,5667.255956
1,1990-01-01,1,5044.873169,5650.333161,6016.993716
2,1991-01-01,1,5031.710995,5857.16732,6329.125265
3,1992-01-01,1,4755.305346,6092.206137,6629.974951
4,1993-01-01,1,4557.87274,6073.403032,6784.160415
5,1994-01-01,1,4742.143172,6101.60769,6801.08321


In [8]:
# Persist the forecasts (gzip-compressed parquet via pyarrow, without the index).
forecast_df.to_parquet(
    'M3_Chronos_year_forecast.parquet',
    engine='pyarrow',
    compression='gzip',
    index=False,
)