In [1]:
import pandas as pd
import matplotlib.pyplot as plt

from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS
from neuralforecast.losses.pytorch import DistributionLoss

In [2]:
df = pd.read_excel("GMRegions_CPI.xlsx")

In [4]:
df = df.rename(columns={
    'GM Region': 'unique_id',
    'Month-Year': 'ds',
    'CPI': 'trend',
    'EUR': 'y'
})

In [8]:
df.head()

Unnamed: 0,unique_id,ds,y,trend
0,Berema,2020-01-01,1130630.0,1.73
1,Berema,2020-02-01,2034458.0,0.72
2,Berema,2020-03-01,6543391.0,0.54
3,Berema,2020-04-01,5069735.0,0.63
4,Berema,2020-05-01,1050716.0,1.08


In [16]:
# Get unique series IDs
unique_ids = df['unique_id'].unique()

# One-hot encode without prefix
static_features = pd.get_dummies(pd.DataFrame({'unique_id': unique_ids}),
                                 columns=['unique_id'], prefix='', prefix_sep='').astype(int)

# Add unique_id as the first column
df_static = pd.DataFrame({'unique_id': unique_ids})
df_static = pd.concat([df_static, static_features], axis=1)

In [17]:
df_static.head()

Unnamed: 0,unique_id,Berema,DACH and Benelux,Eastern Europe,France,Greater China,India,Liuzhou,MEN,Mexico and Central America,Scandinavia,South America,South Europe,UK and Ireland,US and Canada
0,Berema,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,DACH and Benelux,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,Eastern Europe,0,0,1,0,0,0,0,0,0,0,0,0,0,0
3,France,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,Greater China,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [5]:
# Split data into training (up to 2024-12) and test (2025-01 to 2025-03)
train_cutoff = pd.Timestamp('2025-01-01')
df_train = df[df['ds'] < train_cutoff].copy()   # data up to Dec 2024
df_test  = df[df['ds'] >= train_cutoff].copy()  # data from Jan 2025 onward

print("Training period:", df_train['ds'].min(), "to", df_train['ds'].max())
print("Test period:", df_test['ds'].min(), "to", df_test['ds'].max())
print("Number of series (regions):", df['unique_id'].nunique())
print("Train size:", len(df_train), "rows  |  Test size:", len(df_test), "rows")

Training period: 2020-01-01 00:00:00 to 2024-12-01 00:00:00
Test period: 2025-01-01 00:00:00 to 2025-03-01 00:00:00
Number of series (regions): 14
Train size: 840 rows  |  Test size: 42 rows


In [6]:
model = NHITS(h=12,
              input_size=24,
              loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=True),
              stat_exog_list=['Berema'],
              futr_exog_list=['trend'],
              n_freq_downsample=[2, 1, 1],
              scaler_type='robust',
              max_steps=200,
              early_stop_patience_steps=2,
              inference_windows_batch_size=1,
              val_check_steps=10,
              learning_rate=1e-3)

Seed set to 1


In [7]:
fcst = NeuralForecast(models=[model], freq='MS')

In [24]:
# 1️⃣ Prepare futr_df for prediction
futr_df = df_test[['unique_id', 'ds', 'trend']]

In [None]:
fcst.fit(df=df_train, static_df=df_static, val_size=12)


In [25]:
forecasts = fcst.predict(futr_df=df_test)

ValueError: There are missing combinations of ids and times in `futr_df`.
You can run the `make_future_dataframe()` method to get the expected combinations or the `get_missing_future(futr_df)` method to get the missing combinations.