https://nixtlaverse.nixtla.io/neuralforecast/docs/capabilities/cross_validation.html

In [None]:
# !pip install neuralforecast

In [None]:
import logging
import matplotlib.pyplot as plt
import pandas as pd
from utilsforecast.plotting import plot_series

from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS

from neuralforecast.auto import AutoNHITS, AutoRNN, AutoLSTM, AutoGRU, AutoTCN, AutoDeepAR, AutoDilatedRNN, AutoBiTCN
from neuralforecast.auto import AutoMLP, AutoNBEATS, AutoNBEATSx, AutoDLinear, AutoNLinear, AutoTiDE, AutoDeepNPTS
from neuralforecast.auto import AutoTFT, AutoVanillaTransformer, AutoInformer, AutoAutoformer, AutoFEDformer
from neuralforecast.auto import AutoPatchTST, AutoiTransformer, AutoTimesNet

In [None]:
import logging
import os
import warnings

# Keep the notebook output readable: only errors from the "pytorch_lightning"
# logger are emitted, and Python warnings are ignored.
logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
warnings.filterwarnings('ignore')

# Requested location for the Lightning logs (any other path works as well).
# NOTE(review): confirm that the installed Lightning version actually reads
# this environment variable.
os.environ.update({"LIGHTNING_LOGS_DIR": "/workdir/my_lightning_logs"})

In [None]:
# Y_df = pd.read_parquet('https://datasets-nixtla.s3.amazonaws.com/m4-hourly.parquet')
# Y_df

In [None]:
# Folder that holds the dataset; note that it already ends with '/'.
folder = './datasets/crypto/hourly/'

# Read dataset.csv from that folder and display the resulting frame.
df = pd.read_csv(f"{folder}dataset.csv")
df

In [None]:
# The first column of the frame is treated as the timestamp column.
date_column_name = df.columns[0]

# Date-only pattern for a column called "date" (case-insensitive),
# date-and-time pattern for anything else.
if date_column_name.lower() == 'date':
    date_format = '%Y-%m-%d'
else:
    date_format = '%Y-%m-%d %H:%M:%S'

date_column_name, date_format

In [None]:
# Convert the timestamp column to datetime, removing the time zone.
if date_column_name.lower() == 'date':
    # Parsed as-is; no time-zone handling is applied in this branch.
    df[date_column_name] = pd.to_datetime(df[date_column_name])
else:
    # utc=True yields UTC-aware values whether or not the raw strings carry an
    # offset, so tz_convert(None) cannot raise TypeError on tz-naive input.
    # Offset-aware input still ends up as naive UTC timestamps.
    df[date_column_name] = pd.to_datetime(df[date_column_name], utc=True).dt.tz_convert(None)

df

In [None]:
# pure pandas version (faster, more memory-friendly)
def convert_nixtla(df, date_col=None):
    """Reshape a wide frame into the long ``ds`` / ``unique_id`` / ``y`` layout.

    Args:
        df: Wide DataFrame with one timestamp column plus one column per series.
        date_col: Name of the timestamp column. Defaults to the notebook-level
            ``date_column_name`` variable when omitted.

    Returns:
        A new DataFrame with the columns ``ds`` (the timestamp column),
        ``unique_id`` (the former column name) and ``y`` (the cell value).
    """
    if date_col is None:
        # Backward-compatible default: fall back to the notebook global.
        date_col = date_column_name

    # Wide -> long: every non-timestamp column becomes rows tagged with its name.
    df_long = df.melt(id_vars=[date_col], var_name="ticker", value_name="price")

    # Rename to the column names used by Nixtla's long format.
    return df_long.rename(columns={date_col: "ds", "ticker": "unique_id", "price": "y"})

In [None]:
# The 'Real' column is dropped before the frame is reshaped to long format.
wide_df = df.drop(columns='Real')
Y_df = convert_nixtla(wide_df)
Y_df

In [None]:
# Y_df = Y_df.query("unique_id == 'BTC-USD'")#[:700]
# Y_df

In [None]:
# Plot the contents of the long-format frame with utilsforecast's helper.
plot_series(Y_df)

In [None]:
import neuralforecast.auto
from neuralforecast.auto import AutoNHITS, AutoRNN, AutoLSTM, AutoGRU, AutoTCN, AutoDeepAR, AutoDilatedRNN, AutoBiTCN
from neuralforecast.auto import AutoMLP, AutoNBEATS, AutoNBEATSx, AutoDLinear, AutoNLinear, AutoTiDE, AutoDeepNPTS
from neuralforecast.auto import AutoTFT, AutoVanillaTransformer, AutoInformer, AutoAutoformer, AutoFEDformer
from neuralforecast.auto import AutoPatchTST, AutoiTransformer, AutoTimesNet

# Forecast horizon passed as `h` to every config below.
horizont = 1


def _default_config(auto_cls, **extra):
    """Return the default hyperparameter config of ``auto_cls`` for the "ray" backend."""
    return auto_cls.get_default_config(h=horizont, backend="ray", **extra)


# --- CONFIGS ---
# Default hyperparameter settings, one per Auto* class.

#A. RNN-Based
rnn_config = _default_config(AutoRNN)
lstm_config = _default_config(AutoLSTM)
gru_config = _default_config(AutoGRU)
tcn_config = _default_config(AutoTCN)
deep_ar_config = _default_config(AutoDeepAR)
dilated_rnn_config = _default_config(AutoDilatedRNN)
bitcn_config = _default_config(AutoBiTCN)

#B. MLP-Based
mlp_config = _default_config(AutoMLP)
nbeats_config = _default_config(AutoNBEATS)
nbeatsx_config = _default_config(AutoNBEATSx)
nhits_config = _default_config(AutoNHITS)
dlinear_config = _default_config(AutoDLinear)
nlinear_config = _default_config(AutoNLinear)
tide_config = _default_config(AutoTiDE)
deep_npts_config = _default_config(AutoDeepNPTS)

#C. Transformer models
tft_config = _default_config(AutoTFT)
vanilla_config = _default_config(AutoVanillaTransformer)
informer_config = _default_config(AutoInformer)
autoformer_config = _default_config(AutoAutoformer)
fedformer_config = _default_config(AutoFEDformer)
patch_tst_config = _default_config(AutoPatchTST)

# This is the only config that also receives n_series.
itransformer_config = _default_config(AutoiTransformer, n_series=1)

#D. CNN Based
timesnet_config = _default_config(AutoTimesNet)

In [None]:
# --- MODELS ---
# One Auto* model per default config; every model uses the horizon `horizont`
# and is created with verbose=False.

#A. RNN-Based
rnn_model = AutoRNN(h=horizont, config=rnn_config, verbose=False)
lstm_model = AutoLSTM(h=horizont, config=lstm_config, verbose=False)
gru_model = AutoGRU(h=horizont, config=gru_config, verbose=False)
tcn_model = AutoTCN(h=horizont, config=tcn_config, verbose=False)
deep_ar_model = AutoDeepAR(h=horizont, config=deep_ar_config, verbose=False)
dilated_rnn_model = AutoDilatedRNN(h=horizont, config=dilated_rnn_config, verbose=False)
bitcn_model = AutoBiTCN(h=horizont, config=bitcn_config, verbose=False)

#B. MLP-Based
mlp_model = AutoMLP(h=horizont, config=mlp_config, verbose=False)
nbeats_model = AutoNBEATS(h=horizont, config=nbeats_config, verbose=False)
# NBEATSx gets its own config; it was previously handed the NBEATS one.
nbeatsx_model = AutoNBEATSx(h=horizont, config=nbeatsx_config, verbose=False)
nhits_model = AutoNHITS(h=horizont, config=nhits_config, verbose=False)
dlinear_model = AutoDLinear(h=horizont, config=dlinear_config, verbose=False)
nlinear_model = AutoNLinear(h=horizont, config=nlinear_config, verbose=False)
tide_model = AutoTiDE(h=horizont, config=tide_config, verbose=False)
deep_npts_model = AutoDeepNPTS(h=horizont, config=deep_npts_config, verbose=False)

#C. Transformer models
tft_model = AutoTFT(h=horizont, config=tft_config, verbose=False)
vanilla_model = AutoVanillaTransformer(h=horizont, config=vanilla_config, verbose=False)
informer_model = AutoInformer(h=horizont, config=informer_config, verbose=False)
autoformer_model = AutoAutoformer(h=horizont, config=autoformer_config, verbose=False)
fedformer_model = AutoFEDformer(h=horizont, config=fedformer_config, verbose=False)
patch_tst_model = AutoPatchTST(h=horizont, config=patch_tst_config, verbose=False)

# NOTE(review): n_series=1 presumably has to match the number of distinct
# unique_id values in the training frame — confirm against Y_df.
itransformer_model = AutoiTransformer(h=horizont, n_series=1, config=itransformer_config, verbose=False)

#D. CNN Based
timesnet_model = AutoTimesNet(h=horizont, config=timesnet_config, verbose=False)

In [None]:
# Short names used to build the per-model output paths. Entries are paired by
# position with the `models` list, so both lists must keep the same order.
# Disabled: lstm, gru, mlp, dlinear, nlinear, informer, autoformer, fedformer,
#           nbeats, nbeatsx
MODEL_NAMES = [
    'bitcn',
    'rnn',
    'tcn',
    'deep_ar',
    'dilated_rnn',
    'nhits',
    'tide',
    'deep_npts',
    'tft',
    'vanilla',
    'patch_tst',
    'itransformer',
]

In [None]:
# Models to cross-validate. Entries are paired by position with MODEL_NAMES,
# so both lists must keep the same order.
# Disabled: lstm_model, gru_model, mlp_model, dlinear_model, nlinear_model,
#           informer_model, autoformer_model, fedformer_model, nbeats, nbeatsx
models = [
    bitcn_model,
    rnn_model,
    tcn_model,
    deep_ar_model,
    dilated_rnn_model,
    nhits_model,
    tide_model,
    deep_npts_model,
    tft_model,
    vanilla_model,
    patch_tst_model,
    itransformer_model,
]

In [None]:
# %%time
# # training each model individually
# import logging

# # Configure logging
# logging.basicConfig(filename='cross_logs.txt', level=logging.INFO, filemode='w')

# h = horizont
# n_windows=5888
# #refit=False
# refit=24*30 #refit every 30 days
# nfs = []
# cv_dfs = []

# for model in models:
#     logging.info(f"Starting cross validation for model {type(model)}")
#     nf = NeuralForecast(models=[model], freq='h');

#     try:
#       cv_df = nf.cross_validation(Y_df, n_windows=n_windows, step_size=h, refit=refit)
#       cv_dfs.append(cv_df)
#       nfs.append(nf)
#       logging.info(f"Finished cross validation of model {type(model)}")
#     except Exception as e:
#       logging.error(f"Error in cross validation for model {type(model)}: {e}")

In [None]:
# `folder` comes from the dataset-loading cell ('./datasets/crypto/hourly/').
# Two output trees: one for plain cross-validation, one for runs with refit.
cross_folder = f"{folder}/cross-validation/plain"
refit_folder = f"{folder}/cross-validation/refit"
output_folder = refit_folder  # tree used by the per-model loop

# Suffixes appended to a tree for saved models and for forecast CSVs.
model_folder = '/models/'
forecast_folder = '/forecasts/'

# Only the forecast folder of `output_folder` is created here.
os.makedirs(f"{output_folder}{forecast_folder}", exist_ok=True)

# Kept for reference; the per-model loop performs these saves itself.
# for i in range(len(models)):
#     nfs[i].save(output_folder + model_folder + MODEL_NAMES[i].lower(), overwrite=True)
#     cv_dfs[i].to_csv(output_folder + forecast_folder + MODEL_NAMES[i] + '.csv', index=False)

In [None]:
%%time
# training each model individually
import logging

# Configure logging
logging.basicConfig(filename='cross_logs.txt', level=logging.INFO, filemode='w')

h = horizont
n_windows=5888
#refit=False
refit=24*30 #refit every 30 days
nfs = []
cv_dfs = []

# for model in models:
for i in range(len(models)):
    logging.info(f"Starting cross validation for model {type(models[i])}")
    nf = NeuralForecast(models=[models[i]], freq='h');

    try:
      cv_df = nf.cross_validation(Y_df, n_windows=n_windows, step_size=h, refit=refit)
      cv_dfs.append(cv_df)
      nfs.append(nf)
      nfs[i].save(output_folder + model_folder + MODEL_NAMES[i].lower(), overwrite=True)
      cv_dfs[i].to_csv(output_folder + forecast_folder + MODEL_NAMES[i] + '.csv', index=False)
      logging.info(f"Finished cross validation of model {type(models[i])}")
    except Exception as e:
      logging.error(f"Error in cross validation for model {type(models[i])}: {e}")

In [None]:
import IPython

# Audio widget with autoplay enabled; presumably an audible "run finished"
# signal. The file is looked up relative to the working directory.
notification = IPython.display.Audio("file_example_MP3_1MG.mp3", autoplay=True)
notification

In [None]:
%%time
# training all models together
h = horizont
n_windows=5888

nf = NeuralForecast(models=models, freq='h');
cv_df = nf.cross_validation(Y_df, n_windows=n_windows, step_size=h)

nf.save(cross_folder + model_folder + 'ALL', overwrite=True)
cv_df.to_csv(cross_folder + forecast_folder + 'ALL.csv', index=False)

cv_df

In [None]:
cutoffs = cv_df['cutoff'].unique()

# Must name a forecast column of cv_df, i.e. a model that was actually
# trained; AutoLSTM is commented out of `models`.
# NOTE(review): forecast columns are expected to be named after the model
# class (e.g. 'AutoNHITS') — confirm against cv_df.columns.
model_name = 'AutoNHITS'

plt.figure(figsize=(15,5))
plt.plot(Y_df['ds'], Y_df['y'])
# Plot the forecasts against their timestamps; the labelled line is also what
# gives plt.legend() something to show.
plt.plot(cv_df['ds'], cv_df[model_name], label=model_name, ls='--')

for cutoff in cutoffs:
    plt.axvline(x=cutoff, color='black', ls=':')

# Limits are passed as Timestamps and applied after plotting, so they are
# interpreted on the datetime axis instead of as plain strings.
plt.xlim(pd.Timestamp('2024-08-26 17:00:00'), pd.Timestamp('2025-04-29 00:00:00'))
plt.xlabel('Time steps')
plt.ylabel('Target')
plt.legend()
plt.tight_layout()

In [None]:
# cv_df_val_test = nf.cross_validation(Y_df, val_size=2000, test_size=200, step_size=h, n_windows=None)

In [None]:
# cutoffs = cv_df_val_test['cutoff'].unique()
# plt.figure(figsize=(15,5))

# # Plot the original data and NHITS predictions
# plt.plot(Y_df['ds'], Y_df['y'])
# plt.plot(cv_df_val_test['ds'], cv_df_val_test[model_name], label=model_name, ls='--')

# # Add highlighted areas for validation and test sets
# plt.axvspan(Y_df['ds'].iloc[300], Y_df['ds'].iloc[499], alpha=0.2, color='yellow', label='Validation Set')
# plt.axvspan(Y_df['ds'].iloc[500], Y_df['ds'].iloc[699], alpha=0.2, color='red', label='Test Set')

# # Add vertical lines for cutoffs
# for cutoff in cutoffs:
#     plt.axvline(x=cutoff, color='black', ls=':')

# # Set labels and legend
# plt.xlabel('Time steps')
# plt.ylabel('Target [H1]')
# plt.legend()

# plt.tight_layout()
# plt.show()

In [None]:
%%time
refit=24*7 # refit every week
# Cross-validate again with the current `nf`, this time passing `refit`.
cv_df_refit = nf.cross_validation(Y_df, n_windows=n_windows, step_size=h, refit=refit)

In [None]:
# Distinct cutoff timestamps of the refit run, one vertical marker each.
cutoffs = cv_df_refit['cutoff'].unique()

fig, ax = plt.subplots(figsize=(15,5))

# Observed values, then the forecasts of the model selected by `model_name`.
ax.plot(Y_df['ds'], Y_df['y'])
ax.plot(cv_df_refit['ds'], cv_df_refit[model_name], label=model_name, ls='--')

for cutoff in cutoffs:
    ax.axvline(x=cutoff, color='black', ls=':')

ax.set_xlabel('Time steps')
ax.set_ylabel('Target [H1]')
ax.legend()
fig.tight_layout()

In [None]:
# NOTE(review): the next line is not valid Python; presumably a deliberate
# barrier so that "Run All" halts at this cell — confirm before removing.
stop here

In [None]:
%%time
# Cross-validation with only two windows; `refit`, `h` and `nf` are reused
# from earlier cells.
cv_df_refit_overlap = nf.cross_validation(Y_df, n_windows=2, step_size=h, refit=refit)

In [None]:
# Display the first entry of `cutoffs`.
cutoffs[0]

In [None]:
# Display all entries of `cutoffs`.
cutoffs

In [None]:
cutoffs = cv_df_refit_overlap['cutoff'].unique()

# Split the forecasts by fold with a boolean mask on `cutoff`. A bare
# timestamp string is not a valid DataFrame.query expression, and the mask
# works whatever array type `unique()` returned.
fold1 = cv_df_refit_overlap[cv_df_refit_overlap['cutoff'] == cutoffs[0]]
fold2 = cv_df_refit_overlap[cv_df_refit_overlap['cutoff'] == cutoffs[1]]

plt.figure(figsize=(15,5))
plt.plot(Y_df['ds'], Y_df['y'])
plt.plot(fold1['ds'], fold1[model_name], label=model_name+' (fold 1)', ls='--', color='blue')
plt.plot(fold2['ds'], fold2[model_name], label=model_name+' (fold 2)', ls='-.', color='red')

for cutoff in cutoffs:
    plt.axvline(x=cutoff, color='black', ls=':')

plt.xlabel('Time steps')
#plt.ylabel('Target [H1]')
# `ds` holds datetimes, so the zoom window is given as timestamps; integer
# limits such as 500-700 do not land on the plotted date range. The window
# starts 48 hours before the earliest cutoff (an arbitrary amount of context)
# and ends at the last forecast timestamp.
zoom_start = cv_df_refit_overlap['cutoff'].min() - pd.Timedelta(hours=48)
zoom_end = cv_df_refit_overlap['ds'].max()
plt.xlim(zoom_start, zoom_end)
plt.legend()
plt.tight_layout()