In [None]:
from neuralforecast import NeuralForecast
from ray.tune.search.hyperopt import HyperOptSearch
from neuralforecast.losses.pytorch import MAE
from ray import tune
import ray

# import neuralforecast.auto
from neuralforecast.auto import AutoNHITS, AutoRNN, AutoLSTM, AutoGRU, AutoTCN, AutoDeepAR, AutoDilatedRNN, AutoBiTCN
from neuralforecast.auto import AutoMLP, AutoNBEATS, AutoNBEATSx, AutoDLinear, AutoNLinear, AutoTiDE, AutoDeepNPTS
from neuralforecast.auto import AutoTFT, AutoVanillaTransformer, AutoInformer, AutoAutoformer, AutoFEDformer
from neuralforecast.auto import AutoPatchTST, AutoiTransformer, AutoTimesNet

import torch
import pandas as pd
import numpy as np
import yfinance as yf

import logging

# Only let PyTorch Lightning log errors, so progress/info messages do not flood the output.
logging.getLogger('pytorch_lightning').setLevel(logging.ERROR)
# Start Ray without forwarding worker logs to the notebook.
# ignore_reinit_error=True keeps this cell re-runnable: without it a second
# ray.init() call in the same kernel raises instead of reusing the running instance.
ray.init(log_to_driver=False, ignore_reinit_error=True)

In [None]:
# Input CSV, given relative to the working directory.
# Swap the commented line in to use the daily file instead of the hourly one.
# dataset = 'm6dataset-daily.csv'
dataset = 'm6dataset-hourly.csv'

# Read the whole file into a data frame and preview its first 10 rows.
m6_df = pd.read_csv(dataset)
m6_df.head(10)

In [None]:
# The first column of the CSV is treated as the date/timestamp column.
date_column_name = m6_df.columns[0]
# A column literally named "date" (case-insensitive) gets a date-only format;
# any other name gets a date-and-time format.
date_format = '%Y-%m-%d' if date_column_name.lower() == 'date' else '%Y-%m-%d %H:%M:%S'

# Display both values for a quick sanity check.
date_column_name, date_format

In [None]:
# Convert the date column to datetime and drop the time zone (values end up as naive UTC).
# Parsing with utc=True first makes tz_convert(None) valid for every input:
# tz-aware values, mixed UTC offsets, and tz-naive values (which are read as UTC).
# It also keeps the cell re-runnable, since the column is tz-naive after the first run.
m6_df[date_column_name] = pd.to_datetime(m6_df[date_column_name], utc=True).dt.tz_convert(None)

m6_df.head(10)

In [None]:
# Cut-off timestamp: get_split_date_index looks for the first row dated on or after it.
# The commented line is an alternative cut-off.
split_date = pd.to_datetime('2024-08-26')
#split_date = pd.to_datetime('2025-01-01')

# Find the first row where the date is equal or greater than split_date
def get_split_date_index(df, split_date):
  """Return the position of the first row whose column-0 value is >= split_date.

  Args:
    df: data frame whose first column holds values comparable to split_date.
    split_date: threshold to compare the first column against.

  Returns:
    The 0-based row position as an int, or None when no row qualifies
    (including when df is empty).
  """
  # One vectorized comparison instead of a Python loop over iloc lookups.
  on_or_after = (df.iloc[:, 0] >= split_date).to_numpy()
  if not on_or_after.any():
    return None
  # argmax on a boolean array yields the position of the first True.
  return int(on_or_after.argmax())

# Row position where the test period starts (first row dated on or after split_date).
split_idx = get_split_date_index(m6_df, split_date)
# Display the position together with the date found there.
split_idx, m6_df.iloc[split_idx, 0]

In [None]:
# m6_df.iloc[:split_idx, :]

In [None]:
# prompt: create a method for plotting df using an interactive line graph, one line for columns, starting from column 2, column 1 is the index, ignore column 0. the graph must show only the legend and value of the line where the mouse hovers over.
import plotly.express as px

def plot_df(df):
  """Show an interactive Plotly line chart of a wide data frame.

  The first column of df becomes the x axis; one line is drawn per plotted
  column, and hovering shows only the point closest to the cursor.

  NOTE(review): after the first column is moved into the index, the y columns
  are taken from position 2 onward of the *remaining* columns, so the first
  two remaining columns are not plotted - confirm this offset is intended.
  """
  indexed = df.set_index(df.columns[0])

  fig = px.line(
      indexed,
      x=indexed.index,
      y=indexed.columns[2:],
      hover_data={"variable":False},
  )
  fig.update_traces(mode='lines', hovertemplate=None)

  layout_options = dict(
      title='M6 Dataset',
      xaxis_title='Days',
      yaxis_title='Adj. Values',
      hovermode='closest',  # only the data point under the cursor is shown
      showlegend=True,      # keep the legend visible
  )
  fig.update_layout(**layout_options)

  fig.show()

In [None]:
# plot_df(m6_df)

## Nixtla

In [None]:
import warnings

# Silence every Python warning from here on (applies to all categories and modules).
warnings.filterwarnings('ignore')

# Accepted values: 'medium', 'high' or 'highest'.
# 'highest' keeps float32 matrix multiplications at full precision.
torch.set_float32_matmul_precision('highest')

In [None]:
# Directory holding the saved (pre-trained) models; nothing is loaded in this cell.
# A model's path is built as folder + model name.
folder = './models/'

# Names of the saved models; the trailing number on each line is its list index.
MODEL_NAMES = [
    'lstm', # ......... 0
    'gru', # .......... 1
    'mlp', # .......... 2
    'dlinear', # ...... 3
    'nlinear', # ...... 4
    'informer', # ..... 5
    'autoformer', # ... 6
    'fedformer', # .... 7
    'bitcn', # ........ 8
    'rnn', # .......... 9
]

In [None]:
# convert yfinance format to nixtla's
# def convert_nixtla(df):
#   ndf = pd.DataFrame(columns=['unique_id', 'ds', 'y'])

#   for col in df.columns[1:]:
#     temp = df[[date_column_name, col]].copy()
#     temp['unique_id'] = col
#     temp.rename(columns={date_column_name:'ds', col: 'y'}, inplace=True)
#     ndf = pd.concat([ndf, temp], ignore_index=True)
#   ndf['ds']=pd.to_datetime(ndf['ds'])

#   return ndf

In [None]:
# Decomposes a pandas yfinance dataframe into numpy arrays so they can be manipulated more efficiently
# than working directly on the dataframe when producing Nixtla's bizarre [unique_id, ds, y] dataframe
# def np_decompose(df, idx):
#   ncols = np.array(df.columns[1:])
#   ndates = df.iloc[:idx, 0].to_numpy()
#   ndata = df.iloc[:idx, 1:].to_numpy().transpose()

#   return ncols, ndates, ndata

In [None]:
# Convert yfinance format to Nixtla's bizarre dataframe format
# def gen_nixtlas_bizzarre_dataframe(dec_df):
#   ncols, ndates, ndata = dec_df
#   rows, cols = ndata.shape

#   unique_id = np.repeat(ncols, cols)
#   ds = np.tile(ndates, rows)
#   y = ndata.reshape(-1)

#   return pd.DataFrame({'unique_id': unique_id, 'ds': ds, 'y': y})

In [None]:
# pandas-numpy version (slower, memory-greedier)
# def convert_nixtla(df, idx):
#   return gen_nixtlas_bizzarre_dataframe(np_decompose(df, idx))

In [None]:
# pure pandas version (faster, more memory-friendly)
def convert_nixtla(df, idx):
  """Convert the first idx rows of a wide frame into Nixtla's long format.

  The date column is taken from df itself (its first column) rather than from
  a notebook global, so the function works on any frame laid out the same way.

  Args:
    df: wide data frame; column 0 holds the timestamps and every other
      column holds the values of one series.
    idx: number of leading rows to keep (rows at positions [0, idx)).

  Returns:
    A long data frame with columns ['ds', 'unique_id', 'y']: one row per
    (timestamp, series) pair, grouped series by series in df's column order.
  """
  date_col = df.columns[0]

  # Wide -> long, naming the melted columns the way Nixtla expects straight away.
  long_df = df.iloc[:idx, :].melt(id_vars=[date_col], var_name="unique_id", value_name="y")

  # Only the date column still needs its Nixtla name.
  return long_df.rename(columns={date_col: "ds"})

In [None]:
from IPython.display import clear_output

def forecast(nf, df, idx):
  """Roll through the rows of df from position idx onward, forecasting one row at a time.

  For test row i, the rows before position idx + i are converted to Nixtla's
  long format (convert_nixtla) and passed to nf.predict. The first forecast
  column returned is stored as row i of the result.

  Args:
    nf: object exposing predict(df=...) that returns a frame with 'unique_id'
      and 'ds' columns followed by at least one forecast column.
    df: wide data frame; column 0 holds the timestamps, every other column
      holds one series.
    idx: position of the first row to forecast. All slicing is based on this
      argument, not on the notebook-level split_idx.

  Returns:
    A data frame with one row per forecast step. Column 0 keeps the name of
    df's first column and holds the forecast timestamp formatted with the
    notebook's date_format; the remaining columns are named after the
    unique_id values returned by predict, in the order predict returned them.
    When there is nothing to forecast (idx >= len(df)) an empty frame with
    df's columns is returned.

  Raises:
    ValueError: if predict does not return exactly one value per series.
      NOTE(review): this assumes the loaded model has a forecast horizon of 1 - confirm.
  """
  test_rows = len(df) - idx
  if test_rows <= 0:
    # Nothing to forecast; returning early also avoids formatting an empty result.
    return df.iloc[0:0].copy().reset_index(drop=True)

  date_col = df.columns[0]
  n_series = df.shape[1] - 1
  counter = 0

  # Rows are collected in a list and turned into a frame once at the end,
  # instead of zero-filling a copy of df (date column included) and overwriting it.
  rows = []
  series_ids = []

  for i in range(test_rows):

    # Progress indicator: refreshed each time another 0.1% of the rows is done.
    div = (i * 1000) // test_rows
    if (div > counter):
        clear_output(wait=True)
        print(f'{((i * 100) / test_rows):.1f}%')
        counter = div

    # Forecast the next step from all rows before position idx + i.
    nixtla_df = convert_nixtla(df, idx + i)
    pred = nf.predict(df=nixtla_df)

    # Timestamp of the forecast step, stored as a formatted string.
    date_value = pd.to_datetime(pred['ds'].iloc[0]).strftime(date_format)

    # First forecast column, indexed by series id (pred itself is left untouched).
    first_model = pred.set_index('unique_id').drop(columns=['ds']).iloc[:, 0]
    if len(first_model) != n_series:
      raise ValueError(
          f'predict returned {len(first_model)} values for {n_series} series; '
          'expected exactly one forecast per series'
      )

    # Values are stored positionally, in the order predict returned the series;
    # the column names of the result follow that same order.
    series_ids = list(first_model.index)
    rows.append([date_value, *first_model.tolist()])

  return pd.DataFrame(rows, columns=[date_col] + series_ids)

# i = 0
# print(f'########## Forecasting {MODEL_NAMES[i].upper()} ({i + 1}/{len(MODEL_NAMES)}) ##########')
# nf = NeuralForecast.load(folder + MODEL_NAMES[i])
# forecast(nf, m6_df, split_idx)

In [None]:
%%time
#forecasts = []
# Output directory for forecast CSV files (the to_csv call at the end of this cell is commented out).
ffolder = './forecasts/'

# copy of m6_df without the 'Real' column
m6_df_no_real = m6_df.drop('Real', axis=1)

# Only the model at index i of MODEL_NAMES is processed; the loop over all models is disabled.
#for i in range(len(MODEL_NAMES)):
#  print()
i = 0
print(f'########## Forecasting {MODEL_NAMES[i].upper()} ({i + 1}/{len(MODEL_NAMES)}) ##########')
# Load the saved model from folder + model name.
nf = NeuralForecast.load(folder + MODEL_NAMES[i])
#forecasts.append()
# Forecast every row from split_idx onward.
fc = forecast(nf, m6_df_no_real, split_idx)#m6_train, m6_test)
#fc.to_csv(ffolder + 'forecast-' + MODEL_NAMES[i] + '.csv', index=False)

In [None]:
import IPython
# Embed an audio player for the MP3 file and start playback automatically.
# NOTE(review): presumably an audible "run finished" signal after the long forecasting cell - confirm.
IPython.display.Audio("file_example_MP3_1MG.mp3", autoplay=True)