In [None]:
import os
import warnings


In [None]:
# Environment overrides kept for reference; both are currently disabled.
# os.environ["PYTHONWARNINGS"]="ignore"
# os.environ["PYTORCH_ENABLE_MPS_FALLBACK"]="1"
# Suppress every Python warning for the rest of the session: no category,
# message or module restriction is passed, so the filter applies to all of them.
warnings.filterwarnings("ignore")
# os.environ

In [None]:
import datetime
import glob
from functools import lru_cache, partial
from pprint import pprint

import gym_trading_env
import gymnasium as gym
import matplotlib.pyplot as plt
import MultiTrade
import numpy as np
import pandas as pd
import torch
from gym_trading_env.downloader import download
from gym_trading_env.environments import TradingEnv

from gym_trading_env.renderer import Renderer
from IPython.display import display


from tqdm.autonotebook import tqdm
from utils.utils import build_dataset, build_market_image,preprocess_data
from utils.forecast_utils import build_model_get_data,get_dataset_columns

import ray                              

from statsforecast import StatsForecast

from neuralforecast.core import NeuralForecast
from neuralforecast.models import NHITS, NBEATS, PatchTST
from neuralforecast.auto import AutoNHITS, AutoLSTM,AutoPatchTST,AutoNBEATS
from neuralforecast.losses.numpy import mae, mse
from ray import tune
from neuralforecast.losses.pytorch import MAE, MSE
# from ray.tune.search.hyperopt import HyperOptSearch
import optuna
from optuna import trial
# optuna.logging.set_verbosity(optuna.logging.WARNING) # Use this to disable training prints from optuna

In [None]:
# Trading pairs used to build the market image (order is significant to callers
# that index into the list).
COIN_PAIRS = [
    "BTC/USDT",
    "ETH/USDT",
    "SOL/USDT",
    "BNB/USDT",
    "XRP/USDT",
    "ADA/USDT",
    "ETH/BTC",
    "SOL/ETH",
    "BNB/ETH",
    "XRP/ETH",
    "ADA/ETH",
    "SOL/BTC",
    "SOL/BNB",
    "XRP/BTC",
    "XRP/BNB",
    "ADA/BTC",
    "ADA/BNB",
]

# Symbol of the series being forecast (no slash, unlike the entries above).
target_pair = "ETHUSDT"

# Candle resolution; also used as the frequency string for time features.
time_frame = "1h"

In [None]:
import gluonts

In [None]:
from gluonts.time_feature import time_features_from_frequency_str

In [None]:
# Collect every cached candle file for the chosen time frame.
# glob only expands "**" across directory levels when it is a complete path
# segment, so it gets its own segment ahead of the file-name pattern; the
# previous pattern ("data/**<tf>.pkl") silently behaved like a single "*".
# Sorting makes files[0] reproducible, since glob returns directory order.
files = sorted(glob.glob(f'data/**/*{time_frame}.pkl', recursive=True))

# Time-feature callables for this frequency string; only the first is inspected.
time_funcs = time_features_from_frequency_str(time_frame)
t_func = time_funcs[0]
t_func

In [None]:
# Load the first matched pickle into `data`.
# NOTE(review): unpickling can execute arbitrary code, so only load files this
# project produced; an empty `files` list raises IndexError on this line.
data=pd.read_pickle(files[0])


In [None]:
# Apply the selected time feature to the loaded frame's index and display the result.
t_func(data.index)

In [None]:
# Rebuild `data` from the project helper (this replaces the pickle loaded earlier).
# NOTE(review): the pair and time frame are literals here rather than the
# `target_pair` / `time_frame` variables — confirm that is intended.
data=build_market_image(target_pair='ETH/USDT',time_frame='1h',axis=0)

# Within each symbol, keep rows up to the 2024-01-01 label
# (assumes the index is a DatetimeIndex — TODO confirm against build_market_image).
data=data.groupby('symbol').apply(lambda x: x[:pd.Timestamp('2024-01-01')])

# Discard index level 0 (presumably the symbol key added by groupby.apply),
# then move the remaining index into a regular column.
data=data.reset_index(level=0,drop=True).reset_index()
id_columns=['symbol']
output_columns,feature_columns,drop_columns=get_dataset_columns(data,id_columns=id_columns)
# Displayed value: number of output columns plus number of distinct symbols.
len(output_columns)+len(data[id_columns[0]].unique())

In [None]:
# Preview the first five rows (five is the default for head()).
data.head()

In [None]:
# Columns the forecasting frame needs: target, timestamp, series id.
front = ['y', 'ds', 'symbol']

# Move those columns to the front, then keep only them (every other column is dropped).
data = data[front + [col for col in data.columns if col not in front]][front]

In [None]:
# Display the reduced frame (only the `front` columns remain at this point).
data

In [None]:
# Plot the series in `data`, identified by the 'symbol' column, with matplotlib.
StatsForecast.plot(
    data,
    id_col='symbol',
    engine='matplotlib',
)

In [None]:
# Size the training window from the ETHUSDT series: its first 70% of rows.
eth_data = data[data['symbol'] == 'ETHUSDT'].copy()
if eth_data.empty:
    raise ValueError("no rows for symbol 'ETHUSDT'; cannot derive the train/test split")
split = int(len(eth_data) * .7)
print(split)

# `data` stacks every symbol in one frame, so a positional slice (data[:split])
# would cut through the stack instead of through time: the first symbols would
# land entirely in train and the rest entirely in test. Split on the timestamp
# found at position `split` of the ETHUSDT series so that every symbol is
# divided at the same moment and no future rows leak into training.
cutoff = eth_data['ds'].sort_values().iloc[split]
is_train = data['ds'] < cutoff
train = data[is_train]
test = data[~is_train]

# Hold-out ids and targets, kept under the name later cells may expect.
future_exog = test[['symbol', 'y']]


In [None]:
# Overrides applied on top of each model's default Ray Tune config.
# Only the keys a model's default config already contains are copied over.
search_space = {
    # Training loss.
    "loss": MSE(),
    # Length of the input window.
    "input_size": tune.choice([12, 24, 48, 64]),
    "start_padding_enabled": True,
    # Five entries of 1 (presumably blocks per stack — confirm against the model docs).
    "n_blocks": [1] * 5,
    # Five entries of [64, 64] (presumably hidden-layer sizes per stack — confirm).
    "mlp_units": [[64, 64]] * 5,
    # MaxPooling kernel size.
    "n_pool_kernel_size": tune.choice([
        [1] * 5,
        [2] * 5,
        [4] * 5,
        [8, 4, 2, 1, 1],
    ]),
    # Interpolation expressivity ratios.
    "n_freq_downsample": tune.choice([
        [8, 4, 2, 1, 1],
        [1, 1, 1, 1, 1],
    ]),
    # Initial learning rate.
    "learning_rate": tune.loguniform(1e-6, 1e-2),
    # Scaler type (a single option: no scaling).
    "scaler_type": tune.choice([None]),
    # Max number of training iterations.
    "max_steps": tune.choice([1000]),
    # Number of series in a batch.
    "batch_size": tune.choice([1, 4, 10]),
    # Number of windows in a batch.
    "windows_batch_size": tune.choice([128, 256, 512]),
    # Random seed.
    "random_seed": tune.randint(1, 20),
}


In [None]:
# Number of future steps every model predicts.
forecast_horizon = 6
backend = 'optuna'

# Default Ray Tune search spaces, fetched in the same order as the names on the left.
PatchTST_config, NHITS_config, NBEATS_config = (
    auto_cls.get_default_config(h=forecast_horizon, backend='ray')
    for auto_cls in (AutoPatchTST, AutoNHITS, AutoNBEATS)
)
PatchTST_config

In [None]:
# Hyper-parameter names shared by all three default configs.
keys = set(PatchTST_config) & set(NBEATS_config) & set(NHITS_config)
keys

In [None]:
def _default_optuna_config(auto_model, trial, max_steps=5000):
    """Sample `auto_model`'s default Optuna config for `trial` and apply the notebook overrides.

    Args:
        auto_model: An Auto* model class exposing ``get_default_config(h=..., backend=...)``.
        trial: The Optuna trial handed to the config callback.
        max_steps: Training-step budget written over the sampled value.

    Returns:
        dict: The sampled hyper-parameters with ``enable_progress_bar`` set to
        ``False`` and ``max_steps`` set to `max_steps`.
    """
    # With backend='optuna' the default config is a callable that takes the trial.
    sample_defaults = auto_model.get_default_config(h=forecast_horizon, backend='optuna')
    config = dict(sample_defaults(trial))
    config.update({
        "enable_progress_bar": False,
        "max_steps": max_steps,
    })
    return config


def config_patchTST(trial):
    """Optuna config callback for AutoPatchTST (defaults plus notebook overrides)."""
    return _default_optuna_config(AutoPatchTST, trial)


def config_nhits(trial):
    """Optuna config callback for AutoNHITS (defaults plus notebook overrides)."""
    return _default_optuna_config(AutoNHITS, trial)


def config_nbeats(trial):
    """Optuna config callback for AutoNBEATS (defaults plus notebook overrides)."""
    return _default_optuna_config(AutoNBEATS, trial)


def config_lstm(trial):
    """Optuna config callback for AutoLSTM (defaults plus notebook overrides)."""
    return _default_optuna_config(AutoLSTM, trial)

In [None]:
# Overlay the custom search space on each default config, touching only the
# keys that config already defines.
for _cfg in (PatchTST_config, NHITS_config, NBEATS_config):
    _cfg.update({name: space for name, space in search_space.items() if name in _cfg})
PatchTST_config

In [None]:
# Set the PatchTST input-window choices unconditionally. The overlay from
# `search_space` only copies keys the default config already has, so this
# guarantees the entry exists with the same candidate lengths.
PatchTST_config["input_size"]=tune.choice([12, 24, 48, 64])

In [None]:
horizon = 6
context_length = 5 * horizon
max_steps = 2000

# One Optuna-tuned Auto model per architecture. Every model gets its own loss
# and sampler instance, and its own config callback.
models = [
    auto_cls(
        h=forecast_horizon,
        loss=MSE(),
        num_samples=30,
        search_alg=optuna.samplers.TPESampler(),
        backend='optuna',
        config=config_fn,
    )
    for auto_cls, config_fn in (
        (AutoNHITS, config_nhits),
        (AutoNBEATS, config_nbeats),
        (AutoPatchTST, config_patchTST),
        (AutoLSTM, config_lstm),
    )
]

# Wrapper that trains and predicts with all models on hourly data.
nf = NeuralForecast(models=models, freq='1h')

In [None]:
# Smoke-test tensor allocation on the Apple-silicon GPU. Requesting "mps"
# unconditionally raises on machines without that backend, so fall back to the
# CPU there. The variable keeps its original name for cells that reference it.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    mps_device = torch.device("mps")
else:
    mps_device = torch.device("cpu")
z = torch.ones(5, device=mps_device)
z

In [None]:
# Run the hyper-parameter search and train every model on the training frame,
# using a validation set of `horizon` steps.
nf.fit(
    train,
    val_size=horizon,
    id_col='symbol',
    time_col='ds',
    target_col='y',
)


In [None]:
# Persist the fitted models under `model_path`, replacing whatever a previous run saved there.
model_path='forecasting_model/'
nf.save(model_path,overwrite=True)
# nf.load(model_path)

In [None]:

# Forecast the next horizon and average every column whose name contains "Auto".
Y_pred = nf.predict().reset_index()
Y_pred['mean_pred'] = Y_pred.filter(like='Auto').mean(axis=1)

# Attach the hold-out rows for the same symbol/timestamp, then prepend the last
# 48 training rows so plots show some history before the forecast.
preds_df = pd.concat(
    [train.tail(48), Y_pred.merge(test, how='left', on=['symbol', 'ds'])]
).reset_index()
preds_df.tail(10)

In [None]:
# Fresh forecast frame with an ensemble column: the row-wise mean of the
# NHITS, NBEATS and PatchTST forecasts.
fcst_df = nf.predict().assign(
    mean_pred=lambda frame: frame[['AutoNHITS', 'AutoNBEATS', 'AutoPatchTST']].mean(axis=1)
)
fcst_df.head()

In [None]:
# Plot history against forecasts per symbol, showing at most 100 in-sample points.
StatsForecast.plot(
    preds_df,
    fcst_df,
    max_insample_length=100,
    engine='matplotlib',
    id_col='symbol',
)

In [None]:
# First model of the ensemble, kept in `model` for inspection.
model = nf.models[0]
model.max_steps

# Hold-out frame re-indexed 0..n-1 so later cells can slice it by position.
test_df = test.reset_index(drop=True)

# Displayed value: the model's step budget and step size.
(model.max_steps, model.step_size)

In [None]:
# Forecast from a randomly chosen origin inside the hold-out frame and line the
# ensemble prediction up with the rows that actually followed.
plot_size = 500
context_length = 5 * horizon

# Random end of the context window; never 0, so the window holds at least one row.
end = np.random.randint(0, len(test) - plot_size)
end = end if end > 0 else 1
start = max(end - context_length, 0)

pred_ids = test_df.index[start:end]
df = test_df.loc[pred_ids]
# The notebook's frames carry the series id in 'symbol' (the columns kept are
# y / ds / symbol, and nf.fit is called with id_col='symbol'); selecting
# 'unique_id' raised KeyError.
df = df[['ds', 'symbol', 'y']]
# Context rows plus the `horizon` rows after them, re-indexed from 0.
df_plot = test_df.iloc[start:end + horizon].copy().reset_index(drop=True)

# NOTE(review): _prepare_fit is a private NeuralForecast method; the four-value
# return unpacked here may differ between library versions — confirm.
nf.dataset, nf.uids, nf.last_dates, nf.ds = nf._prepare_fit(
    df,
    static_df=None,
    sort_df=None,
    predict_only=False,
    id_col='symbol',
    time_col='ds',
    target_col='y',
)

# Stack every model's prediction, drop singleton axes, average across models.
preds_array = np.array([model.predict(nf.dataset) for model in nf.models]).squeeze().mean(0)

# Write the averaged forecast into the last len(preds_array) rows of the plot frame.
df_plot['mean_pred'] = np.nan
n_values = int(-preds_array.shape[0])
ids = df_plot.index[n_values:]
df_plot.loc[ids, 'mean_pred'] = list(preds_array)

# NOTE(review): plot_preds is not defined or imported in the visible cells.
plot_preds(df_plot)
# df

In [None]:
# Replace the forecaster's internal dataset with the whole hold-out frame.
# The id column is 'symbol' (the frame has no 'unique_id' column, and nf.fit
# was called with id_col='symbol'); passing 'unique_id' raised KeyError.
# NOTE(review): _prepare_fit is a private NeuralForecast method — confirm the
# four-value return against the installed version.
nf.dataset, nf.uids, nf.last_dates, nf.ds = nf._prepare_fit(
    test_df,
    static_df=None,
    sort_df=None,
    predict_only=False,
    id_col='symbol',
    time_col='ds',
    target_col='y',
)

In [None]:
# Counter is provided by `collections`; importing it from `itertools` raises ImportError.
from collections import Counter

In [None]:
# In-sample predictions over the dataset currently held by `nf`.
all_predictions=nf.predict_insample()


In [None]:
# Display the timestamp column of the in-sample predictions.
all_predictions['ds']

In [None]:

# For each cutoff, flatten all numeric columns of its rows into one 1-D array.
forecasts_series = all_predictions.groupby('cutoff').apply(
    lambda window: window.select_dtypes(np.number).to_numpy().flatten()
)
forecasts_array = list(forecasts_series)

# Hold-out rows whose timestamp is one of the cutoffs.
cutoff_values = list(forecasts_series.index)
new_df = test_df[test_df['ds'].isin(cutoff_values)]
print(len(test_df), len(forecasts_array), len(new_df))

In [None]:
# Turn the progress-bar flag off on every model held by the forecaster.
for model in nf.models:
    setattr(model, "enable_progress_bar", False)

In [None]:
# Point the forecaster at the last `horizon` hold-out rows in predict-only mode.
# The id column is 'symbol' (the frame has no 'unique_id' column, and nf.fit
# was called with id_col='symbol'); selecting 'unique_id' raised KeyError.
# NOTE(review): _prepare_fit is a private NeuralForecast method — confirm the
# four-value return against the installed version.
nf.dataset, nf.uids, nf.last_dates, nf.ds = nf._prepare_fit(
    test_df[['ds', 'symbol', 'y']].tail(horizon),
    static_df=None,
    sort_df=None,
    predict_only=True,
    id_col='symbol',
    time_col='ds',
    target_col='y',
)

In [None]:
# Reset the scratch counter.
i = 0

# Rebuild the positionally indexed hold-out frame and show its last `horizon` rows.
test_df = test.reset_index(drop=True)
test_df.tail(n=horizon)


In [None]:
# Build the frame of future timestamps from the forecaster's current state
# (stored in `future`), then display a forecast from that same state.
future=nf.make_future_dataframe()
nf.predict()

In [None]:
# In-sample predictions as a flat frame.
test_pred_df = nf.predict_insample().reset_index()

# The fitted models are the Auto* variants, so their prediction columns carry
# the "Auto" prefix (as the forecast cell's 'AutoNHITS' / 'AutoNBEATS' /
# 'AutoPatchTST' selection shows); the un-prefixed names raised KeyError.
test_pred_df['mean_pred'] = test_pred_df[['AutoNHITS', 'AutoNBEATS', 'AutoPatchTST']].mean(axis=1)
test_pred_df


In [None]:
# Plot the in-sample predictions.
# NOTE(review): plot_preds is not defined or imported in the visible cells — confirm where it comes from.
plot_preds(test_pred_df)
# test_pred_df


In [None]:
# Take a random run of `plot_size` consecutive prediction rows, ordered by timestamp.
plot_size = 500
start = np.random.randint(0, len(test) - plot_size)
plot_df = test_pred_df.iloc[start:start + plot_size].sort_values('ds')

# Smooth the ensemble column with a rolling mean over `horizon` rows.
plot_df['mean_pred'] = plot_df[['mean_pred']].rolling(window=horizon).mean()
plot_preds(plot_df, cut=True)

In [None]:
# Show the rows of the first few forecast windows, one frame per cutoff.
groups = plot_df.groupby('cutoff')
max_i = 10
i = 0  # stays 0 when there are no groups
for i, (name, df) in enumerate(groups, start=1):
    display(df)
    # Stop once more than `max_i` groups have been shown.
    if i > max_i:
        break

In [None]:
# Average of the (smoothed) ensemble column within each cutoff.
plot_df.groupby('cutoff')['mean_pred'].mean()

In [None]:
# Line plot of the ensemble column next to the target column.
preds_df[['mean_pred','y']].plot()

In [None]:
# Display label -> prediction column. The fitted models are the Auto* variants,
# so their columns carry the "Auto" prefix; the un-prefixed names raised KeyError
# (and were silently ignored by round()).
model_columns = {
    'N-HiTS': 'AutoNHITS',
    'N-BEATS': 'AutoNBEATS',
    'PatchTST': 'AutoPatchTST',
}

preds_df = preds_df.round({column: 4 for column in model_columns.values()})

# Score only rows that have both a target and every prediction. The history
# rows prepended to preds_df carry no predictions, and unmatched forecast rows
# carry no target; either would turn each metric into NaN.
scored = preds_df.dropna(subset=['y', *model_columns.values()])

# Built directly into metrics_df so the notebook-wide `data` frame is not
# overwritten by a dict.
metrics_df = pd.DataFrame(
    {
        label: [mae(scored[column], scored['y']), mse(scored[column], scored['y'])]
        for label, column in model_columns.items()
    },
    index=['mae', 'mse'],
)

# Highlight the best (lowest) value in each metric row.
metrics_df.style.highlight_min(color='lightgreen', axis=1)