In [4]:
import datetime
import glob
from functools import lru_cache, partial
from pprint import pprint

import gym_trading_env
import gymnasium as gym
import matplotlib.pyplot as plt
import MultiTrade
import numpy as np
import pandas as pd
import torch
from gym_trading_env.downloader import download
from gym_trading_env.environments import TradingEnv

from gym_trading_env.renderer import Renderer
from IPython.display import display
from ray import train, tune
from tqdm.autonotebook import tqdm
from utils.utils import build_dataset, build_market_image,preprocess_data
from utils.forecast_utils import build_model_get_data,get_dataset_columns

import ray

from transformers import (
    EarlyStoppingCallback,
    PatchTSTConfig,
    PatchTSTForPrediction,
    Trainer,
    TrainingArguments,
)


In [5]:
# Trading pairs that make up the market universe, kept in their original order.
COIN_PAIRS = [
    # majors quoted in USDT
    "BTC/USDT",
    "ETH/USDT",
    "SOL/USDT",
    "BNB/USDT",
    "XRP/USDT",
    "ADA/USDT",
    # crosses
    "ETH/BTC",
    "SOL/ETH",
    "BNB/ETH",
    "XRP/ETH",
    "ADA/ETH",
    "SOL/BTC",
    "SOL/BNB",
    "XRP/BTC",
    "XRP/BNB",
    "ADA/BTC",
    "ADA/BNB",
]

# Slash-less symbol of the pair being forecast, and the candle interval.
target_pair = "ETHUSDT"
time_frame = "1h"

In [6]:
from transformers import PatchTSTConfig
import warnings
warnings.filterwarnings("ignore")

from tsfm_public.toolkit.dataset import ForecastDFDataset
from tsfm_public.toolkit.time_series_preprocessor import TimeSeriesPreprocessor
from tsfm_public.toolkit.time_series_forecasting_pipeline import TimeSeriesForecastingPipeline
from tsfm_public.toolkit.util import select_by_index

In [7]:
# Build the market frame around ETH/USDT and keep, per symbol, only the rows up to 2024-01-01.
# NOTE(review): the slice `x[:Timestamp]` presumably relies on a datetime index — confirm.
data=build_market_image(target_pair='ETH/USDT',time_frame=time_frame,axis=0)
data=data.groupby('symbol').apply(lambda x: x[:pd.Timestamp('2024-01-01')])

# Drop the group level added by groupby/apply, then turn the remaining index into a column.
data=data.reset_index(level=0,drop=True).reset_index()

# Keep only the target symbol (slash-less form, defined in the configuration cell).
data=data[data['symbol']==target_pair].copy()

id_columns=['symbol']
output_columns,feature_columns,drop_columns=get_dataset_columns(data,id_columns=id_columns)

# Displayed value: number of output columns plus number of distinct ids.
len(output_columns)+len(data[id_columns[0]].unique())

2

In [5]:
# Ray Tune sampling space for the forecaster hyperparameters.
# 'num_attention_heads' is intentionally not sampled here (see the commented entry).
search_space=dict(
    context_length=tune.choice(list(range(24,64,2))),
    patch_length=tune.choice(list(range(2,8,2))),
    forecast_horizon=tune.choice(list(range(2,12,2))),
    random_mask_ratio=tune.uniform(1e-6,0.5),
    d_model=tune.choice([64,128,256]),
    embed_div=tune.choice([1,2,4,8]),
    # num_attention_heads=tune.choice(list(range(18,36,2))),
    num_hidden_layers=tune.choice(list(range(2,12,2))),
    ffn_dim=tune.choice([64,128,256]),
    dropout=tune.choice([0.1,0.2,0.3,0.4,0.5]),
    head_dropout=tune.choice([0.1,0.2,0.3,0.4,0.5]),
    channel_attention=tune.choice([True,False]),
)

In [6]:
# Sanity check: does PyTorch see a CUDA device in this kernel?
torch.cuda.is_available()

True

In [7]:
from torch.utils.data import DataLoader

In [8]:
# # model_data=build_model_get_data(data=data,
# #                                 **params)
# model_data['train_dataset'].n_

In [None]:
# NOTE(review): objective() assigns its own local batch_size/num_workers, so these
# module-level values do not reach the Trainer as the code is written.
batch_size=48
num_workers=8
# Store the frame in the Ray object store; objective() fetches it with ray.get(data_id).
data_id=ray.put(data)

def objective(params):
    """Train one forecaster for a hyperparameter sample and score it on the test split.

    Args:
        params: dict of sampled hyperparameters. Must contain 'd_model' and
            'embed_div' (used to derive 'num_attention_heads'); every entry is
            forwarded to ``build_model_get_data``. The dict is NOT modified.

    Returns:
        dict: the metrics returned by ``trainer.evaluate(test_dataset)``, with
        'eval_loss' additionally exposed under the keys '_metric' and 'score'.

    Raises:
        KeyError: if 'd_model' or 'embed_div' is missing from ``params`` or the
            evaluation metrics carry no 'eval_loss'.
    """
    batch_size=64
    num_workers=1

    # Build the full configuration on a copy so the caller's dict (e.g. the
    # best config passed in by hand) is left untouched. The fixed settings
    # take precedence over sampled values, as before.
    model_params={**params,
                  'scaling': 'std',
                  'loss': 'mse',
                  'pre_norm': True,
                  'norm_type': 'batchnorm',
                  'id_columns': ['symbol'],
                  'timestamp_column': "date_open",
                  # integer division keeps this an exact int without a float round-trip
                  'num_attention_heads': int(params['d_model']//params['embed_div']),
                  }

    # The frame lives in the Ray object store under the module-level `data_id`.
    data=ray.get(data_id)
    model_data=build_model_get_data(data=data,
                                    **model_params)

    model = model_data['model']
    train_dataset = model_data['train_dataset']
    valid_dataset = model_data['valid_dataset']
    test_dataset = model_data['test_dataset']

    training_args = TrainingArguments(
                                    output_dir="forecaster_pretrain/output/",
                                    overwrite_output_dir=True,
                                    # learning_rate=0.001,
                                    num_train_epochs=1000,
                                    do_eval=True,
                                    evaluation_strategy="epoch",

                                    per_device_train_batch_size=batch_size,
                                    per_device_eval_batch_size=batch_size,

                                    dataloader_num_workers=num_workers,

                                    save_strategy="epoch",
                                    logging_strategy="epoch",
                                    save_total_limit=3,

                                    logging_dir="forecaster_pretrain/logs/",
                                    load_best_model_at_end=True,  # reload the best checkpoint when training ends
                                    metric_for_best_model="eval_loss",  # metric monitored for early stopping
                                    greater_is_better=False,  # lower loss is better
                                    label_names=["future_values"],
                                    disable_tqdm=False
                                )

    # Stop when eval_loss has not improved by at least the threshold for 100 evaluations.
    early_stopping_callback = EarlyStoppingCallback(
        early_stopping_patience=100,
        early_stopping_threshold=0.0001,
    )

    trainer = Trainer(
                    model=model,
                    args=training_args,
                    train_dataset=train_dataset,
                    eval_dataset=valid_dataset,
                    callbacks=[early_stopping_callback],
                    # compute_metrics=compute_metrics,
                        )
    trainer.train()

    # Score the trained model on the held-out test split.
    test_metrics=trainer.evaluate(test_dataset)

    # Expose the loss under the keys the search reads.
    # NOTE(review): '_metric' is presumably Tune's fallback metric name — confirm.
    test_metrics['_metric']=test_metrics['eval_loss']
    test_metrics['score']=test_metrics['eval_loss']
    # train.report(test_metrics)
    return test_metrics



In [None]:
# Draw one concrete configuration from the search space (handy for a manual smoke test).
config={name:domain.sample() for name,domain in search_space.items()}
# print(config)
# objective(config)

In [None]:
def trial_str_creator(trial):
    """Return the name used for a trial: '<trainable_name>_<trial_id>_forcaster'."""
    return f"{trial.trainable_name}_{trial.trial_id}_forcaster"



# Search setup: 6 samples, minimising the 'score' entry returned by objective().
# The metric is named explicitly so the search does not depend on a fallback metric key.
tune_config=tune.TuneConfig(num_samples=6,
                            metric="score",
                            mode="min",
                            search_alg='hyperopt',
                            trial_name_creator=trial_str_creator,
                            trial_dirname_creator=trial_str_creator,
                            )

# NOTE(review): absolute, machine-specific storage path — consider making it configurable.
run_config=train.RunConfig(
    storage_path='C:/Users/standard/Git/MultiTrader/forecaster_pretrain/Hyperparam_runs/',
    name="forecaster_experiments")

# NOTE(review): `resource_group` is only referenced by the commented-out line below; the
# active request asks for half a CPU and no GPU per trial — confirm that is intended.
resource_group = tune.PlacementGroupFactory([{"GPU": 1}])
# objective_with_resources = tune.with_resources(objective, resource_group)
objective_with_resources = tune.with_resources(objective, {"cpu": .5})



In [None]:
# Always start from a fresh Ray runtime: tear down an existing one before initialising.
if ray.is_initialized():
    ray.shutdown()
ray.init()



In [None]:
# Publish the frame to the object store of the current Ray runtime and show its reference.
data_id=ray.put(data)
print(data_id)

# Assemble the tuner from the resource-wrapped objective and the search/run configuration.
tuner=tune.Tuner(
    objective_with_resources,
    param_space=search_space,
    tune_config=tune_config,
    run_config=run_config,
)

In [None]:
# Run the hyperparameter search; `results` is queried with get_best_result() afterwards.
results = tuner.fit()


In [1]:
# Show the configuration of the trial with the lowest 'score'.
# NOTE(review): the saved output of this cell is a NameError (it was run before `results`
# existed), and the same query is repeated in a later cell — consider removing one of them.
print(results.get_best_result(metric="score", mode="min").config)

NameError: name 'results' is not defined

In [None]:
# Tear down the Ray runtime if one is running.
# NOTE(review): objective() calls ray.get(data_id); running it after this shutdown
# presumably requires Ray to be re-initialised and data_id re-created — confirm.
if ray.is_initialized():
    ray.shutdown()

In [None]:
# Look up the best trial (lowest 'score') once, keep its configuration and show it.
best_params=results.get_best_result(metric="score", mode="min").config
print(best_params)

In [None]:
# Re-run training and test evaluation with the best configuration found by the search.
# NOTE(review): objective() fetches the data with ray.get(data_id), so this presumably
# needs a live Ray runtime holding `data_id` — confirm before running after a shutdown.
objective(best_params)

In [None]:
import tensorboardX
import time

In [None]:
# Load a saved PatchTST checkpoint for inspection.
# NOTE(review): absolute, machine-specific path; it points at `stacked_pretrain/output`,
# whereas objective() writes to `forecaster_pretrain/output/` — confirm this is the intended checkpoint.
model=PatchTSTForPrediction.from_pretrained("C:/Users/standard/Git/MultiTrader/stacked_pretrain/output/checkpoint-473186")

In [None]:
# NOTE(review): in the visible notebook `test_dataset` is only assigned inside objective();
# it must exist at module scope for this cell to run — confirm where it comes from.
hf_df=test_dataset

# First sample: context window with a leading batch axis added, and its target window.
x_test=hf_df[0]['past_values'].unsqueeze(0)
x_future=hf_df[0]['future_values']
# x.shape
# x_future.shape

In [None]:
def _plot_preds(axes,size,idx,symbol_idx,model):
    """Scatter the last true value and the last predicted value of one sample.

    Both points are drawn at x = idx + size - 1, i.e. the final position of a
    window of `size` steps that starts at `idx`.

    Args:
        axes: matplotlib Axes to draw on.
        size: number of x positions in the window.
        idx: index of the sample in the module-level dataset `hf_df`.
        symbol_idx: index along the last axis (channel) to plot.
        model: callable returning a mapping with 'prediction_outputs' when
            given the batched past values.
    """
    sample=hf_df[idx]
    x_past=sample['past_values'].unsqueeze(0)

    # Drop only the batch axis so a horizon or channel count of 1 is not collapsed.
    # assumes prediction_outputs is (batch, horizon, channels) — TODO confirm
    x_pred=model(x_past)['prediction_outputs'].squeeze(0).cpu().detach().numpy()
    x_future=sample['future_values'].cpu().detach().numpy()

    last_true=x_future[:,symbol_idx].flatten()[-1]
    last_pred=x_pred[:,symbol_idx].flatten()[-1]

    last_x=idx+size-1
    axes.scatter(last_x,last_true, color="blue",label='True')
    axes.scatter(last_x,last_pred, color="red",label='Pred' ,alpha=0.5)


# Driver: plot up to 48 consecutive samples on one figure. This used to sit inside
# _plot_preds, where it recursed without the required `size` argument.
plot_start=0
plot_count=min(48,len(hf_df))
window_size=len(hf_df[plot_start]['past_values'])+len(hf_df[plot_start]['future_values'])

fig, axes = plt.subplots()
for i in tqdm(range(plot_start,plot_start+plot_count)):
    _plot_preds(axes,size=window_size,idx=i,symbol_idx=2,model=model)

# Every scatter call carries a label; keep one legend entry per label.
legend_handles,legend_labels=axes.get_legend_handles_labels()
unique_entries=dict(zip(legend_labels,legend_handles))
axes.legend(unique_entries.values(),unique_entries.keys())
axes.set_xlabel("sample index + window size - 1")
axes.set_ylabel("value")
plt.show()
    



In [None]:
# Total number of x positions: context window plus forecast window.
# NOTE(review): `forecast_horizon`, `context_length` and `x_true` are not assigned at module
# scope anywhere in the visible notebook — this cell depends on state defined elsewhere.
plot_size=forecast_horizon +context_length
plot_size
pred_size=x_true.shape[0]

In [None]:

# Integer x axis 0..plot_size-1 shared by the context and forecast curves.
x_plot=np.arange(plot_size)


In [None]:
idx=-1  # channel (last axis) to plot

# x_test was given a leading batch axis with unsqueeze(0); drop it so that
# indexing [:, idx] selects one channel over time instead of one time step.
# NOTE(review): `x_pred`, `x_true` and `forecast_horizon` are not assigned at module scope
# in the visible notebook; presumably (forecast_horizon, channels) arrays — confirm.
plt.plot(x_plot[:-forecast_horizon],x_test.squeeze(0)[:,idx],label='Context')
plt.plot(x_plot[-forecast_horizon:],x_pred[:,idx],label='Pred')
plt.plot(x_plot[-forecast_horizon:],x_true[:,idx],label='True')
plt.legend();

In [None]:
# trainer.evaluate(x
#                  )
# model=trainer.model
# Evaluate on the test split.
# NOTE(review): `trainer` and `test_dataset` are only assigned inside objective() in the
# visible notebook — confirm they exist at module scope before running this cell.
trainer.evaluate(test_dataset)

In [None]:
# Predict for the first test sample only. Trainer.predict() returns an object with a
# `.predictions` field; prediction_step() needs (model, inputs, prediction_loss_only)
# and returns a plain tuple, so it cannot be used this way.
# NOTE(review): `trainer` is only assigned inside objective() in the visible notebook.
pred=trainer.predict(torch.utils.data.Subset(hf_df,[0])).predictions


In [None]:
# Shape of the first element of the prediction output.
pred[0].shape

In [None]:
# Replace the index with a fresh 0..n-1 range, discarding the old one.
# NOTE(review): `test_data` is not assigned anywhere in the visible notebook.
test_data=test_data.reset_index(drop=True)

In [None]:
# Final evaluation on the test split, kept under its own name so the Tune search
# output stored in `results` is not overwritten by a metrics dict.
# NOTE(review): `trainer` and `test_dataset` are only assigned inside objective() in the
# visible notebook — confirm they exist at module scope.
test_metrics = trainer.evaluate(test_dataset)
print("Test result:")
print(test_metrics)