In [1]:
import datetime
import glob
from functools import lru_cache, partial
from pprint import pprint

import gym_trading_env
import gymnasium as gym
import matplotlib.pyplot as plt
import MultiTrade
import numpy as np
import pandas as pd
import torch
from gym_trading_env.downloader import download
from gym_trading_env.environments import TradingEnv
from gym_trading_env.renderer import Renderer
from IPython.display import display
from ray import train, tune
from tqdm.autonotebook import tqdm
from utils import build_dataset, build_market_image,preprocess_data,stack_arrays
from forecast_utils import create_transformation,create_instance_splitter,create_train_dataloader,create_backtest_dataloader
from gluonts.time_feature import time_features_from_frequency_str
from gluonts.time_feature import get_lags_for_frequency
from datasets import load_dataset,Dataset,DatasetDict

from tsfm_public.toolkit.dataset import ForecastDFDataset
from tsfm_public.toolkit.time_series_preprocessor import TimeSeriesPreprocessor
from tsfm_public.toolkit.util import select_by_index
from transformers import (
    EarlyStoppingCallback,
    PatchTSTConfig,
    PatchTSTForPrediction,
    Trainer,
    TrainingArguments,
)

In [2]:
from pearl.pearl_agent import PearlAgent
from pearl.utils.instantiations.environments.gym_environment import GymEnvironment
from pearl.neural_networks.sequential_decision_making.q_value_networks import VanillaQValueNetwork
from pearl.utils.functional_utils.experimentation.set_seed import set_seed
from pearl.policy_learners.sequential_decision_making.deep_q_learning import DeepQLearning
from pearl.policy_learners.sequential_decision_making.double_dqn import DoubleDQN
from pearl.replay_buffers.sequential_decision_making.fifo_off_policy_replay_buffer import FIFOOffPolicyReplayBuffer
from pearl.utils.functional_utils.train_and_eval.online_learning import online_learning
from pearl.action_representation_modules.one_hot_action_representation_module import OneHotActionTensorRepresentationModule

In [3]:
# Market pairs in "BASE/QUOTE" form, listed in the order the notebook expects.
COIN_PAIRS = [
    # USDT-quoted pairs
    "BTC/USDT", "ETH/USDT", "SOL/USDT", "BNB/USDT", "XRP/USDT", "ADA/USDT",
    # ETH crosses
    "ETH/BTC", "SOL/ETH", "BNB/ETH", "XRP/ETH", "ADA/ETH",
    # SOL crosses
    "SOL/BTC", "SOL/BNB",
    # XRP crosses
    "XRP/BTC", "XRP/BNB",
    # ADA crosses
    "ADA/BTC", "ADA/BNB",
]

# Target pair symbol, written without the '/' separator used in COIN_PAIRS.
target_pair = "ETHUSDT"

# Candle interval string.
time_frame = "1h"

In [4]:

# download(exchange_names = ["binance"],
#     symbols= tqdm(COIN_PAIRS),
#     timeframe= time_frame,
#     dir = "data",
#     since= datetime.datetime(year= 2024, month= 1, day=1),
# )

In [5]:
# Build the market image for ETH/USDT. The pair keeps its '/' separator here
# (the notebook-level `target_pair` constant is spelled without it), while the
# candle interval comes from the shared `time_frame` constant.
data = build_market_image(target_pair='ETH/USDT', time_frame=time_frame, axis=1)

# Columns whose name contains 'obs' are moved to the end in sorted order;
# every other column keeps its original relative position in front of them.
feature_cols = list(data.filter(like='obs').columns)
target_cols = [c for c in data.columns if c not in feature_cols]
data = data[target_cols + sorted(feature_cols)]
data

Unnamed: 0_level_0,open_ADABNB,high_ADABNB,low_ADABNB,close_ADABNB,volume_ADABNB,feature_MA_20_ADABNB,feature_MA_50_ADABNB,feature_MA_200_ADABNB,feature_log_return_2_ADABNB,feature_log_volume_2_ADABNB,...,feature_log_return_10_XRPUSDT,feature_log_volume_10_XRPUSDT,feature_log_return_15_XRPUSDT,feature_log_volume_15_XRPUSDT,feature_log_return_20_XRPUSDT,feature_log_volume_20_XRPUSDT,feature_log_return_25_XRPUSDT,feature_log_volume_25_XRPUSDT,feature_log_return_30_XRPUSDT,feature_log_volume_30_XRPUSDT
date_open,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-01 06:00:00,-0.284797,-0.283175,-0.283433,-0.284507,22079.1,0.000000,0.000000,0.000000,1.110035,40.249582,...,0.863289,42.552923,0.914124,42.752143,1.008632,42.059123,0.998585,42.278809,0.996668,42.584266
2022-01-01 07:00:00,-0.284805,-0.283165,-0.283428,-0.284496,23820.6,0.000000,0.000000,0.000000,1.110035,40.249582,...,0.863289,42.552923,0.914124,42.752143,1.008632,42.059123,0.998585,42.278809,0.996668,42.584266
2022-01-01 08:00:00,-0.284790,-0.283164,-0.283417,-0.284493,15624.1,0.000000,0.000000,0.000000,1.115493,39.903766,...,0.863289,42.552923,0.914124,42.752143,1.008632,42.059123,0.998585,42.278809,0.996668,42.584266
2022-01-01 09:00:00,-0.284790,-0.283162,-0.283414,-0.284490,235241.5,0.000000,0.000000,0.000000,1.112368,42.539644,...,0.863289,42.552923,0.914124,42.752143,1.008632,42.059123,0.998585,42.278809,0.996668,42.584266
2022-01-01 10:00:00,-0.284784,-0.283159,-0.283425,-0.284505,81681.5,0.000000,0.000000,0.000000,1.105358,41.903595,...,0.863289,42.552923,0.914124,42.752143,1.008632,42.059123,0.998585,42.278809,0.996668,42.584266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-01 13:00:00,-0.286292,-0.284668,-0.284926,-0.286004,57866.7,0.001073,0.001072,0.001103,1.099721,42.642549,...,0.840881,42.822902,0.889805,42.999112,0.988619,43.916807,0.976975,43.255893,0.974101,43.059799
2024-04-01 14:00:00,-0.286298,-0.284676,-0.284927,-0.286009,8675.6,0.001073,0.001071,0.001102,1.099672,39.546697,...,0.821955,43.967464,0.869766,44.183344,0.967299,43.580044,0.956455,44.081933,0.955972,43.993916
2024-04-01 15:00:00,-0.286305,-0.284672,-0.284931,-0.286003,50429.5,0.001072,0.001071,0.001102,1.110977,40.112017,...,0.851684,42.241919,0.863089,43.784767,0.961407,43.908307,0.953430,44.444491,0.953427,45.040838
2024-04-01 16:00:00,-0.286299,-0.284663,-0.284922,-0.285993,36332.9,0.001072,0.001071,0.001101,1.125073,41.681792,...,0.848340,41.810711,0.871833,43.233104,0.970953,43.251450,0.962182,43.838746,0.961702,44.051195


In [6]:
# Checkpoint of the pretrained PatchTST forecaster that is loaded by default.
_DEFAULT_FORECAST_CHECKPOINT = "C:/Users/standard/Git/MultiTrader/stacked_pretrain/output/checkpoint-64820"


def _slice_per_symbol(frame, start, end):
    """Cut every symbol's rows to ``start..end`` and move the time index into a column."""
    sliced = frame.groupby('symbol').apply(lambda group: group.loc[start:end])
    # Drop the group-key index level added by ``apply``, then turn the remaining
    # (time) index into a regular column.
    return sliced.reset_index(level=0, drop=True).reset_index()


def _make_forecasting_env(model, hf_data, df, name):
    """Create one ``ForecastingTradingEnv`` with the settings shared by train and test."""
    return MultiTrade.ForecastingTradingEnv(
        model=model,
        hf_data=hf_data,
        context_length=48,
        forecast_horizon=12,
        name=name,
        df=df,  # dataset with the custom features
        positions=[-.25, 0, .25],  # -0.25 (short), 0 (out), +0.25 (long)
        trading_fees=0.01/100,  # 0.01% per stock buy / sell (Binance fees)
        borrow_interest_rate=0.0003/100,  # 0.0003% per timestep (one timestep = 1h here)
    )


def get_train_test_envs(data, symbol='ETHUSDT', look_back=7,
                        checkpoint_path=_DEFAULT_FORECAST_CHECKPOINT):
    """Build the training and test forecasting environments for one symbol.

    The training window is fixed: 2024-02-01 plus 7 days. The test window
    starts where the training window ends and lasts ``look_back`` days. Label
    slicing is inclusive on both ends, so the row at the split timestamp is
    part of both windows.

    Args:
        data: DataFrame indexed by time with a ``'symbol'`` column holding one
            block of rows per symbol.
        symbol: Value of the ``'symbol'`` column whose rows are traded; also
            used as the prefix of the environment names.
        look_back: Length of the test window in days.
        checkpoint_path: Location passed to
            ``PatchTSTForPrediction.from_pretrained``.

    Returns:
        tuple: ``(train_env, test_env)``.

    Raises:
        KeyError: If ``data`` has no ``'symbol'`` column.
    """
    # Fail before the (slow) checkpoint load when the frame has the wrong layout.
    if 'symbol' not in data.columns:
        raise KeyError(
            "get_train_test_envs needs a 'symbol' column in `data` "
            "(one block of rows per symbol)"
        )

    start_date = datetime.datetime(year=2024, month=2, day=1)
    split_date = start_date + datetime.timedelta(days=7)
    end_date = split_date + datetime.timedelta(days=look_back)

    model = PatchTSTForPrediction.from_pretrained(checkpoint_path)

    # Rows of the traded symbol only; the per-symbol frames below keep every symbol.
    trade_data = data[data['symbol'] == symbol].copy()

    train_env = _make_forecasting_env(
        model=model,
        hf_data=_slice_per_symbol(data, start_date, split_date),
        df=trade_data.loc[start_date:split_date],
        name=f'{symbol}_train',
    )
    test_env = _make_forecasting_env(
        model=model,
        hf_data=_slice_per_symbol(data, split_date, end_date),
        df=trade_data.loc[split_date:end_date],
        name=f'{symbol}_test',
    )
    return train_env, test_env

In [7]:
# target_pair='ETHUSDT'
# train_env=train_envs[target_pair]
# train_env=GymEnvironment(train_env)
# test_env=test_envs[target_pair]
# test_env=GymEnvironment(test_env)

# hidden_dims = [64, 64]
# train_env.action_space

# Build the train/test trading environments and wrap each one in Pearl's
# GymEnvironment adapter.
# NOTE(review): the recorded run of this cell fails with KeyError: 'symbol'.
# get_train_test_envs groups `data` by a 'symbol' column; the frame built with
# axis=1 earlier in the notebook presumably has no such column - TODO confirm
# and pass a frame that carries one row block per symbol.
train_env,test_env=get_train_test_envs(data,look_back=7)
train_env=GymEnvironment(train_env)
test_env=GymEnvironment(test_env)
# NOTE(review): `objective` unpacks the same reset() call as
# (observation, action_space), so `info` here presumably holds the action
# space rather than an info dict - confirm.
obs,info=test_env.reset()
obs

KeyError: 'symbol'

In [None]:
# obs=train_env.reset()
# # env.action_space.sa
# for i in range(20):
#     act=train_env.action_space.sample()
#     act_result=train_env.step(act)
# act_result

In [None]:
# obs=train_env.reset()
# # env.action_space.sa
# for i in range(20):
#     act=train_env.action_space.sample()
#     act_result=train_env.step(act)
# act_result

In [None]:
# Hyperparameter search space; each entry is a Ray Tune sampling domain.
search_space = dict(
    look_back=tune.choice([7, 14, 21, 30, 45, 60]),
    hidden_dims=tune.choice([[64, 64], [128, 128], [256, 256]]),
    learning_rate=tune.uniform(1e-6, 1e-2),
    discount_factor=tune.uniform(1e-6, 1),
    training_rounds=tune.choice(list(range(2, 64, 2))),
    batch_size=tune.choice([64, 128, 256]),
    target_update_freq=tune.choice(list(range(2, 64, 2))),
    # a value of 1 indicates no soft updates
    soft_update_tau=tune.uniform(1e-6, 1),
    replay_buffer_size=tune.choice(list(range(10, 1_000, 10))),
)
search_space

In [None]:
# Draw one concrete value from every domain of the search space.
config = {name: domain.sample() for name, domain in search_space.items()}


In [None]:
def plot_pearl(pearl_env, ax=None):
    """Plot the portfolio valuation history recorded by a wrapped trading env.

    Args:
        pearl_env: Wrapper whose ``env.unwrapped`` exposes
            ``historical_info['portfolio_valuation']``.
        ax: Optional matplotlib Axes to draw on. A new figure and Axes are
            created when omitted.

    Returns:
        The Axes that was drawn on. End the calling cell with ``;`` to hide
        its repr.
    """
    naked_env = pearl_env.env.unwrapped
    value_history = naked_env.historical_info['portfolio_valuation']
    steps = np.arange(len(value_history))

    if ax is None:
        _, ax = plt.subplots()
    ax.plot(steps, value_history)
    # Label the figure so it can be read on its own when the notebook is skimmed.
    ax.set(xlabel="Step", ylabel="Portfolio valuation", title="Portfolio valuation history")
    return ax


In [None]:
def objective(config):
    """Train a Double DQN agent on the training env and score it on the test env.

    Args:
        config: Mapping of sampled hyperparameters. ``look_back``,
            ``hidden_dims`` and ``replay_buffer_size`` are consumed here; every
            remaining entry is forwarded as a keyword argument to ``DoubleDQN``.

    Returns:
        dict: ``{"score": -final_portfolio_valuation / 1000}``, so a lower
        score corresponds to a higher final valuation on the test env.
    """
    # Work on a copy so the caller's dict is not emptied by the pops below.
    params = dict(config)
    look_back = params.pop("look_back")
    hidden_dims = list(params.pop("hidden_dims"))
    replay_buffer_size = params.pop("replay_buffer_size")

    # get_train_test_envs takes the market frame as its first argument; `data`
    # is the notebook-level frame. Passing only `look_back` positionally would
    # bind it to the `data` parameter.
    train_env, test_env = get_train_test_envs(data, look_back=look_back)
    train_env = GymEnvironment(train_env)
    test_env = GymEnvironment(test_env)

    state_dim = train_env.observation_space.shape[0]
    num_actions = train_env.action_space.n

    # Actions use a one-hot representation, so the action dimension equals the
    # number of actions.
    q_network = VanillaQValueNetwork(
        state_dim=state_dim,        # dimension of the state representation
        action_dim=num_actions,     # dimension of the action representation
        hidden_dims=hidden_dims,    # dimensions of the intermediate layers
        output_dim=1,
    )

    # The network is handed over through `network_instance` rather than
    # `network_type`.
    agent = PearlAgent(
        policy_learner=DoubleDQN(
            state_dim=state_dim,
            action_space=train_env.action_space,
            network_instance=q_network,
            action_representation_module=OneHotActionTensorRepresentationModule(
                max_number_actions=num_actions
            ),
            **params
        ),
        replay_buffer=FIFOOffPolicyReplayBuffer(replay_buffer_size),
    )

    # Train the agent on the training environment.
    online_learning(
        agent=agent,
        env=train_env,
        number_of_episodes=200,
        print_every_x_episodes=10,   # print returns after every 10 episodes
        learn_after_episode=False,   # learn after every environment interaction, not at episode end
        seed=0
    )

    # Roll the greedy policy through the test environment. The agent still
    # observes and learns at every step of this loop.
    observation, action_space = test_env.reset()
    agent.reset(observation, action_space)
    done = False
    while not done:
        action = agent.act(exploit=True)
        action_result = test_env.step(action)
        agent.observe(action_result)
        agent.learn()
        done = action_result.done

    # 1000 is presumably the environment's initial portfolio value - TODO confirm.
    score = action_result.info['portfolio_valuation'] / 1000
    loss = {"score": -score}
    print(loss)
    return loss

In [None]:
# Fresh hyperparameter sample for a manual single-trial run of `objective`
# (uncomment the call below).
config = {name: domain.sample() for name, domain in search_space.items()}

# objective(config)

In [None]:

def trial_str_creator(trial):
    """Return the label ``<trainable_name>_<trial_id>_123`` for a trial."""
    return f"{trial.trainable_name}_{trial.trial_id}_123"

# Search configuration. `metric` names the key of the dict returned by
# `objective`; without it the searcher only knows the optimisation direction
# and has no named value to minimise.
tune_config = tune.TuneConfig(
    num_samples=6,
    metric="score",
    mode="min",
    search_alg='hyperopt',
    trial_name_creator=trial_str_creator,
    trial_dirname_creator=trial_str_creator,
)

# Where the experiment results are written.
run_config = train.RunConfig(
    storage_path='C:/Users/standard/OneDrive/Documents/Git/MultiTrader/tune_results',
    name="DDQN_experiments",
)

# Each trial reserves half a CPU.
objective_with_resources = tune.with_resources(objective, {"cpu": 0.5})
tuner = tune.Tuner(
    objective_with_resources,
    tune_config=tune_config,
    run_config=run_config,
    param_space=search_space,
)




In [None]:
# Run all trials, then print the hyperparameters of the trial with the lowest "score".
results = tuner.fit()
best_result = results.get_best_result(metric="score", mode="min")
print(best_result.config)

In [None]:
# Write the render logs of the underlying (unwrapped) test environment.
# NOTE(review): this is the notebook-level `test_env`; the environments built
# inside `objective` are local to that function - verify this one holds the
# episode that should be rendered.
unwrapped_test_env = test_env.env.unwrapped
unwrapped_test_env.save_for_render(dir="test_render_logs")

In [None]:
import tensorboard

In [None]:

# Create the renderer over "test_render_logs", the same directory name the
# notebook passes to save_for_render.
renderer = Renderer(render_logs_dir = "test_render_logs")

In [None]:
# Custom annualized metrics shown by the renderer.
def _annualized_return_pct(df, column):
    """Format the annualized percentage change of ``df[column]`` between its first and last row."""
    growth = df[column].iloc[-1] / df[column].iloc[0]
    # Number of 365-day periods needed to cover one year at the span of the index.
    periods_per_year = pd.Timedelta(days=365) / (df.index.values[-1] - df.index.values[0])
    return f"{(growth ** periods_per_year - 1) * 100:0.2f}%"


renderer.add_metric(
    name="Annual Market Return",
    function=lambda df: _annualized_return_pct(df, 'close'),
)
renderer.add_metric(
    name="Annual Portfolio Return",
    function=lambda df: _annualized_return_pct(df, 'portfolio_valuation'),
)

renderer.run()