In [1]:
%pip install gym gym-anytrading gym-trading-env stable-baselines3
import gymnasium as gym

from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3 import DQN

# do not remove!
from gym_trading_env.environments import TradingEnv
from gym_trading_env.renderer import Renderer

import numpy as np

from pandas.core.frame import DataFrame
from pandas._libs.tslibs.timestamps import Timestamp

##
import sys
sys.path.append('../../')

from coin_data import get_coin_data


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


## Implement gym-trading env
https://gym-trading-env.readthedocs.io/en/latest/rl_tutorial.html

In [2]:
from agents.rl_agent.util import eval_model, get_env, preprocess_data, train_model


data = get_coin_data('BTC/USDT', '1h', start_date=Timestamp('2021-01-01'))

coin_data = preprocess_data(data, window_size=60)

env = get_env(
    'BTC/USDT', 
    coin_data, 
    window_size=60, 
    positions=[-3, -2, -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1, 2, 3],
)
model = DQN('MlpPolicy', env, learning_starts=100, verbose=1, tau=0.9, exploration_initial_eps=0.5, exploration_fraction=0.1, exploration_final_eps=0.05)
train_model('DQN', model, env, 1e5)

eval_model('DQN', model, env, coin_data, True)
# data.rename(columns={
#     'Open': 'open',
#     'High': 'high',
#     'Low': 'low',
#     'Close': 'close',
#     'Volume': 'volume'
# }, inplace=True)
# data.sort_index(inplace=True)
# data

UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.)

In [3]:
def preprocess(coin_data: DataFrame, window_size: int = 60) -> DataFrame:
    """
    Method preprocesses the coin data.
    It normalizes the columns and drops the NaNs.

    Columns 
        * 'Open': divide by 'Close'
        * 'High': divide by 'Close'
        * 'Low': divide by 'Close'
        * 'Close': pct_change()
        * 'Volume': divide by rolling 10-day max

    :param data: DataFrame with coin data
    :return: preprocessed DataFrame
    """
    coin_data["feature_Close"] = coin_data["close"].pct_change()
    coin_data["feature_High"] = coin_data["high"] / coin_data["close"]
    coin_data["feature_Low"] = coin_data["low"] / coin_data["close"]
    coin_data["feature_Open"] = coin_data["open"] / coin_data["close"]
    coin_data["feature_Volume"] = coin_data["volume"] / coin_data["volume"].rolling(window_size).max()

    coin_data.dropna(inplace=True)
    return coin_data

In [4]:
coin_data = preprocess(data)
coin_data

Unnamed: 0_level_0,open,high,low,close,volume,feature_Close,feature_High,feature_Low,feature_Open,feature_Volume
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-01-03 11:00:00,33877.96,34450.00,33787.55,34413.53,4116.853141,0.015809,1.001060,0.981810,0.984437,0.261665
2021-01-03 12:00:00,34413.53,34600.00,33928.75,34103.72,4546.283481,-0.009003,1.014552,0.994869,1.009084,0.288960
2021-01-03 13:00:00,34103.73,34385.02,33800.00,33880.00,4373.738376,-0.006560,1.014906,0.997639,1.006604,0.277993
2021-01-03 14:00:00,33877.98,34150.00,33450.00,33811.54,5928.805563,-0.002021,1.010010,0.989307,1.001965,0.376832
2021-01-03 15:00:00,33811.54,33873.45,32727.00,33506.62,8391.249757,-0.009018,1.010948,0.976732,1.009100,0.533344
...,...,...,...,...,...,...,...,...,...,...
2023-12-12 18:00:00,40702.99,41421.14,40680.00,41305.44,2334.042350,0.014801,1.002801,0.984858,0.985415,0.215786
2023-12-12 19:00:00,41305.44,41330.00,41025.00,41137.99,1268.109820,-0.004054,1.004667,0.997253,1.004070,0.117239
2023-12-12 20:00:00,41137.99,41294.00,41063.18,41229.61,1111.426630,0.002227,1.001562,0.995963,0.997778,0.102753
2023-12-12 21:00:00,41229.61,41381.10,41042.08,41104.02,1216.505640,-0.003046,1.006741,0.998493,1.003055,0.112468


In [5]:
# make reward funcition
def reward_function(history):
    return np.log(history["portfolio_valuation", -1] / history["portfolio_valuation", -2])

In [6]:
env = gym.make(
    "TradingEnv",
    name="BTC/USD",
    df=coin_data,
    windows=5,
    positions=[-5, -4, -3, -2, -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5],
    trading_fees=0.001,
    borrow_interest_rate=0.0003/100,  # 0.003% per timestamp (one timestamp = 1 hour)
    reward_function=reward_function,
    portfolio_initial_value=1000,
)

In [7]:

env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
env.unwrapped.add_metric('Episode Length', lambda history : len(history['position']) )

done, truncated = False, False
observation, info = env.reset()
print(info)
while not done and not truncated:
    action = env.action_space.sample()
    observation, reward, done, truncated, info = env.step(action)

env.render()

env.unwrapped.save_for_render()

{'idx': 4, 'step': 0, 'date': numpy.datetime64('2021-01-03T15:00:00.000000000'), 'position_index': 11, 'position': 0.75, 'real_position': 0.75, 'data_high': 33873.45, 'data_open': 33811.54, 'data_volume': 8391.249757, 'data_close': 33506.62, 'data_low': 32727.0, 'portfolio_valuation': 1000.0, 'portfolio_distribution_asset': 0.02238363642766713, 'portfolio_distribution_fiat': 250.0, 'portfolio_distribution_borrowed_asset': 0, 'portfolio_distribution_borrowed_fiat': 0, 'portfolio_distribution_interest_asset': 0, 'portfolio_distribution_interest_fiat': 0, 'reward': 0}


Market Return : 23.01%   |   Portfolio Return : -100.00%   |   Position Changes : 24146   |   Episode Length : 25746   |   


In [8]:
# renderer = Renderer(render_logs_dir="render_logs/BTC")
# renderer.run()

In [9]:
## Multi Dataset Trading environment
# TODO: add data from more exchanges

TODO:
- implement agent. No idea what the openAI network looks like
- consider Random Forest model
- consider XGBoost model
- stacking multiple diverse models, and a meta model on top of that
- observe copilot's demoralizing tips when it comes to trading 🤔🤔🤔 
- consider RNN
- feed the results of a time series model (ARIMA, GARCH) as input features for a machine learning model
- Markov chain model
- stop making fun of me, copilot! 🤬🤬🤬
- consider LSTM model, https://sb3-contrib.readthedocs.io/en/master/modules/ppo_recurrent.html, https://iclr-blog-track.github.io/2022/03/25/ppo-implementation-details/

## Implement DQN agent

In [12]:
from stable_baselines3.common.monitor import Monitor

coin_data = preprocess(data)
env.reset()

model = DQN('MlpPolicy', env, learning_starts=100, verbose=1, tau=0.9, exploration_initial_eps=0.5, exploration_fraction=0.1, exploration_final_eps=0.05, device='cpu')

# Wrap the environment with Monitor
eval_env = Monitor(env)

eval_callback = EvalCallback(
    eval_env, best_model_save_path='./logs/',
    log_path='./logs/', eval_freq=100,
    n_eval_episodes=1,
    deterministic=True, render=False
)

model.learn(total_timesteps=1e4, callback=eval_callback)

env.render()

env.unwrapped.save_for_render()
renderer = Renderer(render_logs_dir="render_logs/BTC")
renderer.run()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Market Return : 23.01%   |   Portfolio Return : 11.45%   |   Position Changes : 1   |   Episode Length : 25746   |   
Eval num_timesteps=100, episode_reward=0.11 +/- 0.00
Episode length: 25745.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 2.57e+04 |
|    mean_reward      | 0.108    |
| rollout/            |          |
|    exploration_rate | 0.455    |
| time/               |          |
|    total_timesteps  | 100      |
----------------------------------
New best mean reward!
Market Return : 23.01%   |   Portfolio Return : 11.37%   |   Position Changes : 1   |   Episode Length : 25746   |   
Eval num_timesteps=200, episode_reward=0.11 +/- 0.00
Episode length: 25745.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 2.57e+04 |
|    mean_reward      | 0.108    |
| rollout/         

 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m


In [13]:
# load best model
model = DQN.load("logs/best_model.zip")

done, truncated = False, False
observation, info = env.reset()
print(info)
while not done and not truncated:
    action, _ = model.predict(observation)
    observation, reward, done, truncated, info = env.step(action)
    if done or truncated:
        print(info)

env.render()
env.unwrapped.save_for_render()

renderer = Renderer(render_logs_dir="render_logs/BTC")
renderer.run()

{'idx': 4, 'step': 0, 'date': numpy.datetime64('2021-01-03T15:00:00.000000000'), 'position_index': 7, 'position': -0.25, 'real_position': -0.25, 'data_high': 33873.45, 'data_open': 33811.54, 'data_volume': 8391.249757, 'data_close': 33506.62, 'data_low': 32727.0, 'portfolio_valuation': 1000.0, 'portfolio_distribution_asset': 0, 'portfolio_distribution_fiat': 1250.0, 'portfolio_distribution_borrowed_asset': 0.0074612121425557095, 'portfolio_distribution_borrowed_fiat': 0, 'portfolio_distribution_interest_asset': 0, 'portfolio_distribution_interest_fiat': 0, 'reward': 0}
Market Return : 23.01%   |   Portfolio Return : -100.00%   |   Position Changes : 17424   |   Episode Length : 25746   |   
{'idx': 25749, 'step': 25745, 'date': numpy.datetime64('2023-12-12T22:00:00.000000000'), 'position_index': array(1), 'position': -4, 'real_position': -4.055608217957823, 'data_high': 41220.38, 'data_open': 41104.02, 'data_volume': 665.07274, 'data_close': 41216.95, 'data_low': 41079.48, 'portfolio_v

 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [13/Dec/2023 00:14:43] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [13/Dec/2023 00:14:46] "GET /update_data/USD_2023-12-13_00-14-40.pkl HTTP/1.1" 200 -
127.0.0.1 - - [13/Dec/2023 00:14:47] "GET /metrics HTTP/1.1" 200 -
