# Imports

In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gymnasium as gym
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.evaluation import evaluate_policy
import stable_baselines3
import gym_trading_env
from gym_trading_env.renderer import Renderer   
from pathlib import Path
from BitcoinDownloader import download_exchange_data, get_dataframes
from BitcoinIndicators import Indicators
from BitcoinRewards import reward_function

import torch
torch.device("cuda" if torch.cuda.is_available() else "cpu")
%matplotlib inline

# Download Data if needed

In [8]:
download_again = False
data_path = './data/binance-BTCUSDT-1h.pkl'
if not Path(data_path).is_file() or download_again:
    download_exchange_data()
else:
    print("data already downloaded")

data already downloaded


In [9]:
training_df, testing_df = get_dataframes()
print(training_df.head(2))
print(training_df.tail(2))
print(testing_df.head(2))
print(testing_df.tail(2))

                        open     high      low    close     volume  \
date_open                                                            
2017-08-17 04:00:00  4261.48  4313.62  4261.32  4308.83  47.181009   
2017-08-17 05:00:00  4308.83  4328.69  4291.37  4315.32  23.234916   

                             date_close  
date_open                                
2017-08-17 04:00:00 2017-08-17 05:00:00  
2017-08-17 05:00:00 2017-08-17 06:00:00  
                         open      high       low     close      volume  \
date_open                                                                 
2022-12-31 23:00:00  16520.28  16551.24  16487.74  16542.40  4973.43307   
2023-01-01 00:00:00  16541.77  16545.70  16508.39  16529.67  4364.83570   

                             date_close  
date_open                                
2022-12-31 23:00:00 2023-01-01 00:00:00  
2023-01-01 00:00:00 2023-01-01 01:00:00  
                         open     high       low     close      volume  \
date_ope

# Create features

In [10]:
ind_path = './data/indicators.csv'
training_df.to_csv(ind_path)
indicators = Indicators(ind_path)
indicators.to_csv(ind_path)
training_df = pd.read_csv(ind_path)
training_df["date_open"] = pd.to_datetime(training_df["date_open"])
training_df.set_index("date_open", inplace=True)

training_df.dropna(inplace=True)
print(training_df.head(3))
print(training_df.tail(3))

                     index     open     high      low    close     volume  \
date_open                                                                   
2017-08-22 12:00:00    128  3866.48  3990.12  3853.53  3934.01  75.787584   
2017-08-22 13:00:00    129  3949.01  3964.75  3874.35  3920.99  38.623871   
2017-08-22 14:00:00    130  3874.38  3920.99  3786.81  3849.00  26.182528   

                              date_close  feature_ROC_2  feature_RROC_2  \
date_open                                                                 
2017-08-22 12:00:00  2017-08-22 13:00:00       2.111697       35.867607   
2017-08-22 13:00:00  2017-08-22 14:00:00       1.537519     -287.510150   
2017-08-22 14:00:00  2017-08-22 15:00:00      -2.018670     -197.992428   

                     feature_ATR_2  ...  feature_ROC_32  feature_RROC_32  \
date_open                           ...                                    
2017-08-22 12:00:00        100.485  ...       -8.029003      -284.260096   
2017-08-22

# Create Environment

In [11]:
training_env = gym.make("TradingEnv",
        name= "BTCUSD",
        df = training_df, # Your dataset with your custom features
        positions = [0, 1], # -1 (=SHORT), 0(=SELL ALL), +1 (=BUY ALL)
        #trading_fees = 0.01/100, # 0.01% per stock buy / sell (Binance fees)
        #borrow_interest_rate= 0.0003/100, # 0.0003% per timestep (one timestep = 1h here)
        #dynamic_feature_functions = [dynamic_features]
        # reward_function = reward_function,
        portfolio_initial_value = 1000,
        reward_function = reward_function
        #max_episode_duration = 1000,
    )

observation, info = training_env.reset()
print(observation)

[   2.111697    35.867607   100.485       99.61673     50.
    3.1281855  -45.050323   106.1775      95.85144     43.443478
    6.2386966 -205.62448     91.598335    65.98363     42.622845
    7.783201  -174.26988    134.965       33.85249     46.772453
   -3.7939115   58.428787   143.4325      58.263798    31.911493
   -8.029003  -284.2601     111.090935    70.8477      31.503231
   -4.6376657  -48.21791     82.007965    36.313114    32.214336
    0.           0.       ]


# Create Model

In [12]:
model = RecurrentPPO('MlpLstmPolicy', # feed-forward neural network with multiple hidden layers
            training_env, # environment in which the agent interacts and learns
            verbose=1, # enables the training progress to be printed during the learning process
            gamma=0.95, # determines the importance of future rewards compared to immediate rewards
            n_steps=15, # steps to collect samples from the environment before performing an update
            ent_coef=0.01, # encourages exploration by adding entropy to the policy loss
            learning_rate=0.001, # controls the step size at which model's parameters are updated based on the gradient of the loss function
            clip_range=0.1, # limits the update to a certain range to prevent large policy updates
            batch_size=15,
            device="cuda" if torch.cuda.is_available() else "cpu")
n = len(training_df)
model.learn(total_timesteps=n)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------
| time/              |    |
|    fps             | 60 |
|    iterations      | 1  |
|    time_elapsed    | 0  |
|    total_timesteps | 15 |
---------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 40           |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    total_timesteps      | 30           |
| train/                  |              |
|    approx_kl            | 0.0016625444 |
|    clip_fraction        | 0.0267       |
|    clip_range           | 0.1          |
|    entropy_loss         | -0.693       |
|    explained_variance   | 0.0544       |
|    learning_rate        | 0.001        |
|    loss                 | 483          |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0186      |
|    value_loss      

<sb3_contrib.ppo_recurrent.ppo_recurrent.RecurrentPPO at 0x20121ece6e0>

# Calculate indicators for testing dataset

In [13]:
ind_path = './data/indicators.csv'
testing_df.to_csv(ind_path)
indicators = Indicators(ind_path)
indicators.to_csv(ind_path)
testing_df = pd.read_csv(ind_path)
testing_df["date_open"] = pd.to_datetime(testing_df["date_open"])
testing_df.set_index("date_open", inplace=True)

testing_df.dropna(inplace=True)
print(testing_df.head(3))
print(testing_df.tail(3))

                     index      open      high       low     close  \
date_open                                                            
2023-01-06 08:00:00    128  16794.33  16812.22  16791.47  16802.11   
2023-01-06 09:00:00    129  16802.11  16802.27  16762.50  16787.10   
2023-01-06 10:00:00    130  16787.10  16802.98  16760.00  16767.06   

                         volume           date_close  feature_ROC_2  \
date_open                                                             
2023-01-06 08:00:00  5475.13940  2023-01-06 09:00:00       0.087678   
2023-01-06 09:00:00  7581.99164  2023-01-06 10:00:00      -0.027633   
2023-01-06 10:00:00  6268.17791  2023-01-06 11:00:00      -0.204559   

                     feature_RROC_2  feature_ATR_2  ...  feature_ROC_32  \
date_open                                           ...                   
2023-01-06 08:00:00     -138.976456         21.735  ...       -0.339088   
2023-01-06 09:00:00      -87.133089         30.260  ...       -0.359

In [14]:
testing_env = gym.make("TradingEnv",
        name= "BTCUSD",
        df = testing_df, # Your dataset with your custom features
        positions = [0, 1], # -1 (=SHORT), 0(=SELL ALL), +1 (=BUY ALL)
        portfolio_initial_value = 1000,
    )

observation, info = testing_env.reset()
print(observation.shape)
print(observation)
print(info)

(37,)
[ 8.7677605e-02 -1.3897646e+02  2.1735001e+01  9.3998184e+01
  5.0000000e+01 -1.6281354e-01  5.9759155e+01  2.7867500e+01
  8.6830162e+01  2.2887589e+01 -2.5204769e-01  1.0690672e+02
  2.6600000e+01  8.2152237e+01  1.8897150e+01 -3.4905249e-01
 -3.5503836e+02  3.0045000e+01  8.9621849e+01  1.8681402e+01
 -1.1987240e-01 -1.1604339e+02  3.3138126e+01  8.5513313e+01
  2.9803864e+01 -3.3908841e-01 -1.4387981e+02  3.4421562e+01
  7.5647644e+01  3.2066544e+01  6.7657948e-01 -4.3159134e+01
  4.5555626e+01  6.7108688e+01  3.6547573e+01  1.0000000e+00
  1.0000000e+00]
{'idx': 0, 'step': 0, 'date': numpy.datetime64('2023-01-06T08:00:00.000000000'), 'position_index': 1, 'position': 1, 'real_position': 1, 'data_close': 16802.11, 'data_price change': 8.049999999999272, 'data_high': 16812.22, 'data_date_close': '2023-01-06 09:00:00', 'data_volume': 5475.1394, 'data_low': 16791.47, 'data_open': 16794.33, 'data_index': 128, 'portfolio_valuation': 1000.0, 'portfolio_distribution_asset': 0.0595163

# Test trained model on testing data

In [15]:
for _ in range(len(testing_df)):
    position_index, _states = model.predict(observation)
    observation, reward, done, truncated, info = testing_env.step(position_index)
    testing_env.save_for_render(dir = "render_logs")
    if done or truncated:
        break

Market Return : 73.00%   |   Portfolio Return : 58.09%   |   


# Render results

In [16]:
renderer = Renderer(render_logs_dir="render_logs")
renderer.run()

 * Serving Flask app 'gym_trading_env.renderer'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [08/Aug/2023 11:43:34] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 11:43:37] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [08/Aug/2023 11:43:38] "GET /update_data/BTCUSD_2023-08-08_11-42-49.pkl HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 11:43:39] "GET /metrics HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 11:52:12] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 11:52:15] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [08/Aug/2023 11:52:16] "GET /update_data/BTCUSD_2023-08-08_11-42-49.pkl HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 11:52:16] "GET /metrics HTTP/1.1" 200 -
