# Part 1. Import Packages

In [1]:
import os
import pandas as pd

# from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from customenv import StockTradingEnv
from custommodels import DRLAgent
from stable_baselines3.common.logger import configure
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

# Folders that must be present before training; only the saved-model folder
# is passed here (presumably created if missing -- confirm in finrl.main).
required_dirs = [TRAINED_MODEL_DIR]
check_and_make_directories(required_dirs)

## Read data

We first read the .csv file containing our training data into a pandas DataFrame.

In [2]:
train = pd.read_csv('train_data_single.csv')

# The first CSV column is promoted to the row index and the index is given an
# empty name. If your data was not generated by part 1 of this tutorial, make
# sure its columns and index are already in the form the environment expects;
# in that case you can comment out and skip the index fix-up below.
index_column = train.columns[0]
train = train.set_index(index_column)
train.index.name = ''

## Construct the environment

Calculate and specify the parameters we need for constructing the environment.

In [3]:
# Number of distinct tickers in the training data.
unique_tickers = train["tic"].unique()
stock_dimension = len(unique_tickers)

# State vector length: 1 scalar + (2 + number of indicators) entries per stock.
# NOTE(review): presumably cash balance + price and holdings per stock --
# confirm against the state layout in customenv.StockTradingEnv.
indicator_count = len(INDICATORS)
state_space = 1 + stock_dimension * (2 + indicator_count)

print(indicator_count)
print(state_space)
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

8
11
Stock Dimension: 1, State Space: 11


In [4]:
# Per-stock cost rate passed as both "buy_cost_pct" and "sell_cost_pct".
# The two lists are built separately on purpose: a chained assignment would
# bind both names to the same list object, so an in-place change to one side
# would silently change the other as well.
# NOTE(review): 0.001 presumably means 0.1% per trade -- confirm in customenv.
TRANSACTION_COST_PCT = 0.001
buy_cost_list = [TRANSACTION_COST_PCT] * stock_dimension
sell_cost_list = [TRANSACTION_COST_PCT] * stock_dimension

# One zero per ticker (presumably the initial holdings -- confirm in customenv).
num_stock_shares = [0] * stock_dimension

# Keyword arguments forwarded unchanged to the StockTradingEnv constructor.
env_kwargs = {
    "hmax": 10,
    "initial_amount": 100000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
    # "print_verbosity": 1
}


# Training environment built on the training DataFrame.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

## Environment for training

In [5]:
# get_sb_env() returns a pair; only the first element (the vectorized
# environment handed to the agents) is used, the second is discarded.
sb_env_pair = e_train_gym.get_sb_env()
env_train, _ = sb_env_pair
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [6]:
agent = DRLAgent(env=env_train)

# Algorithm switches: leave a flag at True to set up, train and save that
# algorithm in the cells below; set it to False to skip it.
if_using_a2c = True    # Agent 1: A2C
if_using_ddpg = True   # Agent 2: DDPG
if_using_ppo = True    # Agent 3: PPO
if_using_td3 = True    # Agent 4: TD3
if_using_sac = True    # Agent 5: SAC

## Agent Training: 5 algorithms (A2C, DDPG, PPO, TD3, SAC)


### Agent 1: A2C


In [7]:
agent = DRLAgent(env=env_train)

# Build the A2C model only when the algorithm is enabled, so a disabled
# algorithm costs nothing and prints nothing. The name is always defined
# (None when disabled) so later cells can reference it safely.
model_a2c = None
if if_using_a2c:
    model_a2c = agent.get_model("a2c")

    # Send this run's training logs to stdout, CSV and TensorBoard files
    # under RESULTS_DIR/a2c.
    tmp_path = RESULTS_DIR + '/a2c'
    new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_a2c.set_logger(new_logger_a2c)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
Logging to results/a2c


In [8]:
# Train A2C when enabled; otherwise leave the result as None.
if if_using_a2c:
    trained_a2c = agent.train_model(
        model=model_a2c,
        tb_log_name='a2c',
        total_timesteps=25000,
    )
else:
    trained_a2c = None

-----------------------------------------
| time/                 |               |
|    fps                | 663           |
|    iterations         | 100           |
|    time_elapsed       | 0             |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -1.43         |
|    explained_variance | 0             |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | 0.267         |
|    reward             | 1.9528252e-05 |
|    std                | 1.01          |
|    value_loss         | 1.96e-08      |
-----------------------------------------
-----------------------------------------
| time/                 |               |
|    fps                | 616           |
|    iterations         | 200           |
|    time_elapsed       | 1             |
|    total_timesteps    | 1000          |
| train/                |               |
|    entropy_loss       | -1.44   

In [9]:
# Persist the trained A2C agent (skipped when A2C is disabled).
if if_using_a2c:
    trained_a2c.save(TRAINED_MODEL_DIR + "/agent_a2c")

### Agent 2: DDPG

In [10]:
agent = DRLAgent(env=env_train)

# Build the DDPG model only when the algorithm is enabled, so a disabled
# algorithm costs nothing and prints nothing. The name is always defined
# (None when disabled) so later cells can reference it safely.
model_ddpg = None
if if_using_ddpg:
    model_ddpg = agent.get_model("ddpg")

    # Send this run's training logs to stdout, CSV and TensorBoard files
    # under RESULTS_DIR/ddpg.
    tmp_path = RESULTS_DIR + '/ddpg'
    new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ddpg.set_logger(new_logger_ddpg)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device
Logging to results/ddpg


In [11]:
# Train DDPG when enabled; otherwise leave the result as None.
if if_using_ddpg:
    trained_ddpg = agent.train_model(
        model=model_ddpg,
        tb_log_name='ddpg',
        total_timesteps=20000,
    )
else:
    trained_ddpg = None

day: 3396, episode: 10
begin_total_asset: 100000.00
end_total_asset: 1238482.54
total_reward: 1138482.54
total_cost: 101.04
total_trades: 3384
Sharpe: 0.863
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 216         |
|    time_elapsed    | 62          |
|    total_timesteps | 13588       |
| train/             |             |
|    actor_loss      | -220        |
|    critic_loss     | 157         |
|    learning_rate   | 0.001       |
|    n_updates       | 10191       |
|    reward          | -0.21202786 |
------------------------------------


In [12]:
# Persist the trained DDPG agent (skipped when DDPG is disabled).
if if_using_ddpg:
    trained_ddpg.save(TRAINED_MODEL_DIR + "/agent_ddpg")

### Agent 3: PPO

In [22]:
agent = DRLAgent(env=env_train)

# Hyperparameters forwarded to the PPO model via model_kwargs.
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}

# Build the PPO model only when the algorithm is enabled, so a disabled
# algorithm costs nothing and prints nothing. The name is always defined
# (None when disabled) so later cells can reference it safely.
model_ppo = None
if if_using_ppo:
    model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

    # Send this run's training logs to stdout, CSV and TensorBoard files
    # under RESULTS_DIR/ppo.
    tmp_path = RESULTS_DIR + '/ppo'
    new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to results/ppo


In [23]:
# Train PPO when enabled; otherwise leave the result as None.
if if_using_ppo:
    trained_ppo = agent.train_model(
        model=model_ppo,
        tb_log_name='ppo',
        total_timesteps=100000,
    )
else:
    trained_ppo = None

-------------------------------------
| time/              |              |
|    fps             | 881          |
|    iterations      | 1            |
|    time_elapsed    | 2            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | 4.171881e-05 |
-------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 855          |
|    iterations           | 2            |
|    time_elapsed         | 4            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0063955104 |
|    clip_fraction        | 0.074        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_variance   | 5.96e-08     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0167      |
|    n_updates            | 10           |
|    policy_g

In [24]:
# Persist the trained PPO agent (skipped when PPO is disabled).
if if_using_ppo:
    trained_ppo.save(TRAINED_MODEL_DIR + "/agent_ppo")

### Agent 4: TD3

In [16]:
agent = DRLAgent(env=env_train)

# Hyperparameters forwarded to the TD3 model via model_kwargs.
TD3_PARAMS = {
    "batch_size": 100,
    "buffer_size": 1000000,
    "learning_rate": 0.001,
}

# Build the TD3 model only when the algorithm is enabled, so a disabled
# algorithm does not set up a model (with the large buffer_size requested
# above) that is never trained. The name is always defined (None when
# disabled) so later cells can reference it safely.
model_td3 = None
if if_using_td3:
    model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)

    # Send this run's training logs to stdout, CSV and TensorBoard files
    # under RESULTS_DIR/td3.
    tmp_path = RESULTS_DIR + '/td3'
    new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_td3.set_logger(new_logger_td3)

{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cpu device
Logging to results/td3


In [17]:
# Train TD3 when enabled; otherwise leave the result as None.
if if_using_td3:
    trained_td3 = agent.train_model(
        model=model_td3,
        tb_log_name='td3',
        total_timesteps=20000,
    )
else:
    trained_td3 = None

---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 205      |
|    time_elapsed    | 66       |
|    total_timesteps | 13588    |
| train/             |          |
|    actor_loss      | 921      |
|    critic_loss     | 6.17e+03 |
|    learning_rate   | 0.001    |
|    n_updates       | 10191    |
|    reward          | 5e-17    |
---------------------------------


In [18]:
# Persist the trained TD3 agent (skipped when TD3 is disabled).
if if_using_td3:
    trained_td3.save(TRAINED_MODEL_DIR + "/agent_td3")

### Agent 5: SAC

In [19]:
agent = DRLAgent(env=env_train)

# Hyperparameters forwarded to the SAC model via model_kwargs.
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}

# Build the SAC model only when the algorithm is enabled, so a disabled
# algorithm costs nothing and prints nothing. The name is always defined
# (None when disabled) so later cells can reference it safely.
model_sac = None
if if_using_sac:
    model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)

    # Send this run's training logs to stdout, CSV and TensorBoard files
    # under RESULTS_DIR/sac.
    tmp_path = RESULTS_DIR + '/sac'
    new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    model_sac.set_logger(new_logger_sac)

{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to results/sac


In [20]:
# Train SAC when enabled; otherwise leave the result as None.
if if_using_sac:
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name='sac',
        total_timesteps=20000,
    )
else:
    trained_sac = None

------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 139         |
|    time_elapsed    | 97          |
|    total_timesteps | 13588       |
| train/             |             |
|    actor_loss      | 72.1        |
|    critic_loss     | 3.98        |
|    ent_coef        | 0.339       |
|    ent_coef_loss   | 10.1        |
|    learning_rate   | 0.0001      |
|    n_updates       | 13487       |
|    reward          | -0.21202786 |
------------------------------------


In [21]:
# Persist the trained SAC agent (skipped when SAC is disabled).
if if_using_sac:
    trained_sac.save(TRAINED_MODEL_DIR + "/agent_sac")

## Save the trained agent
Trained agents should have already been saved in the "trained_models" directory after you run the code blocks above.

For Colab users, the zip files should be at "./trained_models" or "/content/trained_models".

For users running in a local environment, the zip files should be at "./trained_models".