In [39]:
import pandas as pd
from stable_baselines3.common.logger import configure

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
from finrl.main import check_and_make_directories
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv

# Make sure the directory named by TRAINED_MODEL_DIR is available before any
# training happens (per the helper's name it checks first and creates only
# when missing -- TODO confirm against finrl.main).
check_and_make_directories([TRAINED_MODEL_DIR])

In [41]:
# Read the training data from CSV; the path is relative to the notebook's
# working directory.
TRAIN_DATA_PATH = 'data/train_data.csv'
train = pd.read_csv(TRAIN_DATA_PATH)

In [42]:
# Peek at the first rows of the freshly loaded frame to check its columns.
train.head()

Unnamed: 0,date,tic,close,high,low,open,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2018-01-02,AAPL,40.341892,40.351262,39.639317,39.850091,102223600.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,40.341892,40.341892,9.77,0.0
1,2018-01-02,AMZN,59.4505,59.5,58.525501,58.599998,53890000.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,59.4505,59.4505,9.77,0.0
2,2018-01-02,BRK-B,197.220001,198.869995,195.960007,198.869995,4113000.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,197.220001,197.220001,9.77,0.0
3,2018-01-02,GOOGL,53.258095,53.395556,52.256167,52.256167,31766000.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,53.258095,53.258095,9.77,0.0
4,2018-01-02,JPM,86.730354,86.786594,85.814444,86.473257,13578800.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,86.730354,86.730354,9.77,0.0


In [29]:
# Tidy the frame in one pass:
#   * discard the "Unnamed: 0" column when the CSV carried one (it is simply
#     skipped when absent),
#   * order rows by date and then ticker,
#   * throw away the old row labels.
train = (
    train
    .drop(columns=["Unnamed: 0"], errors="ignore")
    .sort_values(['date', 'tic'])
    .reset_index(drop=True)
)

# Label each row with the integer code of its date (0 for the first distinct
# date, 1 for the next, ...), so all rows of one date share an index value.
date_codes = train['date'].factorize()[0]
train.index = date_codes

In [30]:
# Report (rows, columns) of the cleaned, re-indexed training frame.
train.shape

(12590, 18)

In [31]:
# Preview again: the index now holds the per-date code from factorize(), so
# rows that belong to the same date carry the same index label.
train.head()

Unnamed: 0,date,tic,close,high,low,open,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2018-01-02,AAPL,40.341892,40.351262,39.639317,39.850091,102223600.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,40.341892,40.341892,9.77,0.0
0,2018-01-02,AMZN,59.4505,59.5,58.525501,58.599998,53890000.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,59.4505,59.4505,9.77,0.0
0,2018-01-02,BRK-B,197.220001,198.869995,195.960007,198.869995,4113000.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,197.220001,197.220001,9.77,0.0
0,2018-01-02,GOOGL,53.258095,53.395556,52.256167,52.256167,31766000.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,53.258095,53.258095,9.77,0.0
0,2018-01-02,JPM,86.730354,86.786594,85.814444,86.473257,13578800.0,1.0,0.0,40.348316,40.328442,0.0,66.666667,100.0,86.730354,86.730354,9.77,0.0


In [32]:
# Number of distinct tickers present in the training data.
stock_dimension = len(train['tic'].unique())

# State vector length: one scalar, plus two values per ticker, plus one value
# per (technical indicator, ticker) pair.
# NOTE(review): presumably cash balance, price and holdings -- confirm against
# the environment's state layout.
state_space = 1 + stock_dimension * (2 + len(INDICATORS))

print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 10, State Space: 101


In [33]:
# Proportional transaction cost applied to every ticker, for buys and sells.
TRANSACTION_COST_PCT = 0.001

# Build two independent lists. The previous chained assignment
# (`buy_cost_list = sell_cost_list = [...]`) bound both names to the same list
# object, so editing one fee schedule in place would silently change the other.
buy_cost_list = [TRANSACTION_COST_PCT] * stock_dimension
sell_cost_list = [TRANSACTION_COST_PCT] * stock_dimension

# One entry per ticker, all zero: no shares held at the start.
num_stock_shares = [0] * stock_dimension

# Keyword arguments forwarded to the trading environment constructor.
env_kwargs = {
    "hmax": 100,  # NOTE(review): presumably the per-trade share cap -- confirm
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,  # one action component per ticker
    "reward_scaling": 1e-4,
}

In [34]:
# Instantiate the trading environment on the training frame, using the
# settings collected in env_kwargs.
e_train_gym = StockTradingEnv(
    df=train,
    **env_kwargs,
)

In [35]:
# get_sb_env() returns a pair; only the first element (the environment handed
# to the agent) is kept, the second is discarded.
env_train, _ = e_train_gym.get_sb_env()
# Show the wrapper class so the reader can see what kind of env was produced.
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [36]:
# Fixed seed for the PPO model. Without it every run starts from different
# random weights and samples different actions, so results cannot be
# reproduced from a fresh kernel.
PPO_SEED = 42

agent = DRLAgent(env=env_train)

# PPO hyper-parameters forwarded to the underlying Stable-Baselines3 model.
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}
# The seed goes through get_model()'s own `seed` argument rather than inside
# PPO_PARAMS, so it is not supplied to the model constructor twice.
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS, seed=PPO_SEED)


# set up logger: write training metrics to stdout, a CSV file and TensorBoard
# event files under <RESULTS_DIR>/ppo
tmp_path = RESULTS_DIR + '/ppo'
new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
# Set new logger
model_ppo.set_logger(new_logger_ppo)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to results/ppo


In [38]:
# Train for 50,000 environment steps. learn() hands back the model it was
# called on, so `trained_ppo` and `model_ppo` name the same object.
trained_ppo = model_ppo.learn(
    total_timesteps=50000,
    tb_log_name='ppo',
)

---------------------------------
| time/              |          |
|    fps             | 389      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
| train/             |          |
|    learning_rate   | 0.00025  |
---------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 369         |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005943491 |
|    clip_fraction        | 0.0487      |
|    clip_range           | 0.2         |
|    entropy_loss         | -14.2       |
|    explained_variance   | -0.0096     |
|    learning_rate        | 0.00025     |
|    loss                 | 41.5        |
|    n_updates            | 28          |
|    policy_gradient_loss | -0.00627    |
|    std              

In [43]:
# Persist the trained PPO model under the relative path "models/ppo".
# NOTE(review): the setup cell creates TRAINED_MODEL_DIR, yet this path is
# hardcoded -- confirm whether the model should be saved there instead.
trained_ppo.save("models/ppo")

