In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import conf
import tensorflow as tf
import ta
import os

from util import preprocessing
from port_env import StockPortfolioEnv
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO

In [12]:
# Directory that receives every PPO checkpoint written by this notebook.
models_dir = './models/PPO'

# exist_ok=True makes this a single, race-free call: the tree is created when
# missing and the call is a no-op when it is already there, so the cell can be
# re-run safely without a separate os.path.exists() check.
os.makedirs(models_dir, exist_ok=True)


In [13]:
# Feature engineer configured with the technical indicators listed in conf.TA_LIST.
fe = FeatureEngineer(tech_indicator_list=conf.TA_LIST)

# Read the raw price file (its first column becomes the index) and run it
# straight through the feature engineer.
df = fe.preprocess_data(pd.read_csv('raw_data.csv', index_col=0))

# Order rows by date, then ticker, and discard the old index.
df = df.sort_values(by=['date', 'ticker'])
df = df.reset_index(drop=True)

# Re-index by date ordinal: factorize assigns one integer code per distinct
# date in order of first appearance, so all rows of the same date share an
# index value (0 for the first date, 1 for the next, ...).
df.index = pd.factorize(df['date'])[0]

Successfully added technical indicators


In [14]:
# Apply the project-local preprocessing step from util.
# NOTE(review): the preview below shows a cov_list column; presumably it is
# added here — confirm against util.preprocessing.
# This rebinds df to its own processed version, so re-running only this cell
# applies preprocessing a second time; re-run the loading cell first.
df = preprocessing(df)
# Preview the first rows of the processed frame.
df.head()

Unnamed: 0,date,ticker,open,high,low,close,volume,day,rsi,macd,cci,atr,adx,sma,ema,obv,cov_list
0,2005-08-18,AAPL,1.675357,1.678571,1.633929,1.405441,442559600,3,63.799914,0.047677,85.670255,0.256897,55.175735,1.373838,1.383495,47468520400,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,AMZN,2.194,2.195,2.1755,2.1865,72086000,3,57.417249,0.083574,-154.579068,0.053844,30.145655,2.238444,2.219884,877646000,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,BRK-B,55.220001,56.740002,55.18,56.740002,1575000,3,62.346187,-0.007621,220.191877,0.5124,26.257157,55.661111,55.781312,-3875000,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,GOOG,6.871992,6.986314,6.849327,6.973611,476692087,3,38.838952,-0.066002,-159.592001,0.166249,28.235001,7.13163,7.130944,22021485703,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,LIN,50.169998,50.540001,49.919998,35.377659,777700,3,55.669487,0.50142,-112.128744,15.303905,27.147617,35.679061,35.531792,19438900,"[[0.0006722603314515664, 0.0001167478716148638..."


In [15]:
# Number of tickers configured in conf.TICKERS; it sizes the environment's
# state/action spaces and the initial weight vector in env_kwargs.
stock_dim = len(conf.TICKERS)

In [16]:
# Keyword arguments forwarded to StockPortfolioEnv when the training
# environment is constructed.
# NOTE(review): reward_scaling is 0 — confirm how port_env uses it; a zero
# multiplier would normally wipe out the reward signal.
env_kwargs = dict(
    share_max=5000,
    initial_amount=1000,
    transaction_cost_pct=0,
    state_space=stock_dim,
    action_space=stock_dim,
    stock_dim=stock_dim,
    tech_indicator_list=conf.TA_LIST,
    reward_scaling=0,
    # Start from an equally weighted portfolio: one 1/stock_dim entry per ticker.
    initial_weights=[1 / stock_dim] * stock_dim,
)

In [17]:
# Wrap the preprocessed frame in the project's portfolio environment.
train_env = StockPortfolioEnv(raw_df=df, **env_kwargs)

# get_sb_env() returns a pair; only the first element (the Stable-Baselines3
# vectorised env) is needed for training. The second element is bound to a
# named throwaway instead of `_`, because assigning to `_` in IPython shadows
# the automatic "last output" variable for the rest of the session.
# NOTE(review): the second element is presumably the initial observation —
# confirm against port_env.
env, _initial_obs = train_env.get_sb_env()

# Show which vectorised-env wrapper was returned.
type(env)




stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv

In [18]:
# The PPO model is built through FinRL's DRLAgent wrapper rather than by
# instantiating stable_baselines3.PPO directly; the hyper-parameters below are
# handed to the wrapper as model_kwargs.
agent = DRLAgent(env=env)

# PPO hyper-parameters. batch_size equals n_steps here (both 1024).
model_param = dict(
    n_steps=1024,
    ent_coef=0.005,
    learning_rate=0.0003,
    batch_size=1024,
)

model = agent.get_model('ppo', model_kwargs=model_param)

{'n_steps': 1024, 'ent_coef': 0.005, 'learning_rate': 0.0003, 'batch_size': 1024}
Using cpu device


In [19]:
# Training schedule: how many train_model() rounds to run and how often
# (in rounds) to write a checkpoint.
N_TRAIN_ROUNDS = 100_000
CHECKPOINT_EVERY = 10_000

# The upper bound is inclusive (N_TRAIN_ROUNDS + 1) so the final round is a
# multiple of CHECKPOINT_EVERY and gets its checkpoint; range(1, 100000)
# stopped at round 99,999 and never wrote the last one.
for i in range(1, N_TRAIN_ROUNDS + 1):
    # reset_num_timesteps=False keeps the model's timestep counter (and its
    # TensorBoard curve) running across rounds instead of restarting at zero.
    train_ppo = agent.train_model(model=model,
                                  total_timesteps=conf.EPISODE_LENGTH,
                                  tb_log_name="PPO",
                                  reset_num_timesteps=False)
    if i % CHECKPOINT_EVERY == 0:
        # Name the checkpoint after the model's own cumulative timestep count
        # rather than a hard-coded 2048 * i, which is only right while each
        # round happens to collect exactly 2048 steps and the counter started
        # at zero.
        model.save(f'{models_dir}/timestep_{model.num_timesteps}')

begin_total_asset:1000
end_total_asset:2169.120570569911
Sharpe:  0.7871185657508476
------------------------------------------
| time/                   |              |
|    fps                  | 413          |
|    iterations           | 2            |
|    time_elapsed         | 4            |
|    total_timesteps      | 197212160    |
| train/                  |              |
|    approx_kl            | 6.170012e-09 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -24.4        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 2.12e+08     |
|    n_updates            | 1925890      |
|    policy_gradient_loss | -1.52e-05    |
|    reward               | 1689.0261    |
|    std                  | 2.22         |
|    value_loss           | 4.24e+08     |
------------------------------------------
begin_total_asset:1000
end_total_asset:3579.61241883999

In [20]:
# Save the model's final state under models_dir as 'finished_training',
# alongside the periodic timestep_* checkpoints written by the training loop.
model.save(f'{models_dir}/finished_training')