In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import conf
import tensorflow as tf
import ta
import os
import gymnasium as gym

from util import preprocessing
# from port_env import StockPortfolioEnv
from new_env import StockPortfolioEnv
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO, DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise


In [2]:
# Directory for saved model checkpoints (the A2C cells below save under it).
models_dir = './models/A2C'

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before os.makedirs().
os.makedirs(models_dir, exist_ok=True)


In [3]:
# Read the raw table (first CSV column becomes the index) and set up FinRL's
# feature engineer with the indicator list configured in `conf`.
df = pd.read_csv('raw_data.csv', index_col=0)
fe = FeatureEngineer(tech_indicator_list=conf.TA_LIST)

# Run the feature engineering, then order rows by (date, ticker) and discard
# the previous index.
df = (
    fe.preprocess_data(df)
    .sort_values(by=['date', 'ticker'])
    .reset_index(drop=True)
)

# Re-label the rows with one integer code per distinct date, so all rows of
# the same date share an index value.
df.index = pd.factorize(df['date'])[0]

Successfully added technical indicators


In [4]:
# Apply the project's `preprocessing` helper (imported from util) and preview
# the first rows of the result.
# NOTE(review): this rebinds `df`, so re-running the cell feeds already
# processed data back into the helper — confirm it tolerates that.
df = preprocessing(df)
df.head()

Unnamed: 0,date,ticker,open,high,low,close,volume,day,rsi,macd,cci,atr,adx,sma,ema,obv,cov_list
0,2005-08-18,AAPL,1.675357,1.678571,1.633929,1.405441,442559600,3,63.799914,0.047677,85.670255,0.256897,55.175735,1.373838,1.383495,47468520400,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,AMZN,2.194,2.195,2.1755,2.1865,72086000,3,57.417249,0.083574,-154.579068,0.053844,30.145655,2.238444,2.219884,877646000,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,BRK-B,55.220001,56.740002,55.18,56.740002,1575000,3,62.346187,-0.007621,220.191877,0.5124,26.257157,55.661111,55.781312,-3875000,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,GOOG,6.871992,6.986314,6.849327,6.973611,476692087,3,38.838952,-0.066002,-159.592001,0.166249,28.235001,7.13163,7.130944,22021485703,"[[0.0006722603314515664, 0.0001167478716148638..."
0,2005-08-18,LIN,50.169998,50.540001,49.919998,35.377659,777700,3,55.669487,0.50142,-112.128744,15.303905,27.147617,35.679061,35.531792,19438900,"[[0.0006722603314515664, 0.0001167478716148638..."


In [5]:
# Training split: every row whose date string sorts before 2022-01-01.
df_train = df.loc[df['date'].lt('2022-01-01')]
# Preview the last rows to check where the split ends.
df_train.tail()

Unnamed: 0,date,ticker,open,high,low,close,volume,day,rsi,macd,cci,atr,adx,sma,ema,obv,cov_list
4121,2021-12-31,PLD,168.080002,169.929993,167.589996,161.802979,2025800,4,73.701749,4.136156,143.422663,8.306851,72.493913,158.240665,158.841934,662622500,"[[0.0002500422544913469, 0.0001426201684324260..."
4121,2021-12-31,UNH,504.140015,506.869995,502.040009,492.011261,1695400,4,71.079685,13.13033,92.259027,14.848088,49.867024,487.197005,487.622803,526127700,"[[0.0002500422544913469, 0.0001426201684324260..."
4121,2021-12-31,UPS,213.139999,215.419998,211.740005,203.705185,1579900,4,60.991196,2.445468,74.903041,12.968804,23.651654,200.931101,201.585801,77726400,"[[0.0002500422544913469, 0.0001426201684324260..."
4121,2021-12-31,WMT,143.199997,145.039993,142.919998,141.332428,5982600,4,58.17347,0.109066,81.717659,4.720047,25.538503,138.07752,138.958407,823897400,"[[0.0002500422544913469, 0.0001426201684324260..."
4121,2021-12-31,XOM,60.740002,61.470001,60.700001,57.903332,14072000,4,48.402421,-0.253682,10.593743,3.956968,13.88677,57.65835,57.85285,-116963500,"[[0.0002500422544913469, 0.0001426201684324260..."


In [6]:
# Test split: every row dated 2022-01-01 or later.
# NOTE(review): the rows keep the index labels assigned on the full frame
# (the preview starts at 4122) — confirm the environment does not expect
# labels starting at 0.
df_test = df.loc[df['date'].ge('2022-01-01')]
# Preview the first rows to check where the split begins.
df_test.head()

Unnamed: 0,date,ticker,open,high,low,close,volume,day,rsi,macd,cci,atr,adx,sma,ema,obv,cov_list
4122,2022-01-03,AAPL,177.830002,182.880005,177.710007,180.434296,104487900,0,66.613017,4.786703,108.693392,4.646072,52.559252,176.424871,176.474275,98167543400,"[[0.0002495422734495149, 0.0001423948493936616..."
4122,2022-01-03,AMZN,167.550003,170.703506,166.160507,170.404495,63520000,0,48.622296,-1.231128,-24.193272,4.074268,8.949928,169.757777,169.523506,22915150000,"[[0.0002495422734495149, 0.0001423948493936616..."
4122,2022-01-03,BRK-B,300.100006,301.299988,299.200012,300.790009,3570300,0,64.875653,3.962497,98.739398,3.82177,46.006927,297.370002,297.621625,812310700,"[[0.0002495422734495149, 0.0001423948493936616..."
4122,2022-01-03,GOOG,144.475494,145.550003,143.502502,145.074493,25214000,0,48.010599,-0.013619,-31.961908,2.632496,13.160649,146.120556,145.720813,27601502767,"[[0.0002495422734495149, 0.0001423948493936616..."
4122,2022-01-03,LIN,346.420013,346.589996,338.359985,331.05545,1514700,0,54.535226,4.353743,32.659128,10.067444,29.349115,333.408234,333.403141,153556000,"[[0.0002495422734495149, 0.0001423948493936616..."


In [7]:
# Number of entries in conf.TICKERS; reused below as the environment's
# state_space, action_space and stock_dim.
stock_dim = len(conf.TICKERS)

In [8]:
# Keyword arguments passed to every StockPortfolioEnv built in this notebook.
env_kwargs = dict(
    share_max=5000,
    initial_amount=1000,
    transaction_cost_pct=0,
    state_space=stock_dim,
    action_space=stock_dim,
    stock_dim=stock_dim,
    tech_indicator_list=conf.TA_LIST,
    # NOTE(review): confirm the environment does not multiply rewards by this
    # factor, otherwise 0 would zero them out.
    reward_scaling=0,
    # Equal starting weight for each of the stock_dim entries.
    initial_weights=[1/stock_dim] * stock_dim,
)

In [9]:
# Build the training environment on the training split with the shared kwargs.
train_env = StockPortfolioEnv(df = df_train, **env_kwargs)
# get_sb_env() returns a pair; only the first element is kept. The cell output
# shows it is a stable-baselines3 DummyVecEnv.
env_train, _ = train_env.get_sb_env()
type(env_train)
# Unused alternative, kept for reference only: direct A2C construction.
# It cannot run as written — `A2C` is not imported in the import cell and
# `env` is not defined in the cells shown here.
# agent = A2C(policy='MlpPolicy',
#             env=env,
#             learning_rate=0.0003,
#             gamma=1,
#             n_steps=1,
#             ent_coef=0.005,
#             verbose=1)




stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv

# PPO

In [12]:
# Build the PPO model through FinRL's DRLAgent wrapper around the vectorised
# training environment (instead of constructing PPO("MlpPolicy", ...) directly).
agent = DRLAgent(env=env_train)

# Hyper-parameters handed to the PPO constructor via get_model().
model_param = dict(
    n_steps=1024,
    ent_coef=0.005,
    learning_rate=0.0003,
    batch_size=1024,
)
model = agent.get_model('ppo', model_kwargs=model_param)

{'buffer_size': 100000, 'batch_size': 1024}
Using cpu device


In [None]:
# Train PPO in repeated chunks of conf.EPISODE_LENGTH timesteps and write a
# checkpoint every 10,000 iterations.
#
# PPO checkpoints get their own directory: `models_dir` is the A2C directory,
# and the PPO load cell reads from './models/PPO/'.
ppo_models_dir = './models/PPO'
os.makedirs(ppo_models_dir, exist_ok=True)

for i in range(1, 100000):
    # reset_num_timesteps=False is presumably forwarded to model.learn() so the
    # timestep counter keeps accumulating across calls — confirm in DRLAgent.
    train_ppo = agent.train_model(model=model,
                                  total_timesteps=conf.EPISODE_LENGTH,
                                  tb_log_name="PPO",
                                  reset_num_timesteps=False)
    if i % 10000 == 0:
        # Label the checkpoint with the model's real cumulative step count
        # rather than a hard-coded 2048-steps-per-iteration estimate.
        model.save(f'{ppo_models_dir}/timestep_{model.num_timesteps}')

In [33]:
# Save the final PPO model where the PPO load cell looks for it
# ('./models/PPO/finished_training.zip'; save() appends the .zip suffix).
# `models_dir` is the A2C directory, and the A2C section writes its own
# 'finished_training' file there, which would overwrite this one.
os.makedirs('./models/PPO', exist_ok=True)
model.save('./models/PPO/finished_training')

In [None]:
# Build the evaluation environment on the test split. The data frame is passed
# with the same `df=` keyword the training environment uses; the earlier
# `raw_df=` spelling did not match that call on the same class.
test_env = StockPortfolioEnv(df = df_test, **env_kwargs)

In [None]:
# Load the saved PPO model through the PPO class itself. Going through
# `model.load(...)` would depend on what `model` is bound to when the cell
# runs, and the A2C section rebinds `model` to an A2C instance.
loaded_model = PPO.load('./models/PPO/finished_training.zip')

In [None]:
# Run the loaded model over the test environment. The call returns two values,
# bound here as `daily_return` and `weights`.
# NOTE(review): the exact content of both results is defined by
# DRLAgent.DRL_prediction — confirm before using them downstream.
daily_return, weights = agent.DRL_prediction(loaded_model, test_env)

begin_total_asset:1000
end_total_asset:1270.1574433117278
Sharpe:  2.795917258178576




# A2C

In [11]:
# Fresh DRLAgent around the training environment; from here on `agent` and
# `model` refer to the A2C run.
agent = DRLAgent(env=env_train)

# Hyper-parameters handed to the A2C constructor via get_model().
model_param = dict(n_steps=5, ent_coef=0.005, learning_rate=0.001)
model = agent.get_model('a2c', model_kwargs=model_param)

{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.001}
Using cpu device


In [12]:
# Resume from an earlier A2C checkpoint. load() is called through the existing
# instance (the A2C class itself is not imported in this notebook), and the
# result replaces the freshly created `model`.
model = model.load('./models/A2C/timestep_80000.zip')
# Attach the training environment to the loaded model so training can continue.
model.set_env(env_train)

In [13]:
# Continue A2C training from iteration 80001, one conf.EPISODE_LENGTH chunk per
# pass, and checkpoint on every iteration number divisible by 10,000.
# The number in the file name is the loop counter `i`, matching the existing
# 'timestep_80000.zip' checkpoint.
for i in range(80001, 100000):
    train_a2c = agent.train_model(
        model=model,
        total_timesteps=conf.EPISODE_LENGTH,
        tb_log_name="A2C",
        reset_num_timesteps=False,
    )
    if i % 10000 != 0:
        continue  # not a checkpoint iteration
    model.save(f'{models_dir}/timestep_{i}')

-------------------------------------
| time/                 |           |
|    fps                | 285       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 114602116 |
| train/                |           |
|    entropy_loss       | 42.3      |
|    explained_variance | 0         |
|    learning_rate      | 0.001     |
|    n_updates          | 22920421  |
|    policy_loss        | 4.42e+12  |
|    reward             | 6241.142  |
|    std                | 0.0662    |
|    value_loss         | 3.98e+08  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 324       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 114602876 |
| train/                |           |
|    entropy_loss       | 42.3      |
|    explained_variance | 0         |
|    learning_rate      | 0.001     |
|    n_updat

KeyboardInterrupt: 

In [16]:
# Save the A2C model in its current state (training above was stopped by a
# KeyboardInterrupt) as 'finished_training' under `models_dir`.
model.save(f'{models_dir}/finished_training')