# FinRL

## Part 2

In [14]:
import pandas as pd

from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

check_and_make_directories([TRAINED_MODEL_DIR])

### Load the Data from Part 1

In [23]:
train = pd.read_csv('./data/train_data.csv')

train = train.set_index(train.columns[0])
train.index.names = ['']

### Setup Environment

In [24]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f'Stock Dimension: {stock_dimension}, State Space: {state_space}')

Stock Dimension: 4, State Space: 41


In [25]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    'hmax': 100,
    'initial_amount': 1000000,
    'num_stock_shares': num_stock_shares,
    'buy_cost_pct': buy_cost_list,
    'sell_cost_pct': sell_cost_list,
    'state_space': state_space,
    'stock_dim': stock_dimension,
    'tech_indicator_list': INDICATORS,
    'action_space': stock_dimension,
    'reward_scaling': 1e-4
}

e_train_gym = StockTradingEnv(df = train, **env_kwargs)

### Environment for Training

In [26]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


### Train Agent

In [27]:
agent = DRLAgent(env = env_train)

# Set the corresponding values to 'True' for the algorithms that you want to use
if_using_a2c = True
if_using_ddpg = False
if_using_ppo = False
if_using_td3 = False
if_using_sac = False

### A2C Model

The code above and below provide a healthy framework to add different agent types within the same notebook. For this I'll only be training sn A2C model, but may add ddpg, ppo, and td3 later

In [28]:
model_a2c = agent.get_model('a2c')
model_ppo = agent.get_model('ppo')

if if_using_a2c:
    # set up logger
    tmp_path = RESULTS_DIR + '/a2c'
    new_logger_a2c = configure(tmp_path, ['stdout', 'csv', 'tensorboard'])
    
    # set new logger
    model_a2c.set_logger(new_logger_a2c)

# Come back to this later
if if_using_ppo:
    # set up logger
    tmp_path = RESULTS_DIR + '/ppo'
    new_logger_ppo = configure(tmp_path, ['stdout', 'csv', 'tensorboard'])
    
    # set new logger
    model_ppo.set_logger(new_logger_ppo)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 64}
Using cpu device
Logging to results/a2c


In [29]:
trained_a2c = agent.train_model(model=model_a2c,
                                tb_log_name='a2c',
                                total_timesteps=50000) if if_using_a2c else None

trained_ppo = agent.train_model(model=model_ppo,
                                tb_log_name='ppo',
                                total_timesteps=50000) if if_using_ppo else None

---------------------------------------
| time/                 |             |
|    fps                | 929         |
|    iterations         | 100         |
|    time_elapsed       | 0           |
|    total_timesteps    | 500         |
| train/                |             |
|    entropy_loss       | -5.75       |
|    explained_variance | -1.53       |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | -3.65       |
|    reward             | -0.47909597 |
|    std                | 1.02        |
|    value_loss         | 0.502       |
---------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 960       |
|    iterations         | 200       |
|    time_elapsed       | 1         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -5.74     |
|    explained_variance | 0.108     |
|    learning_rate

In [30]:
trained_a2c.save(TRAINED_MODEL_DIR + '/agent_a2c') if if_using_a2c else None

trained_ppo.save(TRAINED_MODEL_DIR + '/agent_ppo') if if_using_ppo else None