<a href="https://colab.research.google.com/github/robbespo00/finance_reinforcement_learning/blob/main/multiple_stock_trading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Multiple Stock Trading**

In [None]:
! pip install yfinance &> /dev/null
! pip install stockstats &> /dev/null
! pip install stable-baselines[mpi] &> /dev/null
! pip install tensorflow==1.15.4 &> /dev/null
! pip install git+https://github.com/AI4Finance-Foundation/FinRL.git &> /dev/null

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime

%matplotlib inline
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.meta.data_processor import DataProcessor

from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from pprint import pprint

import sys
# NOTE(review): finrl has already been imported earlier in this cell, so this
# extra search path cannot change how those imports were resolved; it only
# affects modules that have not been imported yet. Confirm it is still needed.
sys.path.append("../FinRL")

import itertools

from finrl import config
from finrl import config_tickers
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)

# Replace the date windows imported from finrl.config with the ones used in
# this experiment. The training window ends on the same date the trading
# window starts.
TRAIN_START_DATE, TRAIN_END_DATE = "2009-01-01", "2020-07-01"
TRADE_START_DATE, TRADE_END_DATE = "2020-07-01", "2021-10-31"

In [None]:
# Download price history for every Dow 30 ticker over the full span, from the
# start of training to the end of trading; the split happens later.
df = YahooDownloader(
    ticker_list=config_tickers.DOW_30_TICKER,
    start_date=TRAIN_START_DATE,
    end_date=TRADE_END_DATE,
).fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [None]:
# Show the ticker symbols that were requested from the downloader.
print(config_tickers.DOW_30_TICKER)

['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW']


In [None]:
# Preview the first five rows of the raw download, ordered by date then ticker.
df.sort_values(by=["date", "tic"], ignore_index=True).head(n=5)

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2009-01-02,3.067143,3.251429,3.041429,2.771174,746015200,AAPL,4
1,2009-01-02,58.59,59.080002,57.75,44.867599,6547900,AMGN,4
2,2009-01-02,18.57,19.52,18.4,15.477421,10955700,AXP,4
3,2009-01-02,42.799999,45.560001,42.779999,33.941101,7010200,BA,4
4,2009-01-02,44.91,46.98,44.709999,31.942253,7117200,CAT,4


In [None]:
# Feature engineering: technical indicators from INDICATORS plus the VIX and
# turbulence columns; no user-defined features.
fe = FeatureEngineer(
    tech_indicator_list=INDICATORS,
    use_technical_indicator=True,
    use_turbulence=True,
    use_vix=True,
    user_defined_feature=False,
)

# Apply the configured pipeline to the raw download.
processed = fe.preprocess_data(df)

Successfully added technical indicators
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3229, 8)
Successfully added vix
Successfully added turbulence index


In [None]:
# Build the full (date, ticker) grid so every ticker has a row on every date
# that appears in the processed data.
list_ticker = processed["tic"].unique().tolist()
list_date = list(
    pd.date_range(processed["date"].min(), processed["date"].max()).astype(str)
)
combination = list(itertools.product(list_date, list_ticker))

# Left-join the features onto the grid, keep only dates present in `processed`
# (this drops the calendar days added by date_range), order the rows, and
# replace the gaps left by the join with 0.
processed_full = (
    pd.DataFrame(combination, columns=["date", "tic"])
    .merge(processed, how="left", on=["date", "tic"])
    .loc[lambda grid: grid["date"].isin(processed["date"])]
    .sort_values(["date", "tic"])
    .fillna(0)
)

In [None]:
# Preview the first ten rows of the completed grid, ordered by date then ticker.
processed_full.sort_values(by=["date", "tic"], ignore_index=True).head(n=10)

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2009-01-02,AAPL,3.067143,3.251429,3.041429,2.771174,746015200.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,2.771174,2.771174,39.189999,0.0
1,2009-01-02,AMGN,58.59,59.080002,57.75,44.867599,6547900.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,44.867599,44.867599,39.189999,0.0
2,2009-01-02,AXP,18.57,19.52,18.4,15.477421,10955700.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,15.477421,15.477421,39.189999,0.0
3,2009-01-02,BA,42.799999,45.560001,42.779999,33.941101,7010200.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,33.941101,33.941101,39.189999,0.0
4,2009-01-02,CAT,44.91,46.98,44.709999,31.942253,7117200.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,31.942253,31.942253,39.189999,0.0
5,2009-01-02,CRM,8.025,8.55,7.9125,8.505,4069200.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,8.505,8.505,39.189999,0.0
6,2009-01-02,CSCO,16.41,17.0,16.25,12.155674,40980600.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,12.155674,12.155674,39.189999,0.0
7,2009-01-02,CVX,74.230003,77.300003,73.580002,44.807617,13695900.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,44.807617,44.807617,39.189999,0.0
8,2009-01-02,DIS,22.76,24.030001,22.5,20.597498,9796600.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,20.597498,20.597498,39.189999,0.0
9,2009-01-02,GS,84.019997,87.620003,82.190002,70.735062,14088500.0,4.0,0.0,2.995051,2.664252,100.0,66.666667,100.0,70.735062,70.735062,39.189999,0.0


In [None]:
# Cut the processed frame into the training window and the trading window.
train = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)

# Row counts of each split, one per line.
print(len(train), len(trade), sep="\n")

83897
9744


In [None]:
# Last five rows of the training split, to check where the window ends.
train.tail(n=5)

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
2892,2020-06-30,UNH,288.570007,296.450012,287.660004,286.754181,2932900.0,1.0,-0.019415,302.84589,270.287334,52.413049,-25.866269,1.846804,286.9972,280.002895,30.43,12.918722
2892,2020-06-30,V,191.490005,193.75,190.160004,190.737228,9040100.0,1.0,1.048785,198.750523,185.041397,53.02103,-51.550912,2.013358,191.485036,181.677682,30.43,12.918722
2892,2020-06-30,VZ,54.919998,55.290001,54.360001,49.75082,17414800.0,1.0,-0.43168,53.248501,48.123872,48.09703,-51.0656,8.508886,50.378309,50.825242,30.43,12.918722
2892,2020-06-30,WBA,42.119999,42.580002,41.759998,39.035736,4782100.0,1.0,-0.083986,42.609308,36.487093,48.83019,-14.508045,1.500723,39.135189,38.935129,30.43,12.918722
2892,2020-06-30,WMT,119.220001,120.129997,118.540001,116.121765,6836400.0,1.0,-0.886569,119.473766,113.510451,48.159664,-69.938813,3.847271,117.787627,119.723274,30.43,12.918722


In [None]:
# First five rows of the trading split, to check where the window starts.
trade.head(n=5)

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2020-07-01,AAPL,91.279999,91.839996,90.977501,89.904602,110737200.0,2.0,3.014603,92.699319,80.179951,62.807128,107.491876,29.730532,83.933767,77.717543,28.620001,53.068043
0,2020-07-01,AMGN,235.520004,256.230011,232.580002,240.153946,6575800.0,2.0,3.636391,232.397415,200.212954,61.27962,271.386137,46.806139,214.858661,215.931665,28.620001,53.068043
0,2020-07-01,AXP,95.25,96.959999,93.639999,91.743057,3301000.0,2.0,-0.38771,110.012237,87.432137,48.504825,-66.320992,3.142448,96.882072,90.357376,28.620001,53.068043
0,2020-07-01,BA,185.880005,190.610001,180.039993,180.320007,49036700.0,2.0,5.443193,220.721139,160.932863,50.925771,24.220608,15.93292,176.472335,155.614168,28.620001,53.068043
0,2020-07-01,CAT,129.380005,129.399994,125.879997,119.817108,2807800.0,2.0,1.263827,129.720776,112.569085,52.865417,35.633512,14.457404,118.58688,112.860601,28.620001,53.068043


In [None]:
# Display the technical-indicator names imported from finrl.config; this same
# list is passed to FeatureEngineer and to the trading environment.
INDICATORS

['macd',
 'boll_ub',
 'boll_lb',
 'rsi_30',
 'cci_30',
 'dx_30',
 'close_30_sma',
 'close_60_sma']

In [None]:
# Number of distinct tickers in the training split.
stock_dimension = len(train["tic"].unique())
# One scalar, plus (2 + number of indicators) values per ticker.
# NOTE(review): presumably cash balance + price and holding per stock +
# indicators per stock -- confirm against StockTradingEnv.
state_space = 1 + stock_dimension * (2 + len(INDICATORS))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [None]:
# Per-ticker transaction-cost rates for buys and sells. The two lists are
# built independently so that mutating one can never silently change the
# other (a chained assignment would bind both names to the same list object).
# NOTE(review): 0.001 is presumably a fraction of traded value -- confirm
# against StockTradingEnv.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
# One zero entry per ticker for the initial share counts.
num_stock_shares = [0] * stock_dimension

# Keyword arguments forwarded unchanged to StockTradingEnv.
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
}

# Training environment built on the training split only.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

In [None]:
# get_sb_env() returns a pair; only the first element (the training env handed
# to the agent below) is kept, the second is discarded.
env_train, _ = e_train_gym.get_sb_env()
# Print the wrapper class to confirm which environment type was returned.
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [None]:
# Bind a DRL agent to the training environment.
agent = DRLAgent(env=env_train)

# **1) Actor Critic Method**

$$\nabla_{\theta} J(\theta) = \mathbb{E}_{\tau} \left[ \sum_{t=0}^{T-1} \nabla_{\theta} \log \pi_{\theta}(a_t|s_t)Q_w (s_t,a_t) \right]$$

In [None]:
# Create the agent on the training environment and build an A2C model. No
# hyper-parameter overrides are passed, so the library's own A2C settings apply.
agent = DRLAgent(env=env_train)
model_a2c = agent.get_model("a2c")

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device


In [None]:
# Train the A2C model for 50,000 timesteps, logging under the "a2c" run name.
trained_a2c = agent.train_model(
    model=model_a2c,
    total_timesteps=50_000,
    tb_log_name="a2c",
)

---------------------------------------
| time/                 |             |
|    fps                | 62          |
|    iterations         | 100         |
|    time_elapsed       | 7           |
|    total_timesteps    | 500         |
| train/                |             |
|    entropy_loss       | -41.2       |
|    explained_variance | 5.96e-08    |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | 25.7        |
|    reward             | 0.038072154 |
|    std                | 1           |
|    value_loss         | 0.767       |
---------------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 76         |
|    iterations         | 200        |
|    time_elapsed       | 13         |
|    total_timesteps    | 1000       |
| train/                |            |
|    entropy_loss       | -41.2      |
|    explained_variance | -0.00794   |
|    lear

# **2) Deep Deterministic Policy Gradient**

In [None]:
# TODO: train a DDPG agent here (not implemented yet).

# **3) Proximal Policy Optimization**

$$L^{\text{CLIP}}(\theta)=\hat{\mathbb{E}} \left[ \min(r_t(\theta)\hat{A}_t, \text{clip}(r_t(\theta),1-\epsilon,1+\epsilon)\hat{A}_t) \right]$$

In [None]:
# TODO: train a PPO agent here (not implemented yet).

# **4) Twin Delayed Deep Deterministic Policy Gradient**

In [None]:
# TODO: train a TD3 agent here (not implemented yet).

# **5) Soft Actor Critic**

In [None]:
# TODO: train a SAC agent here (not implemented yet).