In [1]:
# INSTALL FinRL Pack and IMPORT libs
## Install Packages
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git
!pip install git+https://github.com/AI4Finance-Foundation/FinRL-Meta.git
## Import FinRL libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime

%matplotlib inline
from finrl.apps import config

from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.finrl_meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.drl_agents.stablebaselines3.models import DRLAgent # Deep Reinforcement Learning Agent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

from pprint import pprint

import sys
sys.path.append("../FinRL-Library")

import itertools
## Import Stock time series and Technical Indicators
from google.colab import drive
drive.mount('/content/gdrive')
# DOW30 STOCKS BETWEEN 2000-2016 TRAINING / 2016-2020 TRADING
#del df
# 


filenames = ['AAPL']
df_temps = []
for fn in filenames :
    df_temp = pd.read_csv('gdrive/My Drive/Colab Notebooks/Stocks_reinforcement_learning/'+fn+'.csv', sep=';')
    df_temp = df_temp.iloc[33:, 0:9]
    df_temp['tic'] = fn
    df_temps.append(df_temp)
df = pd.concat(df_temps, ignore_index=True)

df_baseline = get_baseline(ticker='AAPL', start='2016-09-15', end='2020-09-04') # ^DJI
stats = backtest_stats(df_baseline, value_col_name = 'close')
## PROCESS DATA
print(df.head())
print(df.shape)

# CHANGE TO DATETIME OBJECT
from datetime import datetime
df['Date'] = df['Date'].apply(lambda x: datetime.strptime(x, "%d.%m.%Y") )
df.head()

# SORT BY DATE
df = df.sort_values(by=['Date','tic'], ignore_index=True)
df.head()
df.rename(columns={'Close': 'close'}, inplace=True)
df.rename(columns={'Date': 'date'}, inplace=True)
df['close'] = df['close'].apply(lambda x: float(x.replace(",", ".")))
df['RSI'] = df['RSI'].apply(lambda x: float(x.replace(",", ".")))
df['TNRSI'] = df['TNRSI'].apply(lambda x: float(x.replace(",", ".")))
df['BB PERCENT'] = df['BB PERCENT'].apply(lambda x: float(x.replace(",", ".")))
df['ROC-14'] = df['ROC-14'].apply(lambda x: float(x.replace(",", ".")))
df['SO'] = df['SO'].apply(lambda x: float(x.replace(",", ".")))
df['PPO'] = df['PPO'].apply(lambda x: float(x.replace(",", ".")))
df['CCI'] = df['CCI'].apply(lambda x: float(x.replace(",", ".")))
df.head()
# SPLIT DATA AS TRAIN AND TEST
#TRAIN
training = data_split(df, '2000-10-20','2016-09-15')
print(training.shape)
#TEST
trading = data_split(df, '2016-09-15','2020-09-04')
print(trading.shape)
## Action Space and State Space
# Normally, Three are 3 actions: {-1, 0, 1}, where 
# *   Selling one share = -1
# *   Holding one share = 0
# *   Buying one share = 1

# Also, It can be represented for multiple shares. 
# An action space: {-k,…,-1, 0, 1, …, k}
# *   the number of shares to buy = k
# *   the number of shares to sell= -k 

# For example, 
# * Buy 10 shares of AAPL = 10
# * Sell 10 shares of AAPL = -10 
ratio_list = ['RSI','TNRSI','BB PERCENT','ROC-14','SO','CCI','PPO'] # Technical Indicator List
stock_dimension = len(training.tic.unique()) # IT CAN BE ONE STOCK OR MULTIPLE STOCKS
state_space = 1 + 2*stock_dimension + len(ratio_list)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Parameters for the environment
env_kwargs = {
    "hmax": 100, # -100<=K<=100 IT MEANS 100*2+1=201 ACTION SPACE  [1 0 -1]= 3 Act space
    "initial_amount": 100000, 
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": ratio_list, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4
   
}
#Establish the training environment using StockTradingEnv() class
e_train_gym = StockTradingEnv(df = training, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

# MODEL1: ⚽ Advantage Actor Critic (A2C) 

agent = DRLAgent(env = env_train)
model_a2c = agent.get_model("a2c")
trained_a2c = agent.train_model(model=model_a2c, 
                             tb_log_name='a2c',
                             total_timesteps=50000)
## TEST Advantage Actor Critic (A2C)
e_trade_gym = StockTradingEnv(df = trading, **env_kwargs)
df_account_value_a2c, df_actions = DRLAgent.DRL_prediction(
    model=trained_a2c, 
    environment = e_trade_gym)
print(df_account_value_a2c.shape)
df_account_value_a2c.tail()
df_actions
# MODEL 2: ⛽ Deep Deterministic Policy Gradient (DDPG)
agent = DRLAgent(env = env_train)
model_ddpg = agent.get_model("ddpg")
trained_ddpg = agent.train_model(model=model_ddpg, 
                             tb_log_name='ddpg',
                             total_timesteps=50000)
## TEST Deep Deterministic Policy Gradient (DDPG)
e_trade_gym = StockTradingEnv(df = trading, **env_kwargs)
df_account_value_ddpg, df_actions = DRLAgent.DRL_prediction(
    model=trained_ddpg, 
    environment = e_trade_gym)
print(df_account_value_ddpg.shape)
df_account_value_ddpg.tail()
df_actions
# MODEL3: ❎ Proximal Policy Optimization (PPO) 
agent = DRLAgent(env = env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)
trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=50000)
## TEST Proximal Policy Optimization (PPO)
e_trade_gym = StockTradingEnv(df = trading, **env_kwargs)
df_account_value_ppo, df_actions = DRLAgent.DRL_prediction(
    model=trained_ppo, 
    environment = e_trade_gym)
print(df_account_value_ppo.shape)
df_account_value_ppo.tail()
df_actions
# MODEL4: ⛄ Twin-Delayed Deep Deterministic Policy Gradient Agents (TD3)
agent = DRLAgent(env = env_train)
TD3_PARAMS = {"batch_size": 100, 
              "buffer_size": 1000000, 
              "learning_rate": 0.001}

model_td3 = agent.get_model("td3",model_kwargs = TD3_PARAMS)
trained_td3 = agent.train_model(model=model_td3, 
                             tb_log_name='td3',
                             total_timesteps=50000)
## TEST Twin-Delayed Deep Deterministic Policy Gradient Agents (TD3)
e_trade_gym = StockTradingEnv(df = trading, **env_kwargs)
df_account_value_td3, df_actions = DRLAgent.DRL_prediction(
    model=trained_td3, 
    environment = e_trade_gym)
print(df_account_value_td3.shape)
df_account_value_td3.tail()
# MODEL 5: ⏲ Soft Actor-Critic (SAC)
agent = DRLAgent(env = env_train)
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 1000000,
    "learning_rate": 0.001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}

model_sac = agent.get_model("sac",model_kwargs = SAC_PARAMS)
trained_sac = agent.train_model(model=model_sac, 
                             tb_log_name='sac',
                             total_timesteps=50000)
## TEST Soft Actor-Critic (SAC)
e_trade_gym = StockTradingEnv(df = trading, **env_kwargs)
df_account_value_sac, df_actions = DRLAgent.DRL_prediction(
    model=trained_sac, 
    environment = e_trade_gym)
print(df_account_value_sac.shape)
df_account_value_sac.tail()
# Back Test Plot
print("==============Get Backtest Results===========")
perf_stats_all = backtest_stats(account_value=df_account_value_a2c)
perf_stats_all = pd.DataFrame(perf_stats_all)
print("==============Get Baseline Results===========")
df_baseline = get_baseline(ticker='AAPL', start='2016-09-15', end='2020-09-04') # ^DJI
stats = backtest_stats(df_baseline, value_col_name = 'close')
stats = pd.DataFrame(stats)
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
backtest_plot(df_account_value_a2c, 
             baseline_ticker = 'AAPL', 
             baseline_start = '2016-09-15',
             baseline_end = '2020-09-04') 
import pyfolio
def backtest_plot():
    baseline_returns = get_daily_return(df_baseline, value_col_name="close") # This is current daily return 
    test_returns = get_daily_return(df_account_value_a2c) # This is our offering strategy
    with pyfolio.plotting.plotting_context(font_scale=1.1):
        pyfolio.create_full_tear_sheet(
            returns=test_returns, benchmark_rets=baseline_returns, set_context=False
        )
print("==============Compare to baseline buy-and-hold===========")
%matplotlib inline
backtest_plot()

Collecting git+https://github.com/AI4Finance-LLC/FinRL-Library.git
  Cloning https://github.com/AI4Finance-LLC/FinRL-Library.git to /tmp/pip-req-build-p0z_qvl2
  Running command git clone -q https://github.com/AI4Finance-LLC/FinRL-Library.git /tmp/pip-req-build-p0z_qvl2
Collecting pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2
  Cloning https://github.com/quantopian/pyfolio.git to /tmp/pip-install-dr4q783u/pyfolio_b1a64f3e2d034573a331836433e1914a
  Running command git clone -q https://github.com/quantopian/pyfolio.git /tmp/pip-install-dr4q783u/pyfolio_b1a64f3e2d034573a331836433e1914a
Collecting elegantrl@ git+https://github.com/AI4Finance-Foundation/ElegantRL.git#egg=elegantrl
  Cloning https://github.com/AI4Finance-Foundation/ElegantRL.git to /tmp/pip-install-dr4q783u/elegantrl_f8a0983e0d824781bfd8e5305332efac
  Running command git clone -q https://github.com/AI4Finance-Foundation/ElegantRL.git /tmp/pip-install-dr4q783u/elegantrl_f8a0983e0d824781bfd8e5305332e

  'Module "zipline.assets" not found; multipliers will not be applied'


Mounted at /content/gdrive
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (1000, 8)
Annual return          0.455316
Cumulative returns     3.432574
Annual volatility      0.302022
Sharpe ratio           1.395618
Calmar ratio           1.182150
Stability              0.844736
Max drawdown          -0.385159
Omega ratio            1.311865
Sortino ratio          2.070484
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.102629
Daily value at risk   -0.036379
dtype: float64
           Date,Open,High,Low,Close,Adj Close,Volume   tic
0  1981-01-30,0.127232,0.127232,0.126116,0.126116...  AAPL
1  1981-02-02,0.119420,0.119420,0.118862,0.118862...  AAPL
2  1981-02-03,0.123326,0.123884,0.123326,0.123326...  AAPL
3  1981-02-04,0.127790,0.128348,0.127790,0.127790...  AAPL
4  1981-02-05,0.127790,0.128906,0.127790,0.127790...  AAPL
(10348, 2)


KeyError: ignored

In [5]:
df.head()

Unnamed: 0,"Date,Open,High,Low,Close,Adj Close,Volume",tic
0,"1981-01-30,0.127232,0.127232,0.126116,0.126116...",AAPL
1,"1981-02-02,0.119420,0.119420,0.118862,0.118862...",AAPL
2,"1981-02-03,0.123326,0.123884,0.123326,0.123326...",AAPL
3,"1981-02-04,0.127790,0.128348,0.127790,0.127790...",AAPL
4,"1981-02-05,0.127790,0.128906,0.127790,0.127790...",AAPL


In [3]:
df.columns

Index(['Date,Open,High,Low,Close,Adj Close,Volume', 'tic'], dtype='object')

In [4]:
df.Date

AttributeError: ignored

# New Section

# New Section