## Import Libraries

In [1]:
# Install pandas_ta using the py39 conda environment's interpreter (python -m pip targets that interpreter).
! /opt/conda/envs/py39/bin/python -m pip install pandas_ta

Collecting pandas_ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m768.4 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: pandas_ta
  Building wheel for pandas_ta (setup.py) ... [?25ldone
[?25h  Created wheel for pandas_ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218908 sha256=7d054c73f506e3bccba3295112a90dc9e1d6c84588a5d4e26bd6af9cd7e21576
  Stored in directory: /root/.cache/pip/wheels/1e/5c/47/759b32beb377aee150eb4c1f1b2b7635032cc19ea09ce076fd
Successfully built pandas_ta
Installing collected packages: pandas_ta
Successfully installed pandas_ta-0.3.14b0
[0m

In [2]:
import gym
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import pandas_ta as ta
from stockstats import StockDataFrame as Sdf
from finrl.agents.stablebaselines3.models import A2C

from finrl.agents.stablebaselines3.models import DummyVecEnv
from sklearn.preprocessing import StandardScaler

## Importing local modules

In [3]:
import os
import sys
sys.path.append('../')
from src.rl_env.stock import SingleStockEnv
from src.models.models import RLModels
from src.data.make_dataset import download_stock_data

In [4]:
# Disable the warnings: the 'ignore' filter is installed without any category/module restriction,
# so it applies to every warning raised after this cell runs.
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Load the S&P price file and keep only the date and closing price,
# renamed to the lower-case `date` / `adjcp` columns used by the rest of the notebook.
data_df = (
    pd.read_csv('../data/snp.csv')
    .loc[:, ['Date', 'Close']]
    .rename(columns={'Date': 'date', 'Close': 'adjcp'})
)

In [6]:
# Preview the loaded price frame (date and adjcp columns).
data_df

Unnamed: 0,date,adjcp
0,2017-08-17,2430.01
1,2017-08-18,2425.55
2,2017-08-19,2425.55
3,2017-08-20,2425.55
4,2017-08-21,2428.37
...,...,...
2049,2023-03-25,3970.99
2050,2023-03-26,3970.99
2051,2023-03-27,3977.53
2052,2023-03-28,3971.27


In [7]:
# Add the RSI of the price column; no parameters are passed, so pandas_ta's defaults apply.
data_df = data_df.assign(rsi=ta.rsi(data_df['adjcp']))

In [8]:
# Add the MACD line: ta.macd returns several columns, only 'MACD_12_26_9' is kept as a feature.
data_df = data_df.assign(macd=ta.macd(data_df['adjcp'])['MACD_12_26_9'])

In [9]:
# Replace every missing value with 0.
# NOTE(review): presumably these are the indicator warm-up rows — confirm 0 is a sensible stand-in for rsi/macd.
data_df = data_df.fillna(value=0)

In [10]:
# Work on an independent (deep) copy so later steps do not touch data_df.
data_clean = data_df.copy(deep=True)

In [11]:
# Training window: 2017-08-17 (inclusive) up to 2022-03-29 (exclusive).
in_train_window = (data_clean['date'] >= '2017-08-17') & (data_clean['date'] < '2022-03-29')
# Re-number the rows from 0, which the index needs to start from.
train = data_clean.loc[in_train_window].reset_index(drop=True)


In [12]:
# Algorithm keys handed to RLModels; one agent is trained and evaluated per entry.
model_list = ['ddpg','ppo','a2c']

In [None]:
# Train one agent per algorithm in `model_list` on the training window and
# keep the trained agents in `model_dict`, keyed by algorithm name.
model_dict = {}
for m in model_list:
    print(30*"=", m, 30*"=")
    # DummyVecEnv takes a list of environment factories; a single env is wrapped here.
    env_train = DummyVecEnv([lambda: SingleStockEnv(train, feat_list=['macd', 'rsi'])])
    model = RLModels(m, env_train)
    model.train(total_timesteps=200000)
    # Save under a per-algorithm name: a single fixed path made every agent
    # overwrite the previous one, leaving only the last algorithm on disk.
    model.save(f'snp_{m}_200k')
    model_dict[m] = model

previous_total_asset:100000
end_total_asset:169362.48465048565
total_reward:69362.48465048565
total_cost:  2614.959069058009
total trades:  1672
Sharpe:  0.5501851069649215
previous_total_asset:100000
end_total_asset:187866.27959000002
total_reward:87866.27959000002
total_cost:  99.63041000000001
total trades:  1686
Sharpe:  0.6421388187269103
previous_total_asset:100000
end_total_asset:187866.27959000002
total_reward:87866.27959000002
total_cost:  99.63041000000001
total trades:  1686
Sharpe:  0.6421388187269103
previous_total_asset:100000
end_total_asset:187866.27959000002
total_reward:87866.27959000002
total_cost:  99.63041000000001
total trades:  1686
Sharpe:  0.6421388187269103
previous_total_asset:100000
end_total_asset:187866.27959000002
total_reward:87866.27959000002
total_cost:  99.63041000000001
total trades:  1686
Sharpe:  0.6421388187269103
previous_total_asset:100000
end_total_asset:187866.27959000002
total_reward:87866.27959000002
total_cost:  99.63041000000001
total trad

In [None]:
# Hold-out window: everything from 2022-03-29 (inclusive) onwards,
# re-numbered from 0 because the index needs to start from 0.
test = data_clean.loc[data_clean['date'] >= '2022-03-29'].reset_index(drop=True)

### Testing

In [None]:
def get_DRL_sharpe(account_value_path='account_value.csv'):
    """Print the annualised return and Sharpe ratio of a saved account-value series.

    Parameters
    ----------
    account_value_path : str, default 'account_value.csv'
        CSV file whose first column is used as the index and whose single
        remaining column is the account value at each step.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with columns ``account_value`` and ``daily_return``
        (step-over-step percentage change; the first row is NaN).
    """
    df_total_value = pd.read_csv(account_value_path, index_col=0)
    df_total_value.columns = ['account_value']
    df_total_value['daily_return'] = df_total_value['account_value'].pct_change(1)

    daily_return = df_total_value['daily_return']
    # Annualise with 252 trading days. The expression is kept on one line: a comment
    # placed after a line-continuation backslash is a syntax error.
    sharpe = (252 ** 0.5) * daily_return.mean() / daily_return.std()

    annual_return = ((daily_return.mean() + 1) ** 252 - 1) * 100
    print("annual return: ", annual_return)
    print("sharpe ratio: ", sharpe)
    return df_total_value

In [None]:
# Roll each trained agent through the test window and record its cumulative return.
cum_return = {}
for m in model_list:
    model = model_dict[m]
    env_test = DummyVecEnv([lambda: SingleStockEnv(test, feat_list=['macd', 'rsi'])])
    obs_test = env_test.reset()
    print("==============Model Prediction===========")
    # One environment step per distinct row label of the test frame.
    n_steps = len(test.index.unique())
    for i in range(n_steps):
        action, _states = model.predict(obs_test)
        obs_test, rewards, dones, info = env_test.step(action)
        env_test.render()

    # NOTE(review): 'account_value.csv' is presumably written by the environment during the rollout — confirm.
    df_total_value = pd.read_csv('account_value.csv', index_col=0)
    df_total_value.columns = ['account_value']
    df_total_value['daily_return'] = df_total_value.pct_change(1)

    # Compound the step-over-step returns into a cumulative return series.
    growth = df_total_value.account_value.pct_change(1) + 1
    cum_return[m] = growth.cumprod() - 1

In [None]:

%matplotlib inline
fig, ax = plt.subplots(figsize=(12, 8))

plt.plot(test.date, cum_return['ppo'], color='red',label = "PPO")
plt.plot(test.date, cum_return['a2c'], label = "A2C")
plt.plot(test.date, cum_return['ddpg'], color = 'green', label = "DDPG")

plt.title("Cumulative Return for PPO and A2C with Transaction Cost",size= 18)
plt.legend()
plt.rc('legend',fontsize=15)
plt.rc('xtick', labelsize=15)
plt.rc('ytick', labelsize=15)



## With correlation

In [None]:
# Load the correlation series from SnP_bit_corr.csv, keeping the date and
# exposing the correlation as the `cor_bit` feature column.
bit_cor_df = (
    pd.read_csv('../data/SnP_bit_corr.csv', usecols=['Date', 'correlation'])
    .rename(columns={'Date': 'date', 'correlation': 'cor_bit'})
)

In [None]:
# Load the correlation series from SnP_eth_corr.csv, keeping the date and
# exposing the correlation as the `cor_eth` feature column.
eth_cor_df = (
    pd.read_csv('../data/SnP_eth_corr.csv', usecols=['Date', 'correlation'])
    .rename(columns={'Date': 'date', 'correlation': 'cor_eth'})
)

In [None]:
# Preview the frame loaded from SnP_bit_corr.csv (date and cor_bit columns).
bit_cor_df

In [None]:
# Attach the two correlation columns to the cleaned price/indicator frame and
# drop any row that ends up with a missing value.
# NOTE(review): concat aligns on the row index, not on `date` — confirm the
# correlation files cover the same rows in the same order as data_clean.
df_corr = pd.concat(
    [data_clean, bit_cor_df['cor_bit'], eth_cor_df['cor_eth']],
    axis='columns',
).dropna()

In [None]:
# Training window: 2017-08-17 (inclusive) up to 2022-03-29 (exclusive).
# Both conditions are built from df_corr itself: df_corr has had rows dropped, so a mask
# taken from data_clean carries a different index and only works through implicit reindexing.
in_train_window = (df_corr['date'] >= '2017-08-17') & (df_corr['date'] < '2022-03-29')
# Re-number the rows from 0, which the index needs to start from.
train = df_corr.loc[in_train_window].reset_index(drop=True)


In [None]:
# Train one agent per algorithm in `model_list` on the correlation-augmented training
# window and keep the trained agents in `model_dict`, keyed by algorithm name.
model_dict = {}
for m in model_list:
    print(30*"=", m, 30*"=")
    # DummyVecEnv takes a list of environment factories; a single env is wrapped here.
    env_train = DummyVecEnv([lambda: SingleStockEnv(train, feat_list=['macd', 'rsi', 'cor_bit', 'cor_eth'])])
    model = RLModels(m, env_train)
    model.train(total_timesteps=200000)
    # Save under a per-algorithm name: a single fixed path made every agent
    # overwrite the previous one, leaving only the last algorithm on disk.
    model.save(f'snp_corr_{m}_200k')
    model_dict[m] = model

In [None]:
# Hold-out window of the correlation-augmented data: 2022-03-29 (inclusive) onwards,
# re-numbered from 0 because the index needs to start from 0.
test = df_corr.loc[df_corr['date'] >= '2022-03-29'].reset_index(drop=True)

In [None]:
# Roll each correlation-trained agent through the test window and record its cumulative return.
cum_return = {}
for m in model_list:
    model = model_dict[m]
    # Use the same four features the agents were trained with; building the test env with
    # only ['macd', 'rsi'] gives the agent a different feature set than it was fitted on.
    # NOTE(review): assumes feat_list determines the observation SingleStockEnv emits — confirm.
    env_test = DummyVecEnv([lambda: SingleStockEnv(test, feat_list=['macd', 'rsi', 'cor_bit', 'cor_eth'])])
    obs_test = env_test.reset()
    print("==============Model Prediction===========")
    # One environment step per distinct row label of the test frame.
    for i in range(len(test.index.unique())):
        action, _states = model.predict(obs_test)
        obs_test, rewards, dones, info = env_test.step(action)
        env_test.render()

    # NOTE(review): 'account_value.csv' is presumably written by the environment during the rollout — confirm.
    df_total_value = pd.read_csv('account_value.csv', index_col=0)
    df_total_value.columns = ['account_value']
    df_total_value['daily_return'] = df_total_value.pct_change(1)

    # Compound the step-over-step returns into a cumulative return series.
    cum_return[m] = (df_total_value.account_value.pct_change(1) + 1).cumprod() - 1