# RL in Finance (Test Cash Penalty)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sunnyswag/RL_in_Finance/blob/main/RL_in_Finance_Test_cash_penalty.ipynb)

## 1、拉取 github 仓库，下载并导入相关包
&emsp;&emsp;运行流程：python setup.py -> pip install -r requirements.txt

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime
import time

%matplotlib inline
from utils import config
from utils.pull_data import Pull_data
from utils.preprocessors import FeatureEngineer, split_data
from utils.env_retreat_penalty import StockTradingEnvRetreatpenalty
from utils.models import DRL_Agent
from utils.backtest import backtest_stats, backtest_plot, get_baseline
import itertools
import sys
sys.path.append("../RL_in_Finance")

  'Module "zipline.assets" not found; multipliers will not be applied'


## 2、下载数据

数据来源：Tushare API<br>
当前用到的数据：SSE_50 和 CSI_300<br>
数据量的大小：shape 约为 [2896 * n, 8]（n 为股票数量；本例 3 只股票共 8688 行）

In [2]:
# Use only the first three tickers of config.SSE_50.
stock_list = config.SSE_50[:3]
# Download the data for those tickers through the project's Pull_data helper.
# NOTE(review): save_data=False presumably skips writing the download to disk —
# confirm in utils.pull_data.
df = Pull_data(stock_list, save_data=False).pull_data()

--- 开始下载 ----
--- 下载完成 ----
DataFrame 的大小:  (8688, 8)


In [3]:
# Preview the first rows ordered by date, then ticker. The sorted frame is only
# displayed; `df` itself is not reassigned here.
df.sort_values(['date', 'tic'], ignore_index=True).head()

Unnamed: 0,date,tic,open,high,low,close,volume,day
0,2009-01-05,600000.SH,2.7584,2.8115,2.7258,2.8013,503142.56,0
1,2009-01-05,600009.SH,9.4665,9.6505,9.4414,9.5836,52100.33,0
2,2009-01-05,600016.SH,1.6369,1.6487,1.617,1.6448,947739.82,0
3,2009-01-06,600000.SH,2.8422,2.981,2.8422,2.9565,958496.0,1
4,2009-01-06,600009.SH,9.525,10.1021,9.4999,10.077,104182.13,1


In [4]:
# Report the download window configured in utils.config (Start_Date .. End_Date).
print("数据下载的时间区间为：{} 至 {}".format(config.Start_Date, config.End_Date))

数据下载的时间区间为：20090101 至 20210101


In [5]:
# Show which tickers were requested for download.
print("下载的股票列表为: ")
print(stock_list)

下载的股票列表为: 
['600000.SH', '600009.SH', '600016.SH']


## 3、数据预处理

In [6]:
# Run the project's feature engineering with technical indicators enabled.
processed_df = FeatureEngineer(use_technical_indicator=True).preprocess_data(df)
# Natural log of (volume * close).
# NOTE(review): rows where volume * close is 0 produce log(0) = -inf, and
# fillna() does not replace infinities — make sure they are handled before the
# data reaches the environment.
processed_df['log_volume'] = np.log(processed_df.volume*processed_df.close)
# Open-to-close move expressed as a fraction of the close (NaN when close is 0).
processed_df['change'] = (processed_df.close-processed_df.open)/processed_df.close
# High-low range expressed as a fraction of the close (NaN when close is 0).
processed_df['daily_variance'] = (processed_df.high-processed_df.low)/processed_df.close

成功添加技术指标
对当前时间段未上市的公司的所有行置零
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [7]:
# Replace every non-finite value with 0.
# fillna() only touches NaN, so +/-inf (e.g. from np.log(0) on rows whose
# volume * close is zero) is first converted to NaN; otherwise -inf would be
# passed on to the trading environment as an observation value.
processed_df = processed_df.replace([np.inf, -np.inf], np.nan).fillna(0)

In [8]:
# List the technical indicators configured in utils.config and how many there are.
print("技术指标列表: ")
print(config.TECHNICAL_INDICATORS_LIST)
print("技术指标数: {}个".format(len(config.TECHNICAL_INDICATORS_LIST)))

技术指标列表: 
['boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'macd', 'volume_20_sma', 'volume_60_sma', 'volume_120_sma', 'close_20_sma', 'close_60_sma', 'close_120_sma']
技术指标数: 12个


In [9]:
# Preview the first rows of the processed data, including the added feature columns.
processed_df.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,boll_ub,boll_lb,...,macd,volume_20_sma,volume_60_sma,volume_120_sma,close_20_sma,close_60_sma,close_120_sma,log_volume,change,daily_variance
0,2009-01-05,600000.SH,2.7584,2.8115,2.7258,2.8013,503142.56,0.0,0.0,0.0,...,0.0,503142.56,503142.56,503142.56,2.8013,2.8013,2.8013,14.158712,0.015314,0.030593
1,2009-01-05,600009.SH,9.4665,9.6505,9.4414,9.5836,52100.33,0.0,0.0,0.0,...,0.0,52100.33,52100.33,52100.33,9.5836,9.5836,9.5836,13.12098,0.012219,0.021819
2,2009-01-05,600016.SH,1.6369,1.6487,1.617,1.6448,947739.82,0.0,0.0,0.0,...,0.0,947739.82,947739.82,947739.82,1.6448,1.6448,1.6448,14.259454,0.004803,0.019273
3,2009-01-06,600000.SH,2.8422,2.981,2.8422,2.9565,958496.0,1.0,3.098386,2.659414,...,0.003482,730819.28,730819.28,730819.28,2.8789,2.8789,2.8789,14.857127,0.038661,0.046947
4,2009-01-06,600009.SH,9.525,10.1021,9.4999,10.077,104182.13,1.0,10.528073,9.132527,...,0.01107,78141.23,78141.23,78141.23,9.8303,9.8303,9.8303,13.864151,0.054778,0.05976


In [10]:
# Chronological split: Start_Trade_Date..End_Trade_Date for training,
# End_Trade_Date..End_Test_Date for testing.
# NOTE(review): both calls share End_Trade_Date as a boundary — confirm in
# split_data that one side is exclusive so that date cannot land in both sets.
train_data = split_data(processed_df, config.Start_Trade_Date, config.End_Trade_Date)
test_data = split_data(processed_df, config.End_Trade_Date, config.End_Test_Date)

In [11]:
# Report the date range and row count of each split, then the train:test ratio
# rounded to one decimal place.
print("训练数据的范围：{} 至 {}".format(config.Start_Trade_Date, config.End_Trade_Date))
print("测试数据的范围：{} 至 {}".format(config.End_Trade_Date, config.End_Test_Date))
print("训练数据的长度: {},测试数据的长度:{}".format(len(train_data), len(test_data)))
# NOTE(review): divides by len(test_data); an empty test split raises ZeroDivisionError.
print("训练集数据 : 测试集数据: {} : {}".format(round(len(train_data)/len(test_data),1), 1))

训练数据的范围：2009-01-01 至 2019-01-01
测试数据的范围：2019-01-01 至 2021-01-01
训练数据的长度: 7293,测试数据的长度:1461
训练集数据 : 测试集数据: 5.0 : 1


In [12]:
# Preview the first rows of the training split.
train_data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,boll_ub,boll_lb,...,macd,volume_20_sma,volume_60_sma,volume_120_sma,close_20_sma,close_60_sma,close_120_sma,log_volume,change,daily_variance
0,2009-01-05,600000.SH,2.7584,2.8115,2.7258,2.8013,503142.56,0.0,0.0,0.0,...,0.0,503142.56,503142.56,503142.56,2.8013,2.8013,2.8013,14.158712,0.015314,0.030593
0,2009-01-05,600009.SH,9.4665,9.6505,9.4414,9.5836,52100.33,0.0,0.0,0.0,...,0.0,52100.33,52100.33,52100.33,9.5836,9.5836,9.5836,13.12098,0.012219,0.021819
0,2009-01-05,600016.SH,1.6369,1.6487,1.617,1.6448,947739.82,0.0,0.0,0.0,...,0.0,947739.82,947739.82,947739.82,1.6448,1.6448,1.6448,14.259454,0.004803,0.019273
1,2009-01-06,600000.SH,2.8422,2.981,2.8422,2.9565,958496.0,1.0,3.098386,2.659414,...,0.003482,730819.28,730819.28,730819.28,2.8789,2.8789,2.8789,14.857127,0.038661,0.046947
1,2009-01-06,600009.SH,9.525,10.1021,9.4999,10.077,104182.13,1.0,10.528073,9.132527,...,0.01107,78141.23,78141.23,78141.23,9.8303,9.8303,9.8303,13.864151,0.054778,0.05976


In [13]:
# Preview the first rows of the test split.
test_data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,boll_ub,boll_lb,...,macd,volume_20_sma,volume_60_sma,volume_120_sma,close_20_sma,close_60_sma,close_120_sma,log_volume,change,daily_variance
0,2019-01-02,600000.SH,8.9589,9.0049,8.8117,8.9221,237628.22,2.0,10.502893,8.657577,...,-0.251945,215239.5065,243030.142333,221033.873083,9.580235,9.716063,9.475849,14.566994,-0.004125,0.021654
0,2019-01-02,600009.SH,49.6941,49.8119,48.8298,49.478,40051.97,2.0,51.140976,48.044854,...,0.025528,32632.976,53532.4315,57995.661333,49.592915,49.90637,53.10241,14.499461,-0.004368,0.019849
0,2019-01-02,600016.SH,5.0982,5.1071,5.0002,5.0358,579415.0,2.0,5.554261,4.922929,...,-0.108006,632488.8415,653332.693333,609303.477833,5.238595,5.419815,5.361655,14.886347,-0.012391,0.021228
1,2019-01-03,600000.SH,8.9221,9.0325,8.8853,9.0233,186542.62,3.0,10.429333,8.614317,...,-0.250136,214160.991,242279.348333,220740.681167,9.521825,9.710852,9.478228,14.336225,0.011215,0.016313
1,2019-01-03,600009.SH,49.478,49.478,48.3977,48.8102,42482.83,3.0,50.875727,48.076363,...,-0.037795,32945.677,53127.352,57999.520167,49.476045,49.83877,53.046981,14.544795,-0.013682,0.022133


## 4、初始化环境

**state_space 由五部分组成 :** <br>
1. 当天的资金量
2. 每只股票当天的收盘价
3. 每只股票当天的持仓量
4. 股票数 * 技术指标数
5. 当天成交量

**reward 的计算方式：**<br>
* reward = 交易前的总资产 - 当天交易后的总资产 = 当天交易的手续费
* TODO：待改进

**action_space 的空间：**<br>
  * actions ∈[-100, 100]
  * 正数表示买入，负数表示卖出，0表示不进行买入卖出操作
  * 绝对值表示买入卖出的数量

In [14]:
# stock_dimension = len(df.tic.unique())
# state_space = 1 + 2*stock_dimension + \
#     len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension + stock_dimension
# print("stock_dimension: {}, state_space: {}".format(stock_dimension, state_space))

In [15]:
# Environment configuration.
# Per-stock daily columns handed to the environment: the configured technical
# indicators plus the price- and volume-derived features.
information_cols = config.TECHNICAL_INDICATORS_LIST + ["close", "day", "log_volume", "change", "daily_variance"]

# Settings that are identical for the training and the trading environment.
shared_env_kwargs = dict(
    initial_amount=1e6,
    hmax=5000,
    turbulence_threshold=None,
    currency='￥',
    buy_cost_pct=3e-3,
    sell_cost_pct=3e-3,
    cache_indicator_data=True,
    daily_information_cols=information_cols,
    print_verbosity=500,
    patient=True,
)

# The two environments differ only in their data and in `random_start`.
e_train_gym = StockTradingEnvRetreatpenalty(
    df=train_data, random_start=True, **shared_env_kwargs
)
e_trade_gym = StockTradingEnvRetreatpenalty(
    df=test_data, random_start=False, **shared_env_kwargs
)

caching data
data cached!
caching data
data cached!


In [16]:
# Smoke-test the training environment with randomly sampled actions.
observation = e_train_gym.reset()  # initial environment state
count, total_reward = 0, 0
for _ in range(1600):
    # Sample a random action and advance the environment by one step.
    observation, reward, done, info = e_train_gym.step(
        e_train_gym.action_space.sample()
    )
    total_reward += reward
    if done:
        break
    # Only steps that did not end the episode are counted.
    count += 1
print("count: ", count)
print("reward: {}, done: {}".format(total_reward, done))

EPISODE|STEPSTERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|RETREAT_PROPORTION
  actions = actions / self.closings
   0| 499update         |￥794,039       |￥973,197       |-51.19914%|-2.68028% |-97.04%   
   0| 999update         |￥765,222       |￥1,015,805     |-46.36222%|1.58048%  |-95.89%   
   0|1499update         |￥688,889       |￥1,240,265     |-24.72605%|24.02654% |-97.51%   
count:  1600
reward: -644.6063830852545, done: False


In [18]:
import multiprocessing

# Number of parallel training environments: the intended 24, but never more
# than the CPUs this machine reports and never fewer than one. A single
# assignment avoids a computed value being silently overwritten by a constant.
n_cores = max(1, min(24, multiprocessing.cpu_count()))
print("using {} cores".format(n_cores))

# Training environment, built through the env's multiprocessing helper.
env_train, _ = e_train_gym.get_multiproc_env(n = n_cores)

# Trading/observation environment, built through the env's get_sb_env helper.
env_trade, _ = e_trade_gym.get_sb_env()

using 10 cores
   2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%      2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%   
   2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%   
   2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%      2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%      2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%      2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%   
   2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%   
   2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%   



   2| 499|update         |￥1,000,000     |￥1,000,000     |0.00000%  |0.00000%  |100.00%   



## 5、开始训练

所用到的框架：stable_baselines3

In [20]:
# Wrap the multi-process training environment in the project's agent helper.
agent = DRL_Agent(env = env_train)

In [21]:
# PPO hyper-parameters, forwarded to the agent as `model_kwargs`.
ppo_params = {
    'n_steps': 256,
    'ent_coef': 0.0,
    'learning_rate': 5e-6,
    'batch_size': 256,
    'gamma': 0.99,
}

# Policy settings: `net_arch` is a list of ten entries, each 1024.
# Optional extras that are currently disabled: a custom "activation_fn"
# (e.g. torch.nn.ReLU) and "squash_output": True.
policy_kwargs = {"net_arch": [1024] * 10}

model = agent.get_model(
    "ppo",
    model_kwargs=ppo_params,
    policy_kwargs=policy_kwargs,
    verbose=0,
)

# To resume from a saved checkpoint instead of training from scratch:
# model = model.load("scaling_reward.model", env = env_train)

{'n_steps': 256, 'ent_coef': 0.0, 'learning_rate': 5e-06, 'batch_size': 256, 'gamma': 0.99}


  return torch._C._cuda_getDeviceCount() > 0


In [None]:
# Train for 30,000 timesteps. env_trade is passed as the evaluation environment
# (eval_freq=500, one episode per evaluation), and the run is logged under the
# name 'env_cashpenalty_highlr'.
# NOTE(review): the eval_env / eval_freq / n_eval_episodes arguments depend on
# the installed stable-baselines3 version — confirm they are still accepted.
model.learn(total_timesteps = 30000, 
            eval_env = env_trade, 
            eval_freq = 500,
            log_interval = 1, 
            tb_log_name = 'env_cashpenalty_highlr',
            n_eval_episodes = 1)   



EPISODE|STEPS|TERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|CASH_PROPORTION


Exception in thread Thread-4:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.6/site-packages/tensorboard/summary/writer/event_file_writer.py", line 238, in run
    self._record_writer.write(data)
  File "/opt/conda/lib/python3.6/site-packages/tensorboard/summary/writer/record_writer.py", line 40, in write
    self._writer.write(header + header_crc + data + footer_crc)
  File "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/io/gfile.py", line 531, in write
    self.fs.append(self.filename, file_content, self.binary_mode)
  File "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/io/gfile.py", line 154, in append
    self._write(filename, file_content, "ab" if binary_mode else "a")
  File "/opt/conda/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/io/gfile.py", line 158, in _write
    with io.open(filename, mode, e

In [None]:
# Persist the trained model under the name "scaling_reward.model".
model.save("scaling_reward.model")

## 6、测试

In [None]:
# Run the trained model on the trading environment and collect the account
# value history and the actions taken.
# `DRL_prediction` is not imported as a free function in this notebook, so it is
# reached through the imported DRL_Agent class.
# NOTE(review): assumes DRL_Agent exposes DRL_prediction as a static method
# (as in FinRL's agent class) — confirm in utils.models.
df_account_value, df_actions = DRL_Agent.DRL_prediction(
    model=model, 
    environment = e_trade_gym)

In [None]:
# Report the back-test window (End_Trade_Date .. End_Test_Date from utils.config).
print("回测的时间窗口：{} 至 {}".format(config.End_Trade_Date, config.End_Test_Date))

回测的时间窗口：2019-01-01 至 2021-01-01


In [None]:
# Save the account value history to CSV (without the index column), then print
# its first and last rows.
df_account_value.to_csv("df_account_value.csv", index=False)
print("查看日账户净值")
print("开始: ")
print(df_account_value.head())
print("")
print("结束: ")
print(df_account_value.tail())

In [None]:
# Save the recorded actions to CSV (without the index column) and display the
# last rows.
print("查看每日所作的交易")
df_actions.to_csv("df_actions.csv", index=False)
df_actions.tail()

## 7、回测

In [None]:
# Compute the back-test statistics for the strategy's account value history.
# The result is bound to `perf_stats_all`, the name the optional export lines
# below expect.
print("---------------------获取回测结果---------------------")
perf_stats_all = backtest_stats(account_value=df_account_value)

# Optional: export the statistics to a timestamped CSV in config.RESULTS_DIR.
# perf_stats_all = pd.DataFrame(perf_stats_all)
# now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

In [None]:
# Fetch the baseline (config.SSE_50_INDEX) for 20190101..20210101 and compute
# its statistics from the 'close' column.
# NOTE(review): the dates are hard-coded here while the back-test window comes
# from config.End_Trade_Date / config.End_Test_Date — keep the two in sync.
print("---------------------获取baseline结果---------------------")
baseline_df = get_baseline(config.SSE_50_INDEX, 
              start="20190101",
              end="20210101")
baseline_stats = backtest_stats(baseline_df, value_col_name='close')

In [None]:
# Remove the duplicated row from df_account_value by dropping, in place, the
# row(s) labelled with the index value found at position 1.
# NOTE(review): the drop is unconditional — nothing checks that the row really
# is a duplicate — and re-running this cell removes another row each time.
df_account_value.drop(df_account_value.index[1], inplace=True)

In [None]:
# Preview the first ten rows of the baseline data.
baseline_df.head(10)

In [None]:
# Plot the strategy's account value against the baseline for 20190101..20210101.
# NOTE(review): the message indexes config.SSE_50_INDEX[0] while the whole
# object is passed as baseline_ticker — confirm which form backtest_plot expects.
print("---------------------Plot---------------------")
print("和 {} 指数进行比较".format(config.SSE_50_INDEX[0]))
%matplotlib inline
backtest_plot(df_account_value,
        baseline_start="20190101",
        baseline_end="20210101",
        baseline_ticker=config.SSE_50_INDEX,
      )