# RL in Finance(Test Cash Penalty) 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sunnyswag/RL_in_Finance/blob/main/RL_in_Finance_Test_cash_penalty.ipynb)

## 1、拉取 github 仓库，下载并导入相关包
&emsp;&emsp;运行流程：python setup.py -> pip install -r requirements.txt

In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime
import time

%matplotlib inline
from utils import config
from utils.pull_data import Pull_data
from utils.preprocessors import FeatureEngineer, split_data
from utils.env_cash_penalty import StockTradingEnvCashpenalty
from utils.models import DRL_Agent
from utils.backtest import backtest_stats, backtest_plot, get_baseline
import itertools
import sys
sys.path.append("../RL_in_Finance")

  'Module "zipline.assets" not found; multipliers will not be applied'


## 2、下载数据

数据来源：Tushare API<br>
当前用到的数据：SSE_50 和 CSI_300<br>
数据量的大小：shape[2892 * n, 8]

In [3]:
stock_list = config.SSE_50
df = Pull_data(stock_list, save_data=False).pull_data()

--- 开始下载 ----
下载进度 : 20.0%
下载进度 : 40.0%
下载进度 : 60.0%
下载进度 : 80.0%
下载进度 : 100.0%
--- 下载完成 ----
DataFrame 的大小:  (114845, 8)


In [4]:
df.sort_values(['date', 'tic'], ignore_index=True).head()

Unnamed: 0,date,tic,open,high,low,close,volume,day
0,2009-01-05,600000.SH,2.7584,2.8115,2.7258,2.8013,503142.56,0
1,2009-01-05,600009.SH,9.4665,9.6505,9.4414,9.5836,52100.33,0
2,2009-01-05,600016.SH,1.6369,1.6487,1.617,1.6448,947739.82,0
3,2009-01-05,600028.SH,3.1966,3.2461,3.1786,3.2371,361085.66,0
4,2009-01-05,600030.SH,9.2667,9.5705,9.1654,9.5502,818978.41,0


In [5]:
print("数据下载的时间区间为：{} 至 {}".format(config.Start_Date, config.End_Date))

数据下载的时间区间为：20090101 至 20210101


In [6]:
print("下载的股票列表为: ")
print(stock_list)

下载的股票列表为: 
['600000.SH', '600009.SH', '600016.SH', '600028.SH', '600030.SH', '600031.SH', '600036.SH', '600048.SH', '600050.SH', '600104.SH', '600196.SH', '600276.SH', '600309.SH', '600519.SH', '600547.SH', '600570.SH', '600585.SH', '600588.SH', '600690.SH', '600703.SH', '600745.SH', '600837.SH', '600887.SH', '600918.SH', '601012.SH', '601066.SH', '601088.SH', '601138.SH', '601166.SH', '601186.SH', '601211.SH', '601236.SH', '601288.SH', '601318.SH', '601319.SH', '601336.SH', '601398.SH', '601601.SH', '601628.SH', '601668.SH', '601688.SH', '601816.SH', '601818.SH', '601857.SH', '601888.SH', '603160.SH', '603259.SH', '603288.SH', '603501.SH', '603986.SH']


## 3、数据预处理

In [7]:
processed_df = FeatureEngineer(use_technical_indicator=True).preprocess_data(df)
processed['log_volume'] = np.log(processed.volume*processed.close)
processed['change'] = (processed.close-processed.open)/processed.close
processed['daily_variance'] = (processed.high-processed.low)/processed.close

成功添加技术指标
对当前时间段未上市的公司的所有行置零


In [9]:
print("技术指标列表: ")
print(config.TECHNICAL_INDICATORS_LIST)
print("技术指标数: {}个".format(len(config.TECHNICAL_INDICATORS_LIST)))

技术指标列表: 
['boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'macd', 'volume_20_sma', 'volume_60_sma', 'volume_120_sma', 'close_20_sma', 'close_60_sma', 'close_120_sma']
技术指标数: 12个


In [8]:
processed_df.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,boll_ub,boll_lb,rsi_30,cci_30,dx_30,macd,volume_20_sma,volume_60_sma,volume_120_sma,close_20_sma,close_60_sma,close_120_sma
0,2009-01-05,600000.SH,2.7584,2.8115,2.7258,2.8013,503142.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,503142.56,503142.56,503142.56,2.8013,2.8013,2.8013
1,2009-01-05,600009.SH,9.4665,9.6505,9.4414,9.5836,52100.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52100.33,52100.33,52100.33,9.5836,9.5836,9.5836
2,2009-01-05,600016.SH,1.6369,1.6487,1.617,1.6448,947739.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,947739.82,947739.82,947739.82,1.6448,1.6448,1.6448
3,2009-01-05,600028.SH,3.1966,3.2461,3.1786,3.2371,361085.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,361085.66,361085.66,361085.66,3.2371,3.2371,3.2371
4,2009-01-05,600030.SH,9.2667,9.5705,9.1654,9.5502,818978.41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,818978.41,818978.41,818978.41,9.5502,9.5502,9.5502


In [10]:
train_data = split_data(processed_df, config.Start_Trade_Date, config.End_Trade_Date)
test_data = split_data(processed_df, config.End_Trade_Date, config.End_Test_Date)

In [11]:
print("训练数据的范围：{} 至 {}".format(config.Start_Trade_Date, config.End_Trade_Date))
print("测试数据的范围：{} 至 {}".format(config.End_Trade_Date, config.End_Test_Date))
print("训练数据的长度: {},测试数据的长度:{}".format(len(train_data), len(test_data)))
print("训练集数据 : 测试集数据: {} : {}".format(round(len(train_data)/len(test_data),1), 1))

训练数据的范围：2009-01-01 至 2019-01-01
测试数据的范围：2019-01-01 至 2021-01-01
训练数据的长度: 121550,测试数据的长度:24350
训练集数据 : 测试集数据: 5.0 : 1


In [12]:
train_data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,boll_ub,boll_lb,rsi_30,cci_30,dx_30,macd,volume_20_sma,volume_60_sma,volume_120_sma,close_20_sma,close_60_sma,close_120_sma
0,2009-01-05,600000.SH,2.7584,2.8115,2.7258,2.8013,503142.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,503142.56,503142.56,503142.56,2.8013,2.8013,2.8013
0,2009-01-05,600009.SH,9.4665,9.6505,9.4414,9.5836,52100.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52100.33,52100.33,52100.33,9.5836,9.5836,9.5836
0,2009-01-05,600016.SH,1.6369,1.6487,1.617,1.6448,947739.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,947739.82,947739.82,947739.82,1.6448,1.6448,1.6448
0,2009-01-05,600028.SH,3.1966,3.2461,3.1786,3.2371,361085.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,361085.66,361085.66,361085.66,3.2371,3.2371,3.2371
0,2009-01-05,600030.SH,9.2667,9.5705,9.1654,9.5502,818978.41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,818978.41,818978.41,818978.41,9.5502,9.5502,9.5502


In [62]:
test_data.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,boll_ub,boll_lb,rsi_30,cci_30,dx_30,macd,volume_20_sma,volume_60_sma,volume_120_sma,close_20_sma,close_60_sma,close_120_sma
0,2019-01-02,600000.SH,8.9589,9.0049,8.8117,8.9221,237628.22,2.0,10.502893,8.657577,38.67827,-163.240518,47.43877,-0.251945,215239.5,243030.1,221033.9,9.580235,9.716063,9.475849
0,2019-01-02,600009.SH,49.6941,49.8119,48.8298,49.478,40051.97,2.0,51.140976,48.044854,48.417073,14.661155,15.197694,0.025528,32632.98,53532.43,57995.66,49.592915,49.90637,53.10241
0,2019-01-02,600016.SH,5.0982,5.1071,5.0002,5.0358,579415.0,2.0,5.554261,4.922929,37.256908,-123.258937,50.542253,-0.108006,632488.8,653332.7,609303.5,5.238595,5.419815,5.361655
0,2019-01-02,600028.SH,4.4596,4.4684,4.3372,4.3809,2185583.7,2.0,5.502448,4.478342,31.069417,-283.154628,56.713794,-0.185334,1141945.0,1169240.0,1067717.0,4.990395,5.276185,5.487852
0,2019-01-02,600030.SH,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 4、初始化环境

**state_space 由四部分组成 :** <br>
1. 当天的资金量
2. 每只股票当天的收盘价
3. 每只股票当天的持仓量
4. 股票数 * 技术指标数<br>
5. 当天成交量

**reward 的计算方式：**<br>
* reward 交易前的总资产-当天交易后的总资产 = 当天交易的手续费
* TODO：待改进

**action_space 的空间：**<br>
  * actions ∈[-100, 100]
  * 正数表示买入，负数表示卖出，0表示不进行买入卖出操作
  * 绝对值表示买入卖出的数量

In [13]:
# stock_dimension = len(df.tic.unique())
# state_space = 1 + 2*stock_dimension + \
#     len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension + stock_dimension
# print("stock_dimension: {}, state_space: {}".format(stock_dimension, state_space))

In [14]:
# 初始化环境的参数
information_cols = config.TECHNICAL_INDICATORS_LIST + ["close", "day", "log_volume", "change", "daily_variance"]

e_train_gym = StockTradingEnvCashpenalty(df = train_data,initial_amount = 1e6,hmax = 5000, 
                                turbulence_threshold = None, 
                                currency='￥',
                                buy_cost_pct=3e-3,
                                sell_cost_pct=3e-3,
                                cash_penalty_proportion=0.2,
                                cache_indicator_data=True,
                                daily_information_cols = information_cols, 
                                print_verbosity = 500, 
                                random_start = True)

e_trade_gym = StockTradingEnvCashpenalty(df = test_data,initial_amount = 1e6,hmax = 5000, 
                                turbulence_threshold = None, 
                                currency='￥',
                                buy_cost_pct=3e-3,
                                sell_cost_pct=3e-3,
                                cash_penalty_proportion=0.2,
                                cache_indicator_data=True,
                                daily_information_cols = information_cols, 
                                print_verbosity = 500, 
                                random_start = False)

caching data
data cached!
caching data
data cached!


In [17]:
# 对环境进行测试
observation = e_train_gym.reset() # 初始化环境，observation为环境状态
count = 0
total_reward = 0
for t in range(100):
  action = e_train_gym.action_space.sample() # 随机采样动作
  observation, reward, done, info = e_train_gym.step(action) # 与环境交互，获得下一个state的值
  total_reward += reward
  if done:             
      break
  count+=1
  # time.sleep(0.2)      #每次等待 0.2s
print("count: ", count)
print("observation: ", observation)
print("reward: {}, done: {}".format(total_reward, done))

  actions = actions / self.closings


count:  100
observation:  [344864.5664951675, 0.0, 6281.39753150845, 10556.465293415804, 1888.9401953432287, 1567.637301907643, 760.3572735227943, 805.5705058366193, 9377.341847955347, 6274.3175369885275, 0.0, 220.9047892153185, 9738.131283325441, 3655.473670104796, 83.52019375596721, 4442.4861327072595, 1056.945647723765, 428.9471746557452, 5173.251316949659, 16819.501408601333, 1530.3608179986254, 1934.159245286381, 1450.7894530873896, 13390.499834641752, 0.0, 0.0, 0.0, 1869.381571270129, 0.0, 550.510030160087, 2269.8940033801987, 0.0, 0.0, 0.0, 406.0010651507694, 0.0, 0.0, 5393.481199359897, 1709.965085840975, 1730.8843584895953, 4109.825031469768, 0.0, 0.0, 0.0, 160.39711010459766, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.658485392601641, 5.157244607398349, 50.22263948946492, 1.0012549260691863, 2.9671881525031574, -0.1566723092111859, 1198284.0324999995, 910302.8511666663, 816084.441666667, 5.907864999999995, 6.682416666666667, 5.893577500000002, 6.2761, 1.0, 12.528216850025121, 11.2394131

In [18]:
import multiprocessing

n_cores = multiprocessing.cpu_count() - 2
print("using {} cores".format(n_cores))

#this is our training env. It allows multiprocessing
env_train, _ = e_train_gym.get_multiproc_env(n = n_cores)

#this is our observation environment. It allows full diagnostics
env_trade, _ = e_trade_gym.get_sb_env()

## 5、开始训练

所用到的框架：stable_baseline3

In [39]:
agent = DRLAgent(env = env_train)

In [40]:
# from torch.nn import Softsign, ReLU
ppo_params ={'n_steps': 256, 
             'ent_coef': 0.0, 
             'learning_rate': 0.000005, 
             'batch_size': 256, 
            'gamma': 0.99}

policy_kwargs = {
#     "activation_fn": ReLU,
    "net_arch": [1024 for _ in range(10)], 
#     "squash_output": True
}

model = agent.get_model("ppo",  
                        model_kwargs = ppo_params, 
                        policy_kwargs = policy_kwargs, verbose = 0)

# model = model.load("scaling_reward.model", env = env_train)

{'n_steps': 256, 'ent_coef': 0.0, 'learning_rate': 5e-06, 'batch_size': 1024, 'gamma': 0.99}


We recommend using a `batch_size` that is a multiple of `n_steps * n_envs`.
Info: (n_steps=256 and n_envs=1)
  f"You have specified a mini-batch size of {batch_size},"


{'n_steps': 256, 'ent_coef': 0.0, 'learning_rate': 5e-06, 'batch_size': 1024, 'gamma': 0.99}


In [None]:
model.learn(total_timesteps = 50000, 
            eval_env = env_trade, 
            eval_freq = 500,
            log_interval = 1, 
            tb_log_name = 'env_cashpenalty_highlr',
            n_eval_episodes = 1)   

  actions = actions / self.closings


   2|  91|CASH SHORTAGE  |￥22,525        |￥983,627       |-0.20942% |-1.63730% |2.29%     
   3| 113|CASH SHORTAGE  |￥33,615        |￥1,080,190     |-0.09047% |8.01899%  |3.11%     
   4| 125|CASH SHORTAGE  |￥33,116        |￥1,028,931     |-0.11499% |2.89314%  |3.22%     
   5| 116|CASH SHORTAGE  |￥28,769        |￥1,011,412     |-0.13974% |1.14117%  |2.84%     




EPISODE|STEPS|TERMINAL_REASON|CASH           |TOT_ASSETS     |TERMINAL_REWARD_unsc|GAINLOSS_PCT|CASH_PROPORTION
   1| 486|Last Date      |￥497,787       |￥1,209,804     |0.04317%  |20.98041% |41.15%    
Eval num_timesteps=500, episode_reward=0.14 +/- 0.00
Episode length: 487.00 +/- 0.00
New best mean reward!
   6|  90|CASH SHORTAGE  |￥31,559        |￥972,221       |-0.21185% |-2.77792% |3.25%     
   7| 110|CASH SHORTAGE  |￥646           |￥954,221       |-0.21453% |-4.57795% |0.07%     
   8| 174|CASH SHORTAGE  |￥5,766         |￥844,136       |-0.18329% |-15.58642%|0.68%     
   9|  84|CASH SHORTAGE  |￥37,070        |￥957,042       |-0.23488% |-4.29580% |3.87%     
   3| 486|Last Date      |￥230,505       |￥1,368,097     |0.06687%  |36.80968% |16.85%    
Eval num_timesteps=1000, episode_reward=0.22 +/- 0.00
Episode length: 487.00 +/- 0.00
New best mean reward!
  10| 138|CASH SHORTAGE  |￥38,990        |￥1,139,275     |-0.03593% |13.92751% |3.42%     
  11|  91|CASH SHORTAGE  |￥30,822   

<stable_baselines3.ppo.ppo.PPO at 0x7f94ee1a0410>

In [None]:
model.save("scaling_reward.model")

## 6、测试

In [None]:
df_account_value, df_actions = DRL_prediction(
    model=model, 
    environment = e_trade_gym)

In [None]:
print("回测的时间窗口：{} 至 {}".format(config.End_Trade_Date, config.End_Test_Date))

回测的时间窗口：2019-01-01 至 2021-01-01


In [None]:
df_account_value.to_csv("df_account_value.csv", index=False)
print("查看日账户净值")
print("开始: ")
print(df_account_value.head())
print("")
print("结束: ")
print(df_account_value.tail())

In [None]:
print("查看每日所作的交易")
df_actions.to_csv("df_actions.csv", index=False)
df_actions.tail()

## 7、回测

In [None]:
print("---------------------获取回测结果---------------------")
pref_stats_all = backtest_stats(account_value=df_account_value)

# perf_stats_all = pd.DataFrame(perf_stats_all)
# now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
# perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

In [None]:
# 获取 baseline 的结果
print("---------------------获取baseline结果---------------------")
baseline_df = get_baseline(config.SSE_50_INDEX, 
              start="20190101",
              end="20210101")
baseline_stats = backtest_stats(baseline_df, value_col_name='close')

In [None]:
# 删除 df_account_value 中重复的行
df_account_value.drop(df_account_value.index[1], inplace=True)

In [None]:
baseline_df.head(10)

In [None]:
print("---------------------Plot---------------------")
print("和 {} 指数进行比较".format(config.SSE_50_INDEX[0]))
%matplotlib inline
backtest_plot(df_account_value,
        baseline_start="20190101",
        baseline_end="20210101",
        baseline_ticker=config.SSE_50_INDEX,
      )