cmd_util.py

# -*- coding:utf-8 -*-
import argparse
import datetime
import os
import random

import numpy as np
import torch

from tenvs.market import Market
from tenvs.scenario import make_env as _make_env


def set_global_seeds(seed):
    if seed is None:
        seed = np.random.randint(int(1e6))
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
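
# Usage sketch: seed everything once at startup, before building envs or
# networks, e.g.
#     args = common_arg_parser()
#     set_global_seeds(args.seed)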


def make_env(args):
    """
    Create a wrapped, monitored gym.Env for tenvs.
    """
    ts_token = os.getenv("TUSHARE_TOKEN")
    codes = args.codes.split(",")
    indexs = args.indexs.split(",")
    m = Market(
        ts_token=ts_token,
        start=args.start,
        end=args.end,
        codes=codes,
        indexs=indexs,
        data_dir=args.data_dir)
    used_infos = ["equities_hfq_info", "indexs_info"]
    env = _make_env(
        scenario=args.scenario,
        market=m,
        investment=args.investment,
        look_back_days=args.look_back_days,
        used_infos=used_infos,
        reward_fn=args.reward_fn,
        log_deals=args.log_deals)
    return env
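
# Usage sketch (assumes TUSHARE_TOKEN is exported and args comes from
# common_arg_parser() below):
#     env = make_env(args)   # market data spans args.start to args.end
# make_eval_env below is identical except that it loads the evaluation
# period args.eval_start to args.eval_end.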


def make_eval_env(args):
    """
    Create a wrapped, monitored gym.Env for tenvs over the evaluation
    period (args.eval_start to args.eval_end).
    """
    ts_token = os.getenv("TUSHARE_TOKEN")
    codes = args.codes.split(",")
    indexs = args.indexs.split(",")
    m = Market(
        ts_token=ts_token,
        start=args.eval_start,
        end=args.eval_end,
        codes=codes,
        indexs=indexs,
        data_dir=args.data_dir)
    used_infos = ["equities_hfq_info", "indexs_info"]
    env = _make_env(
        scenario=args.scenario,
        market=m,
        investment=args.investment,
        look_back_days=args.look_back_days,
        used_infos=used_infos,
        reward_fn=args.reward_fn,
        log_deals=args.log_deals)
    return env


# NOTE: load pre_load_days of data before the inference date
def get_infer_start_day(end, pre_load_days=30):
    DATE_FORMAT = '%Y%m%d'
    end_date = datetime.datetime.strptime(end, DATE_FORMAT)
    start_date = end_date - datetime.timedelta(days=pre_load_days)
    return start_date.strftime(DATE_FORMAT)
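
# Example: get_infer_start_day("20200408", pre_load_days=30) -> "20200309",
# i.e. 30 calendar days before the inference date.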


def make_infer_env(args):
    """
    Create a wrapped, monitored gym.Env for tenvs, loading enough history
    before args.infer_date for inference.
    """
    ts_token = os.getenv("TUSHARE_TOKEN")
    codes = args.codes.split(",")
    indexs = args.indexs.split(",")
    end = args.infer_date
    pre_load_days = 30 + args.look_back_days
    start = get_infer_start_day(end, pre_load_days)
    m = Market(
        ts_token=ts_token,
        start=start,
        end=end,
        codes=codes,
        indexs=indexs,
        data_dir=args.data_dir)
    used_infos = ["equities_hfq_info", "indexs_info"]
    env = _make_env(
        scenario=args.scenario,
        market=m,
        investment=args.investment,
        look_back_days=args.look_back_days,
        used_infos=used_infos,
        reward_fn=args.reward_fn,
        log_deals=args.log_deals)
    return env
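
# Example: with the defaults look_back_days=10 and infer_date="20200408",
# pre_load_days is 40 and the market window starts at
# get_infer_start_day("20200408", 40) == "20200228" (2020 is a leap year,
# so the window crosses Feb 29).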


def common_arg_parser():
    """
    Create an argparse.ArgumentParser for trading agents.
    """
    parser = argparse.ArgumentParser("reinforcement learning trading agents")
    # environment
    parser.add_argument('--scenario', help='environment scenario', type=str,
                        default='average')
    parser.add_argument("--codes", type=str, default="000001.SZ",
                        help="tushare codes of the experiment stocks")
    parser.add_argument(
        "--indexs", type=str, default="000001.SH,399001.SZ",
        help="tushare codes of the indexes, 000001.SH: SSE Composite Index, "
             "399001.SZ: SZSE Component Index, details: "
             "https://tushare.pro/document/2?doc_id=94")
    parser.add_argument("--start", type=str, default='20190101',
                        help="start date of the training period")
    parser.add_argument("--end", type=str, default='20191231',
                        help="end date of the training period")
    parser.add_argument("--eval_start", type=str, default='20200101',
                        help="start date of the evaluation period")
    parser.add_argument("--eval_end", type=str, default='20200223',
                        help="end date of the evaluation period")
    parser.add_argument("--investment", type=float, default=100000,
                        help="the investment for each stock")
    parser.add_argument("--look_back_days", type=int, default=10,
                        help="how many days to look back")
    parser.add_argument("--data_dir", type=str, default='/tmp/tenvs',
                        help="directory where tenvs stores trade data")
    parser.add_argument('--num_env', default=1, type=int,
                        help='Number of environment copies run in parallel.')
    parser.add_argument('--device', type=str, default=None)
    # model parameters
    parser.add_argument('--policy_net', default='LSTM_MLP',
                        help='network type (LSTM_MLP)')
    parser.add_argument('--value_net', default='LSTM_Merge_MLP',
                        help='network type (LSTM_Merge_MLP)')
    parser.add_argument('--reward_fn',
                        default="daily_return_with_chl_penalty",
                        help='reward function')
    parser.add_argument('--activation',
                        default="tanh",
                        help='activation function')
    parser.add_argument('--entropy_coef', type=float, default=0.1,
                        help='entropy term coefficient (default: 0.1)')
    parser.add_argument('--value_loss_coef', type=float, default=0.5,
                        help='value loss coefficient (default: 0.5)')
    parser.add_argument("--t_max", type=int, default=10,
                        help="how many steps between parameter updates (A2C)")
    # training parameters
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--alg', help='rl algorithm', type=str, default='ddpg')
    parser.add_argument('--opt_fn', type=str, default='rmsprop',
                        help='optimizer function')
    parser.add_argument("--gamma", type=float, default=0.53,
                        help="discount factor")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="learning rate")
    parser.add_argument("--max_grad_norm", type=float, default=5,
                        help="max gradient norm for clipping")
    parser.add_argument("--tau", type=float, default=0.95,
                        help="soft-update rate for exchanging target "
                             "network parameters")
    parser.add_argument('--explore_size', type=int, default=400)
    parser.add_argument('--sample_size', type=int, default=200)
    parser.add_argument('--warm_up', type=int, default=10000)
    parser.add_argument('--max_iter_num', type=int, default=500, metavar='N',
                        help='maximal number of main iterations')
    # output
    parser.add_argument('--model_dir', help='dir to save trained model',
                        default=os.path.join("/tmp", "tbase", "models"),
                        type=str)
    parser.add_argument('--log_interval', type=int, default=10, metavar='N',
                        help='interval between training status logs '
                             '(default: 10)')
    parser.add_argument('--clear_memory_interval', type=int, default=5)
    parser.add_argument('--tensorboard_dir',
                        default=os.path.join("/tmp", "tbase", "tensorboard"),
                        type=str,
                        help='Directory to save learning curve data.')
    parser.add_argument('--progress_bar_path', help='path to save progress',
                        default=os.path.join("/tmp", "tbase", "bar.txt"),
                        type=str)
    parser.add_argument('--eval_result_path', help='path to save eval result',
                        default=os.path.join("/tmp", "tbase", "eval.txt"),
                        type=str)
    parser.add_argument("--infer_date", type=str, default='20200408',
                        help="date on which to infer actions")
    parser.add_argument('--infer_result_path',
                        help='path to save infer result',
                        default=os.path.join("/tmp", "tbase", "infer.txt"),
                        type=str)
    parser.add_argument('--print_action', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    parser.add_argument('--log_deals', default=False, action='store_true')
    # runtime flags
    parser.add_argument('--play', default=False, action='store_true')
    parser.add_argument('--run_id', type=int, default=1)
    parser.add_argument('--eval', default=False, action='store_true')
    parser.add_argument('--infer', default=False, action='store_true')
    return parser.parse_args()
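

# A minimal smoke test, not part of the original CLI: a sketch assuming
# TUSHARE_TOKEN is exported and that the returned env follows the gym.Env
# interface (reset/step).
if __name__ == "__main__":
    args = common_arg_parser()
    set_global_seeds(args.seed)
    env = make_env(args)
    obs = env.reset()
    print("env created, initial observation type:", type(obs))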