In [1]:
from pathlib import Path

from rljax.algorithm import SAC_Discrete
from rljax.trainer import Trainer
import numpy as np
import pandas as pd

from micro_price_trading.config import TWENTY_SECOND_DAY
from micro_price_trading import Preprocess, OptimalExecutionEnvironment

# Working directory of the kernel at the time this cell runs.
PATH = Path.cwd()

In [2]:
# Build the project preprocessor for 'TBT_TBF_data.csv' with res_bin=9.
# NOTE(review): the file name is relative, so it presumably resolves against
# the kernel's working directory; the meaning of res_bin is defined in
# micro_price_trading and is not visible here — confirm both.
raw = Preprocess('TBT_TBF_data.csv', res_bin=9)
# Run the preprocessing step; `data` is what the environment is built from below.
data = raw.process()

In [3]:
def return_rewards(current, last, action, p, c):
    """Return the change in summed value between two iterables.

    Args:
        current: Iterable of numbers; its elements are summed.
        last: Iterable of numbers; its elements are summed.
        action: Accepted but not used by this function.
        p: Accepted but not used by this function.
        c: Accepted but not used by this function.

    Returns:
        ``sum(current) - sum(last)``.
    """
    current_total = sum(current)
    last_total = sum(last)
    return current_total - last_total

In [None]:
# 23,400 seconds between 9:30am and 4pm.
# NOTE(review): this comment originally said the day is broken into 10-second
# increments, but the environment below is given steps=TWENTY_SECOND_DAY —
# confirm which step size is intended.

# Total agent steps; the same value is handed to both the algorithm and the trainer.
NUM_AGENT_STEPS = 10000
# Single seed shared by the environment, the algorithm and the trainer.
SEED = 0

# Training environment built from the preprocessed `data`.
env = OptimalExecutionEnvironment(
    data,
    steps=TWENTY_SECOND_DAY,
    seed=SEED
)
# Second environment object obtained from env.copy_env(); it is passed to the
# Trainer as env_test. NOTE(review): presumably an independent copy — confirm.
env_test = env.copy_env()

# Discrete-action SAC agent from rljax, sized from the environment's spaces.
# NOTE(review): batch_size / start_steps / update_interval /
# update_interval_target are hard-coded here; their exact semantics come from
# rljax and are not visible in this notebook.
algo = SAC_Discrete(
    num_agent_steps=NUM_AGENT_STEPS,
    state_space=env.observation_space,
    action_space=env.action_space,
    seed=SEED,
    batch_size=256,
    start_steps=1000,
    update_interval=1,
    update_interval_target=400
)

# NOTE(review): log_dir is an empty string, so any files the trainer writes
# presumably land relative to the working directory — confirm. PATH from the
# first cell is not used here.
trainer = Trainer(
    env=env,
    env_test=env_test,
    algo=algo,
    log_dir="",
    num_agent_steps=NUM_AGENT_STEPS,
    eval_interval=2000,
    seed=SEED,
)
# Run training. NOTE(review): the saved output of this cell is dominated by
# long runs of bare integers around the "Num steps: ... Return: ..." line;
# this looks like leftover debug printing somewhere — consider silencing it
# or clearing the output before sharing.
trainer.train()



459
69
249
536
450
352
201
287
540
103
0
216
0
0
0
282
180
215
638
188
350
301
396
247
131
168
301
364
344
148
330
296
318
250
430
306
99
106
481
0
202
0
344
218
14
80
234
191
212
164
68
349
116
221
0
187
316
171
365
301
269
294
494
216
339
234
0
448
266
291
314
274
0
266
473
76
211
444
140
460
113
96
72
333
189
220
391
285
0
303
288
450
293
644
0
267
286
213
435
150
224
227
139
301
401
184
0
512
168
382
160
9
202
146
108
289
230
206
182
240
503
158
100
0
190
213
434
0
446
251
112
0
209
0
259
0
323
235
0
334
211
299
118
0
42
122
226
303
354
136
480
299
48
340
300
660
0
160
146
128
135
0
209
463
339
0
23
64
285
268
295
310
391
162
69
327
178
91
116
287
366
31
124
114
209
309
331
316
144
263
0
243
365
216
138
0
134
310
179
65
417
362
526
212
124
66
108
307
139
470
96
274
169
249
198
120
512
362
259
221
143
159
155
241
284
355
314
431
0
80
168
0
265
56
301
101
83
330
99
66
146
356
458
282
221
0
189
144
0
431
119
16
108
106
234
215
408
106
177
391
142
298
378
68
227
21
108
244
376
134
0
15

716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
870
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716
716


836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
1020
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836
836

8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
0
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
0
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8


0
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
0
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
8
Num steps: 8000     Return: 1013.7   Time: 0:04:31
41
244
298
263
14

In [None]:
# Call plot() on the training environment with its default arguments.
env.plot()

In [None]:
# Call plot() with default arguments on the environment that was passed to the Trainer as env_test.
env_test.plot()

In [None]:
# Request the 'position_history' plot from env_test.
# (The accepted plot names are defined by the environment class, not in this notebook.)
env_test.plot('position_history')

In [None]:
# Request the 'asset_paths' plot from env_test.
env_test.plot('asset_paths')

In [None]:
# Request the 'summarize_decisions' plot from env_test.
env_test.plot('summarize_decisions')

In [None]:
# Request the 'learning_progress' plot from env_test.
env_test.plot('learning_progress')

In [None]:
# Display the training environment's portfolio_values as the cell output.
# NOTE(review): if this is large, consider showing only a slice or summary.
env.portfolio_values

In [None]:
# Display the training environment's portfolio_history as the cell output.
env.portfolio_history

In [None]:
# NOTE(review): this cell repeats the preceding `env.portfolio_history` cell
# verbatim — either remove it or confirm whether env_test was intended here.
env.portfolio_history