In [1]:
from pathlib import Path

from rljax.algorithm import DQN
from rljax.trainer import Trainer
import numpy as np
import pandas as pd

from classes import Env, Preprocess

# Working directory the kernel was started in; 'asset_data' paths below hang off it.
PATH = Path.cwd()

In [17]:
def save_q_values(env, algo, file_name):
    """Query the agent's network for every mapped state and write the Q-values to CSV.

    For each value in ``env.mapping`` the network is applied to the observation
    ``[value, 0]``. Each result becomes one row; the three columns are named
    'Long/Short', 'Short/Long' and 'Flat'.

    Args:
        env: Object exposing a ``mapping`` dict whose values are fed to the network.
        algo: Agent exposing ``net.apply`` and ``params``.
        file_name: Name of the CSV file written under ``PATH / 'asset_data'``.

    Returns:
        None. Rows are written in ``env.mapping`` iteration order with no index
        column, so the file itself does not carry the state labels.
    """
    # NOTE(review): the second observation component is pinned to 0 here —
    # confirm which part of the state it represents in `Env`.
    q_values = [
        algo.net.apply(algo.params, np.array([v, 0]))
        for v in env.mapping.values()
    ]

    df = pd.DataFrame(q_values, columns=['Long/Short', 'Short/Long', 'Flat'])

    df.to_csv(PATH.joinpath('asset_data', file_name), index=False)

In [3]:
# Pre-process the 'TBT_TBF_9_27' file; `data` feeds the training environment below.
raw = Preprocess(PATH / 'asset_data' / 'TBT_TBF_9_27_data.csv', res_bin=7)
data = raw.process()

In [5]:
# Pre-process the 'TBT_TBF_9_28' file; `test_data` feeds the evaluation environment below.
test_raw = Preprocess(PATH / 'asset_data' / 'TBT_TBF_9_28_data.csv', res_bin=7)
test_data = test_raw.process()

In [6]:
def return_rewards(current, last, action, p, c):
    """Reward callback: change in the summed values between two snapshots.

    Args:
        current: Iterable of numbers for the present step.
        last: Iterable of numbers for the previous step.
        action, p, c: Accepted so the signature matches what the caller passes;
            not used by this reward.

    Returns:
        ``sum(current) - sum(last)``.
    """
    total_now = sum(current)
    total_before = sum(last)
    return total_now - total_before

In [7]:
# A 9:30am-4pm session is 23,400 seconds, i.e. 2,340 ten-second increments.
# NOTE(review): DAY is half of that (1,170 steps) — confirm whether an episode is
# meant to span half a session or the bars are really 20 seconds wide.

NUM_AGENT_STEPS = 5_000
SEED = 0
DAYS = 1
DAY = 2340//2

# Both environments share every setting except the data they replay.
env_kwargs = dict(
    no_trade_period=5,
    fixed_buy_cost=0,
    fixed_sell_cost=0,
    steps=DAY*DAYS,
    reward_func=return_rewards,
)
env = Env(data, **env_kwargs)
env_test = Env(test_data, **env_kwargs)

algo = DQN(
    num_agent_steps=NUM_AGENT_STEPS,
    state_space=env.observation_space,
    action_space=env.action_space,
    seed=SEED,
    batch_size=256,
    start_steps=1000,
    update_interval=1,
    update_interval_target=400,
    eps_decay_steps=0,
    loss_type="l2",
    lr=5e-5,
)

trainer = Trainer(
    env=env,
    env_test=env_test,
    algo=algo,
    log_dir="",
    num_agent_steps=NUM_AGENT_STEPS,
    # Previously spelled `25_00`, which reads like a typo for 25_000; the value is 2500.
    eval_interval=2_500,
    seed=SEED,
)
trainer.train()



Num steps: 2500     Return: 0.0   (0.2  )   Time: 0:01:01
Num steps: 5000     Return: 0.0   (0.3  )   Time: 0:01:43


In [None]:
# Default plot of the training environment (what `Env.plot` draws is defined in `classes`).
env.plot()

In [None]:
# Same default plot, for the evaluation environment.
env_test.plot()

In [None]:
# Plot selected by the 'position_history' key — presumably the positions held
# during evaluation; confirm against `Env.plot`.
env_test.plot('position_history')

In [None]:
# Decision summary for the evaluation environment (format defined by `Env.summarize_decisions`).
env_test.summarize_decisions()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Network output for each mapped state, queried with the observation [state_value, 0].
q_values = [
    algo.net.apply(algo.params, np.array([state_value, 0]))
    for state_value in env.mapping.values()
]

In [None]:
plt.figure(figsize=(15, 10))

labels = {0: 'Long/Short', 1: 'Short/Long', 2: 'Flat'}
states = np.array(list(env.mapping.keys()))

# Transpose once so each row is one action's Q-values across all states.
q_by_action = np.array(q_values).T
# Order states by descending Q-value of action 0 ('Long/Short'). The same order
# is applied to every curve, so it is computed once rather than per iteration.
sort = np.argsort(-q_by_action[0])

for idx, col in enumerate(q_by_action):
    plt.plot(states[sort], col[sort], label=labels[idx])

plt.title('Q-Values with No Costs')
plt.legend()
plt.xticks(rotation=90)
plt.grid()
plt.xlabel('Residual Imbalance States')

mini = -1
maxi = 5.5

# NOTE(review): the y axis is relabelled by hand — tick positions come from a
# 0.33-spaced grid while the displayed labels come from a 0.1-spaced grid. Both
# slices must have the same length or `plt.yticks` raises; confirm the intended
# scaling before reusing this for other runs.
plt.yticks(ticks=np.arange(mini, maxi, 0.33)[11:], labels=np.round(np.arange(-1, 1, 0.1)[11:], 2))
# Relative path: the 'figures' directory must already exist in the working directory.
plt.savefig('figures/q_values_no_costs_sorted_long_short.png', format='png')
plt.show()

In [15]:
# Scratch check: dropping the trailing '_data.csv' (9 characters) leaves the base
# name used for the per-day Q-value files.
s = 'TBT_TBF_9_27_data.csv'
base = s[:-len('_data.csv')]
f'q_values_{base}'

'q_values_TBT_TBF_9_27'

In [18]:
# Smoke test of `save_q_values` with the agent trained above; writes
# 'TESTING_TEST.csv' into the 'asset_data' directory.
save_q_values(env, algo, 'TESTING_TEST.csv')

In [20]:
# Train a fresh agent on each day's file and save its Q-values.
# A tuple rather than a set: set iteration order for strings can change between
# kernel sessions, which made the order of runs (and of the log lines) arbitrary.
DATA_FILES = (
    'TBT_TBF_9_27_data.csv',
    'TBT_TBF_9_28_data.csv',
    'TBT_TBF_9_29_data.csv',
    'TBT_TBF_9_30_data.csv',
    'TBT_TBF_10_1_data.csv',
)
# Suffix stripped from each file name to build the output name.
DATA_SUFFIX = '_data.csv'

# A 9:30am-4pm session is 23,400 seconds, i.e. 2,340 ten-second increments.
# NOTE(review): DAY is half of that (1,170 steps) — confirm the intended episode length.
# These settings do not vary per file, so they are set once outside the loop.
NUM_AGENT_STEPS = 25_000
SEED = 0
DAYS = 1
DAY = 2340//2

for file in DATA_FILES:
    raw = Preprocess(PATH.joinpath('asset_data', file), res_bin=7)
    data = raw.process()

    env = Env(data, no_trade_period=5, fixed_buy_cost=0, fixed_sell_cost=0, steps=DAY*DAYS, reward_func=return_rewards)
    # Evaluation runs on a copy of the training environment, i.e. on the same day's data.
    env_test = env.copy_env()

    algo = DQN(
        num_agent_steps=NUM_AGENT_STEPS,
        state_space=env.observation_space,
        action_space=env.action_space,
        seed=SEED,
        batch_size=256,
        start_steps=1000,
        update_interval=1,
        update_interval_target=400,
        eps_decay_steps=0,
        loss_type="l2",
        lr=5e-5,
    )

    trainer = Trainer(
        env=env,
        env_test=env_test,
        algo=algo,
        log_dir="",
        num_agent_steps=NUM_AGENT_STEPS,
        # Equal to the step budget, so evaluation is reported once, at the end of training.
        eval_interval=NUM_AGENT_STEPS,
        seed=SEED,
    )
    trainer.train()

    save_q_values(env, algo, f'q_values_{file[:-len(DATA_SUFFIX)]}.csv')

Num steps: 25000    Return: -0.0  (0.2  )   Time: 0:02:17
Num steps: 25000    Return: 0.0   (0.3  )   Time: 0:02:15
Num steps: 25000    Return: -0.0  (0.3  )   Time: 0:02:23
Num steps: 25000    Return: -0.0  (0.2  )   Time: 0:02:22
Num steps: 25000    Return: -0.0  (0.2  )   Time: 0:02:22
