In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import pickle

import numpy as np
import pandas as pd
from multiprocessing import Pool

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import gc

In [3]:
sys.path.append("../environment")
sys.path.append("../agent/")

from market_env import HFTMarketEnvironment
from baseline_agent import FixedOffsetMMAgent
from deep_q_agent import DQLAgent

In [4]:
def read_rl_data(date, data_path="../rl_data"):
    """Load the pickled bid/ask and last-trade objects stored for one date.

    Parameters
    ----------
    date : str
        File stem of the pickles to load (the notebook uses names such as
        "2024-01-08").
    data_path : str, optional
        Root folder that contains the ``bid_ask`` and ``last_trade``
        sub-folders. Defaults to "../rl_data", which is resolved relative to
        the current working directory.

    Returns
    -------
    tuple
        ``(ba, lt)``: the object unpickled from ``bid_ask/<date>.pickle``
        followed by the one from ``last_trade/<date>.pickle``.

    Raises
    ------
    FileNotFoundError
        If either pickle file does not exist for ``date``.
    """
    def _load(kind):
        # SECURITY: pickle.load can execute arbitrary code while unpickling;
        # only point this function at data files that you trust.
        with open(f"{data_path}/{kind}/{date}.pickle", 'rb') as f:
            return pickle.load(f)

    # Keep the original read order: bid/ask first, then last trades.
    ba = _load("bid_ask")
    lt = _load("last_trade")
    return ba, lt

# Deep Q-learning agent (MLP) training

In [5]:
# Build the sorted list of available dates from the bid/ask pickle file names.
# Only "*.pickle" entries are kept, so stray directory entries
# (".ipynb_checkpoints", ".DS_Store", temporary files, ...) can never turn
# into bogus dates that would later fail inside read_rl_data.
dates_list = sorted(
    os.path.splitext(x)[0]
    for x in os.listdir("../rl_data/bid_ask/")
    if x.endswith(".pickle")
)
dates_list

['2024-01-08',
 '2024-01-09',
 '2024-01-10',
 '2024-01-11',
 '2024-01-12',
 '2024-01-13',
 '2024-01-14',
 '2024-01-15',
 '2024-01-16',
 '2024-01-17',
 '2024-01-18',
 '2024-01-19',
 '2024-01-20',
 '2024-01-21']

In [6]:
def run_deepq_train_agent_mlp(date):
    """Train a fresh DQLAgent on one date of market data and pickle the agent.

    The function takes a single positional argument so that it can be mapped
    over a list of dates with ``multiprocessing.Pool``.

    Parameters
    ----------
    date : str
        Date identifier passed to ``read_rl_data`` and used in the name of the
        saved agent file.

    Returns
    -------
    object
        Whatever ``DQLAgent.train`` returns for the 20 training episodes
        (the notebook output suggests one float per date; confirm against
        the agent implementation).

    Side effects
    ------------
    Writes ``../results/deepq_v1/agent_train_<date>.pickle`` containing the
    trained agent object, creating the folder if it does not exist.
    """
    # Create the output folder before training: without this, a missing
    # directory would only surface as FileNotFoundError after the whole
    # training run has finished, discarding the result.
    out_dir = "../results/deepq_v1"
    os.makedirs(out_dir, exist_ok=True)

    ba, lt = read_rl_data(date)

    params = {
        "bid_ask": ba,
        "trades": lt,
        "balance": {"BTC": 0.2, "FDUSD": 10000},
        "cancel_frequency": 10
    }

    env = HFTMarketEnvironment(params)
    dql_agent = DQLAgent(
        state_dim=39,
        lr=5e-4,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay=0.999,
        epsilon_min=0.01,
        batch_size=512,
        memory_size=10000
    )
    train_rewards = dql_agent.train(env, num_episodes=20)
    with open(f"{out_dir}/agent_train_{date}.pickle", 'wb') as file:
        pickle.dump(dql_agent, file)

    # params still references ba/lt (and env was built from params), so they
    # are dropped as well before forcing a collection in this worker process.
    del dql_agent, env, params, ba, lt
    gc.collect()
    return train_rewards

In [None]:
# Train one agent per date across 4 worker processes. imap yields results in
# the order of dates_list, and tqdm advances as each result is consumed.
with Pool(processes=4) as pool:
    results = [
        train_reward
        for train_reward in tqdm(
            pool.imap(run_deepq_train_agent_mlp, dates_list),
            total=len(dates_list),
        )
    ]

  0%|          | 0/14 [00:00<?, ?it/s]

Episode: 1/20, Total Reward: -1203.02209999986
Episode: 1/20, Total Reward: -1747.36359999984
Episode: 1/20, Total Reward: -1953.069849999863
Episode: 1/20, Total Reward: -2127.2039999996464
Episode: 2/20, Total Reward: -422.8409999999515
Episode: 2/20, Total Reward: -81.05959999997897
Episode: 2/20, Total Reward: -697.1220500000304
Episode: 2/20, Total Reward: 399.8933500000279
Episode: 3/20, Total Reward: -245.2244999999453
Episode: 3/20, Total Reward: -642.1658499999817
Episode: 3/20, Total Reward: -411.5532500000809
Episode: 3/20, Total Reward: -549.086299999962
Episode: 4/20, Total Reward: -773.4049999999681
Episode: 4/20, Total Reward: -878.9024499999922
Episode: 4/20, Total Reward: -1479.2459500002153
Episode: 5/20, Total Reward: -76.67229999992995
Episode: 4/20, Total Reward: 4.67480000002709
Episode: 5/20, Total Reward: -880.8006999999924
Episode: 5/20, Total Reward: -84.30800000002083
Episode: 6/20, Total Reward: -679.7363999999571
Episode: 5/20, Total Reward: 632.32060000000

In [8]:
# Values returned by run_deepq_train_agent_mlp, one per entry of dates_list and in the same order.
results

[-4636.078150000884,
 -4867.749500000835,
 -6002.92705000109,
 -5493.853800001097,
 -5699.413600001135,
 -2490.9348500005626,
 -2119.2477500004757,
 -2292.4975500006067,
 -2468.9524000006213,
 -2113.000650000483,
 -2853.7216000005765,
 -3163.586150000681,
 -832.314650000216,
 -582.8148500001222]