# Running the script in Colab

In [3]:
import argparse

from agent.dqn import Agent
from configurations import LOGGER

In [5]:
def _str_to_bool(value):
    """
    Convert a command-line token into a real boolean.

    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, so any
    non-empty value (including 'False') is parsed as True. This converter
    interprets the usual textual spellings instead.

    :param value: raw token from the command line (or an actual bool)
    :return: (bool) parsed flag value
    :raises argparse.ArgumentTypeError: if the token is not a recognised boolean
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, matching what bool('') returned before.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        f"Expected a boolean value (true/false), got {value!r}")


# Experiment configuration: every option below becomes a keyword argument of
# the agent, so the option names must not change.
parser = argparse.ArgumentParser()
parser.add_argument('--window_size',
                    default=100,
                    help="Number of lags to include in the observation",
                    type=int)
parser.add_argument('--max_position',
                    default=10,  # as used in paper
                    help="Maximum number of positions that are " +
                         "able to be held in a broker's inventory",
                    type=int)
parser.add_argument('--fitting_file',
                    default='XBTUSD_20200101_20200102_merge.csv.xz',
                    # alternative: default='demo_LTC-USD_20190926.csv.xz',
                    help="Data set for fitting the z-score scaler (previous day)",
                    type=str)
parser.add_argument('--testing_file',
                    default='paper_data/XBTUSD_2020-01-03.csv.xz',
                    help="Data set for training the agent (current day)",
                    type=str)
parser.add_argument('--symbol',
                    default='XBTUSD',
                    help="Name of currency pair or instrument",
                    type=str)
parser.add_argument('--id',
                    default='market-maker-v0',
                    # alternative: default='trend-following-v0',
                    help="Environment ID; Either 'trend-following-v0' or "
                         "'market-maker-v0'",
                    type=str)
parser.add_argument('--number_of_training_steps',
                    default=1000000,
                    help="Number of steps to train the agent "
                         "(does not include action repeats)",
                    type=int)
parser.add_argument('--gamma',
                    default=0.99,
                    help="Discount for future rewards",
                    type=float)
parser.add_argument('--seed',
                    default=1,
                    help="Random number seed for data set",
                    type=int)
parser.add_argument('--action_repeats',
                    default=5,
                    help="Number of steps to pass on between actions",
                    type=int)
parser.add_argument('--load_weights',
                    default=False,
                    help="Load saved load_weights if TRUE, otherwise start from scratch",
                    type=_str_to_bool)
parser.add_argument('--visualize',
                    default=False,
                    help="Render midpoint on a screen",
                    type=_str_to_bool)
parser.add_argument('--training',
                    default=True,
                    help="Training or testing mode. " +
                         "If TRUE, then agent starts learning, " +
                         "If FALSE, then agent is tested",
                    type=_str_to_bool)
parser.add_argument('--reward_type',
                    default='trade_completion',
                    choices=['default',
                             'default_with_fills',
                             'realized_pnl',
                             'differential_sharpe_ratio',
                             'asymmetrical',
                             'trade_completion'],
                    help="""
                    reward_type: method for calculating the environment's reward:
                    1) 'default' --> inventory count * change in midpoint price returns
                    2) 'default_with_fills' --> inventory count * change in midpoint
                    price returns + closed trade PnL
                    3) 'realized_pnl' --> change in realized pnl between time steps
                    4) 'differential_sharpe_ratio' -->
                    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1.7210&rep=rep1&type=pdf
                    5) 'asymmetrical' --> extended version of *default* and enhanced
                    with  a reward for being filled above or below midpoint,
                    and returns only negative rewards for Unrealized PnL to discourage
                    long-term speculation.
                    6) 'trade_completion' --> reward is generated per trade's round trip
                    """,
                    type=str)
parser.add_argument('--nn_type',
                    default='mlp',
                    help="Type of neural network to use: 'cnn' or 'mlp' ",
                    type=str)
parser.add_argument('--dueling_network',
                    default=True,
                    help="If TRUE, use Dueling architecture in DQN",
                    type=_str_to_bool)
parser.add_argument('--double_dqn',
                    default=True,
                    help="If TRUE, use double DQN for Q-value estimation",
                    type=_str_to_bool)
# Parse an explicit empty argument list so the defaults above are used rather
# than whatever the notebook kernel was launched with in sys.argv.
args = vars(parser.parse_args([]))

In [7]:
def main(kwargs):
    """
    Create an agent from a dictionary of settings and start it.

    :param kwargs: (dict) keyword arguments unpacked into the ``Agent`` constructor
    :return: (void)
    """
    creation_msg = f'Experiment creating agent with kwargs: {kwargs}'
    LOGGER.info(creation_msg)

    dqn_agent = Agent(**kwargs)
    created_msg = f'Agent created. {dqn_agent}'
    LOGGER.info(created_msg)

    dqn_agent.start()

In [8]:
# Launch the experiment with the argument dictionary produced by the parser
# (all defaults, since no command-line tokens were supplied).
main(kwargs=args)

[2023-05-02 11:11:07,130, 711246497.py:2] Experiment creating agent with kwargs: {'window_size': 100, 'max_position': 10, 'fitting_file': 'XBTUSD_20200101_20200102_merge.csv.xz', 'testing_file': 'paper_data/XBTUSD_2020-01-03.csv.xz', 'symbol': 'XBTUSD', 'id': 'market-maker-v0', 'number_of_training_steps': 1000000, 'gamma': 0.99, 'seed': 1, 'action_repeats': 5, 'load_weights': False, 'visualize': False, 'training': True, 'reward_type': 'trade_completion', 'nn_type': 'mlp', 'dueling_network': True, 'double_dqn': True}
[2023-05-02 11:11:07,138, ema.py:67] EMA smoothing ENABLED: 0.99
[2023-05-02 11:11:25,861, data_pipeline.py:49] Imported 101_20200102_merge.csv.xz from a csv in 18 seconds
[2023-05-02 11:11:42,235, ema.py:93] Applying EMA to data...
[2023-05-02 11:11:47,435, data_pipeline.py:49] Imported /XBTUSD_2020-01-03.csv.xz from a csv in 3 seconds
[2023-05-02 11:11:47,802, ema.py:93] Applying EMA to data...
[2023-05-02 11:11:48,493, data_pipeline.py:228] Adding order imbalances...
[20

Resetting environment #1 on episode #0.
market-maker-v0 XBTUSD #1 instantiated
observation_space: (100, 174) reward_type = TRADE_COMPLETION max_steps = 86345
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1, 100, 256)       44800     
                                                                 
 dense_1 (Dense)             (None, 1, 100, 256)       65792     
                                                                 
 flatten (Flatten)           (None, 25600)             0         
                                                                 
 dense_2 (Dense)             (None, 17)                435217    
                                                                 
Total params: 545,809
Trainable params: 545,809
Non-trainable params: 0
_________________________________________________________________


[2023-05-02 11:11:49,495, dqn.py:125] None
2023-05-02 11:11:49.674730: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-05-02 11:11:49.675552: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2023-05-02 11:11:49.675746: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (vmd102699.contaboserver.net): /proc/driver/nvidia/version does not exist
2023-05-02 11:11:49.680906: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-02 11:11:49.702057: I tensorflow/compiler/ml

Resetting environment #1 on episode #0.


[2023-05-02 11:11:55,389, dqn.py:172] training over.
[2023-05-02 11:11:55,392, dqn.py:173] Saving AGENT weights...
[2023-05-02 11:11:55,511, dqn.py:175] AGENT weights saved.
