In [1]:
%run base.ipynb
import gym
import matplotlib.pyplot as plt

from abides_gym_market_making_environment import *
from policies import SigPolicy
from train import train
import utils

In [2]:
# Expose the market-making environment to `gym.make` under the id "market-making-v0".
from gym.envs.registration import register

register(id="market-making-v0", entry_point=SubGymMarketsMarketMakingEnv_v0)

def generate_env(seed):
    """
    Build the registered "market-making-v0" environment with the fixed
    parameters below and seed it.

    Parameters
    ----------
    seed
        Value forwarded to ``env.seed``.

    Returns
    -------
    The environment object returned by ``gym.make``, after ``env.seed(seed)``
    has been called on it.
    """
    env = gym.make(
        "market-making-v0",
        background_config="rmsc04",
        mkt_close="11:45:00",
        timestep_duration="10s",
        order_fixed_size=100,
        # The comma after this argument was missing, which made the whole
        # call a SyntaxError.
        # NOTE(review): keyword is spelled "first_intervall" exactly as in the
        # original call; confirm it matches the environment's parameter name.
        first_intervall="00:10:00",
        max_inventory=1000,
        remaining_inventory_reward=-100,  # penalty
        inventory_reward_dampener=0.6,
        damp_mode="asymmetric",
        debug_mode=False,
    )

    env.seed(seed)

    return env

In [3]:
# Instantiate the environment used by the cells below, seeded with 1.
env = generate_env(seed=1)

In [4]:
# Hyper-parameters for the training run.
episodes = 250
lr = 0.0005
window_length = 30
eps = 0.9
# Schedule object from utils.linear_decay; presumably moves epsilon from `eps`
# towards 0.001 over 170000 epochs -- confirm against utils.
eps_decay = utils.linear_decay(epochs=170000, start=eps, end=0.001)

# Build the policy on the environment and initialise its parameters.
sigpolicy = SigPolicy(env, 3)
sigpolicy.initialize_parameters(zero_bias=True)

# Weights of the linear layer before training.
print(sigpolicy.linear.weight.data)

# Keyword options handed to `train` alongside the positional arguments.
train_kwargs = dict(
    learning_rate=lr,
    epsilon=eps,
    epsilon_decay=eps_decay,
    window_length=window_length,
    printing=False,
)
results = train(env, sigpolicy, episodes, **train_kwargs)

# Weights of the linear layer after training, for comparison with the above.
print(sigpolicy.linear.weight.data)


tensor([[ 0.0344, -0.0125, -0.0316,  0.0688, -0.0874,  0.1141, -0.1234, -0.1059,
          0.0409,  0.0752,  0.0696,  0.0223,  0.0550,  0.0975,  0.0738,  0.0363,
         -0.0550, -0.0853, -0.0376, -0.0317,  0.1070,  0.0412, -0.0537, -0.0913,
         -0.0525,  0.0435,  0.1151,  0.1326,  0.0104, -0.0813, -0.0876,  0.1248,
         -0.0902,  0.0607, -0.0273, -0.0496,  0.1155,  0.0834, -0.0300, -0.0107,
          0.0767, -0.0468, -0.0970,  0.0199, -0.1108,  0.0470,  0.0101, -0.0060,
         -0.0172, -0.0325,  0.0183,  0.0323, -0.0201, -0.1129, -0.0350, -0.0997]])
  0%|          | 0/1 [00:00<?, ?it/s]

reward: 0.0
pnl: 0
inv reward: 0.0
inventors: 0
loss: tensor(0.0316, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: 0.0
inventors: 0
loss: tensor(0.5867, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: 0.0
inventors: 0
loss: tensor(0.0102, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: 0.0
inventors: 0
loss: tensor(0.0054, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: 0.0
inventors: 0
loss: tensor(0.0009, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: 0.0
inventors: 0
loss: tensor(0.0001, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: 0.0
inventors: 0
loss: tensor(0.0229, grad_fn=<SmoothL1LossBackward>)
reward: 0.025
pnl: 0.025
inv reward: 0.0
inventors: -100
loss: tensor(0.0587, grad_fn=<SmoothL1LossBackward>)
reward: 0.025
pnl: 0
inv reward: 0.025
inventors: -100
loss: tensor(0.0005, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: -0.0
inventors: -100
loss: tensor(0.0009, gr

In [None]:
# Plot entry 15 of results["actions"] (presumably the actions taken in one
# episode -- confirm against `train`).
selected_actions = results["actions"][15]
plt.plot(selected_actions)
# Alternative view kept from the original cell (rewards, with large values clipped):
#plt.plot([reward if reward < 50000 else 500 for reward in results["rewards"]])