In [1]:
%run base.ipynb
import gym
import matplotlib.pyplot as plt

from abides_gym_market_making_environment import *
from policies import SigPolicy
from train import train
import utils

In [2]:
# Register the market-making environment with gym so that gym.make() can
# build it by id.
from gym.envs.registration import register, registry

ENV_ID = "market-making-v0"

# Re-running this cell in a live kernel would register the same id twice;
# older gym releases raise "Cannot re-register id" in that case. Skip the call
# when the id is already known so the cell stays idempotent.
# `registry` is an object exposing `.env_specs` in older gym releases and a
# plain dict in newer ones, hence the getattr fallback.
# NOTE(review): if the entry-point class is redefined, restart the kernel so
# the new class is the one that gets registered.
_registered_specs = getattr(registry, "env_specs", registry)
if ENV_ID not in _registered_specs:
    register(
        id=ENV_ID,
        entry_point=SubGymMarketsMarketMakingEnv_v0,
    )

def generate_env(seed):
    """
    Build the "market-making-v0" gym environment with this notebook's fixed
    configuration, then seed it.

    Parameters
    ----------
    seed
        Value forwarded to ``env.seed``.

    Returns
    -------
    The object returned by ``gym.make``, after ``seed(seed)`` has been
    called on it.
    """
    # Fixed configuration forwarded verbatim to gym.make as keyword arguments.
    settings = dict(
        background_config="rmsc04",
        mkt_close="11:45:00",
        timestep_duration="10s",
        order_fixed_size=100,
        first_interval="00:10:00",
        max_inventory=1000,
        remaining_inventory_reward=-100,  # penalty
        inventory_reward_dampener=0.6,
        damp_mode="asymmetric",
        debug_mode=False,
    )

    market_env = gym.make("market-making-v0", **settings)
    market_env.seed(seed)
    return market_env

In [3]:
# Instantiate the market-making environment, seeded with 1.
env = generate_env(seed=1)

In [4]:
# --- Hyperparameters -------------------------------------------------------
episodes, window_length = 250, 30
lr = 5e-4
eps = 0.9
# Epsilon schedule built from the initial epsilon down to 0.001 over 170000
# epochs (presumably a linear interpolation, per the helper's name -- confirm
# in utils).
eps_decay = utils.linear_decay(start=eps, end=0.001, epochs=170000)

# --- Policy ----------------------------------------------------------------
# NOTE(review): the meaning of the second positional argument (3) is defined
# by SigPolicy and is not visible from this notebook.
sigpolicy = SigPolicy(env, 3)
sigpolicy.initialize_parameters(zero_bias=True)
# Weights of the policy's linear layer before training.
print(sigpolicy.linear.weight.data)

# --- Training --------------------------------------------------------------
results = train(
    env,
    sigpolicy,
    episodes,
    learning_rate=lr,
    epsilon=eps,
    epsilon_decay=eps_decay,
    window_length=window_length,
    printing=False,
)

# Weights of the same layer after training, for comparison with the print above.
print(sigpolicy.linear.weight.data)


tensor([[ 0.0579, -0.0357,  0.0119,  0.1326,  0.1309, -0.0637, -0.0314, -0.1207,
          0.0194,  0.0891,  0.0976, -0.0582, -0.0540, -0.1195, -0.1181,  0.0731,
         -0.1091, -0.0710, -0.1059,  0.0315, -0.1074,  0.0472, -0.0041, -0.1051,
          0.0046, -0.0137,  0.0452, -0.0524,  0.0935, -0.1253, -0.0627,  0.1022,
         -0.1014,  0.0430,  0.1289, -0.1123, -0.0412, -0.0867,  0.0973, -0.1244,
          0.1219,  0.0151,  0.1109, -0.1010, -0.0092, -0.1128, -0.0658,  0.1152,
          0.0303,  0.1240,  0.1034, -0.1040, -0.1048, -0.0374,  0.1105, -0.0226]])
  0%|          | 0/1 [00:00<?, ?it/s]reward: 0.015
pnl: 0.015
inv reward: 0.0
inventors: -100
loss: tensor(0.1089, grad_fn=<SmoothL1LossBackward>)
reward: 0.062150000000000004
pnl: 0.01215
inv reward: 0.05
inventors: -19
loss: tensor(0.1005, grad_fn=<SmoothL1LossBackward>)
reward: 0.0
pnl: 0
inv reward: -0.0
inventors: -19
loss: tensor(0.0280, grad_fn=<SmoothL1LossBackward>)
reward: -0.0095
pnl: 0
inv reward: -0.0095
inventors:

In [None]:
# Plot the entry at index 15 of results["actions"]
# (presumably the action sequence of one training run -- confirm against train()).
selected_actions = results["actions"][15]
plt.plot(selected_actions)
# Alternative view kept for reference: rewards, with values >= 50000 replaced by 500.
#plt.plot([reward if reward < 50000 else 500 for reward in results["rewards"]])