In [None]:
%run base.ipynb
%load_ext autoreload
%autoreload 2

import gym
import matplotlib.pyplot as plt

from abides_gym_market_making_environment_v2 import *
from policies_v1 import SigPolicy
from train_v2 import train
import utils

## Register the environment

In [None]:
# register market making env for gym use 
from gym.envs.registration import register

# Expose the v2 market-making environment class under the id used by gym.make below.
register(id="market-making-v2", entry_point=SubGymMarketsMarketMakingEnv_v2)

def generate_env(seed):
    """
    Build the "market-making-v2" gym environment with this notebook's fixed
    settings, then seed it.

    Parameters
    ----------
    seed
        Value forwarded unchanged to ``env.seed``.

    Returns
    -------
    The environment object produced by ``gym.make``, after seeding.
    """
    # Every keyword below is passed straight through gym.make to the
    # environment registered under "market-making-v2".
    settings = dict(
        background_config="rmsc04",
        mkt_close="10:45:00",
        timestep_duration="10s",
        order_fixed_size=10,
        first_interval="00:10:00",  # alternative value noted by the author: 00:13:00
        observe_first_interval=True,
        max_inventory=100,
        mkt_order_alpha=0.2,
        terminal_inventory_reward=2,  # reward
        inventory_reward_dampener=1.,  # alternative value noted by the author: 0.6
        damp_mode="asymmetric",
        debug_mode=False,
    )

    market_env = gym.make("market-making-v2", **settings)
    market_env.seed(seed)
    return market_env

# Instantiate the environment used by the rest of the notebook (seed 5).
env = generate_env(seed=5)

## Signature policy

In [None]:
# --- Hyper-parameters -------------------------------------------------------
episodes = 250          # third positional argument of train()
lr = 0.0001             # passed to train() as learning_rate
window_length = None    # forwarded unchanged to train()
eps = 1.0               # initial epsilon; also the start value of the decay below
# Decay schedule from `eps` down to 0.02, built by utils.linear_decay with
# epochs=20_000 (NOTE(review): confirm what unit `epochs` counts, given
# that only 250 episodes are trained).
eps_decay = utils.linear_decay(epochs=20_000, start=eps, end=0.02)

# --- Policy -----------------------------------------------------------------
# NOTE(review): the meaning of the second argument (5) is defined in
# policies_v1.SigPolicy and is not visible from this notebook.
sigpolicy = SigPolicy(env, 5)
sigpolicy.initialize_parameters(factor=1, zero_bias=True)

# --- Training ---------------------------------------------------------------
train_kwargs = dict(
    discount=1.0,
    learning_rate=lr,
    exploration="greedy",
    epsilon=eps,
    epsilon_decay=eps_decay,
    window_length=window_length,
    printing=False,
)
results = train(env, sigpolicy, episodes, **train_kwargs)

In [None]:

# Overview plots across the whole run. Rewards and losses are clipped for
# readability only: |reward| >= 400 is drawn as 0 and |loss| >= 2000 is drawn
# as 200. Pass results["rewards"] / results["losses"] directly to see the raw
# series instead.
utils.plot_results([
    [x if abs(x) < 400 else 0 for x in results["rewards"]],
    [x if abs(x) < 2000 else 200 for x in results["losses"]],
    results["cash"],
    results["terminal_inventory"],
])

# Entry to inspect in detail; -2 picks the second-to-last one (assuming the
# per-run containers in `results` are list-like).
# Named `episode_idx` rather than `id` so the builtin id() is not shadowed
# for the rest of the kernel session.
episode_idx = -2

observation_history = list(results["observations"][episode_idx])

# One figure per recorded series. The caption goes in the title; the x-axis is
# the position within the recorded sequence (presumably one entry per
# environment step -- confirm against train_v2.train).
for series, caption in (
    (observation_history, "Observation history"),
    (results["actions"][episode_idx], "Action history"),
    (results["mid_prices"][episode_idx], "Mid-price history"),
):
    plt.plot(series)
    plt.title(caption)
    plt.xlabel("Step")
    plt.show()


In [None]:
# Spot-check: loss values stored at positions 20 through 24.
loss_sample = results["losses"][20:25]
print(loss_sample)

# Mid-price series stored at index 1.
plt.plot(results["mid_prices"][1])

In [None]:
# results_reduced_100_runs_0804_1  # without inventory reward
# results_reduced_50_runs_0804_2  # with inventory reward
# results_reduced_50_runs_0804_3 # with inventory reward and correct epsilon decay
# results_v2_200_runs_0808_1_convergence # inventory only model, with some convergence
# results_v2_250_runs_0809_1 # inventory only model, with some convergence
# results_v2_250_runs_0809_2 # inventory only model, with some convergence

import pickle

# Optional persistence of the training `results`.
# Both switches default to None, so running this cell as-is neither writes nor
# reads anything. Set a path to enable the corresponding step.
SAVE_RESULTS_PATH = None  # e.g. '../results/results_NEW.pkl'
LOAD_RESULTS_PATH = None  # e.g. '../results/results_reduced_100_runs_0804_1.pkl'


def save_results(obj, path):
    """Serialize ``obj`` to ``path`` with pickle.

    The file is opened in binary mode and closed by the ``with`` block, so no
    explicit ``close()`` is needed.
    """
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def load_results(path):
    """Return the object stored in the pickle file at ``path``.

    WARNING: unpickling can execute arbitrary code. Only load files that were
    produced by this project and come from a trusted location.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# write results
if SAVE_RESULTS_PATH is not None:
    save_results(results, SAVE_RESULTS_PATH)

# load results (replaces the in-memory `results`)
if LOAD_RESULTS_PATH is not None:
    results = load_results(LOAD_RESULTS_PATH)
