# envs

> Training environments implementing the Gymnasium API

In [None]:
# | default_exp envs

In [None]:
# | hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# | hide
from fastcore.test import *
from hydra import compose
from hydra import initialize
from hydra.utils import instantiate
from nbdev.showdoc import *

In [None]:
# | export

from collections import namedtuple
from typing import *

from fastcore.basics import patch
import gymnasium as gym
import numpy as np
import pandas as pd

from rlmm.core import *
from rlmm.datasets import *

In [None]:
# Type variable to keep input action type consistent
# ACT = TypeVar("ACT")

In [None]:
# | export
# Allowed order sides; used as the annotation of the `side` argument of
# `build_order`.
side = Literal["bid", "ask"]

In [None]:
side

typing.Literal['bid', 'ask']

In [None]:
# | export
# Lightweight record describing a single order. The field names match the
# columns of the frame produced by `build_df_orders`.
Order = namedtuple(
    "Order",
    ("idx", "side", "price", "amount", "ahead", "age"),
)

In [None]:
o = Order(idx=0, side="ask", price=2000, amount=100, ahead=0.0, age=0.0)
o

Order(idx=0, side='ask', price=2000, amount=100, ahead=0.0, age=0.0)

In [None]:
# | export


class BaseEnv(gym.Env):
    """Common base class for the environments defined in this module.

    Adds nothing on top of `gym.Env`; it only provides a shared parent type.
    """

In [None]:
# | export


class OrderBookEnv(BaseEnv):
    """Order-book environment replaying book snapshots and trades.

    The behaviour (reset, step, order handling, ...) is attached to this class
    by `@patch`-decorated functions defined after it; the constructor only
    stores the configuration, declares the spaces and performs a first reset.

    Args:
        dataset_book: Source of order-book snapshots; must expose
            `price_levels` and support `len()` and slicing.
        dataset_trades: Source of trades, sliced by the timestamps of the
            sampled book window.
        initial_cash: Cash balance restored on every reset.
        initial_inventory: Inventory restored on every reset.
        min_value: Portfolio value below which `step` ends the episode.
        order_max_age: Orders whose age exceeds this are removed by
            `gc_orders`.
        episode_length: Number of book rows sampled per episode; `None`
            uses the whole dataset.
        seed: Seed forwarded to the initial `reset`.
        render_mode: Must be `None` or one of `metadata["render_modes"]`.
    """

    # Gymnasium expects a list of supported modes. An empty list (instead of
    # `None`) keeps the membership check in `__init__` well-defined, so an
    # unsupported mode fails the assertion rather than raising a TypeError.
    metadata = {"render_modes": []}

    def __init__(
        self,
        dataset_book: DatasetBook,
        dataset_trades: DatasetTrades,
        initial_cash: float = 1_000_000,
        initial_inventory: float = 100,
        min_value: float = 0,
        order_max_age: Union[int, None] = 10,
        episode_length: Union[int, None] = 100,
        seed: Union[int, None] = 42,
        render_mode: Union[str, None] = None,
    ) -> None:
        supported_modes = self.metadata["render_modes"] or []
        assert (
            render_mode is None or render_mode in supported_modes
        ), f"Unsupported render_mode {render_mode!r}; expected one of {supported_modes}"

        self.dataset_book = dataset_book
        self.dataset_trades = dataset_trades
        self.initial_cash = initial_cash
        self.initial_inventory = initial_inventory
        self.min_value = min_value
        self.order_max_age = order_max_age
        self.episode_length = episode_length
        self.seed = seed
        self.render_mode = render_mode

        # NOTE(review): the space is declared as a Dict, while `get_obs`
        # returns one flat concatenated array - confirm which is intended.
        self.observation_space = gym.spaces.Dict(
            {
                "book_volumes": gym.spaces.Box(
                    low=0, high=np.inf, shape=(self.dataset_book.price_levels * 2,)
                ),
                "order_volumes": gym.spaces.Box(
                    low=0, high=np.inf, shape=(self.dataset_book.price_levels * 2,)
                ),
                "cash": gym.spaces.Box(low=0, high=np.inf, shape=(1,)),
                "inventory": gym.spaces.Box(low=0, high=np.inf, shape=(1,)),
            }
        )

        # A variable-length sequence of (side, price level, fraction) tuples.
        self.action_space = gym.spaces.Sequence(
            gym.spaces.Tuple(
                (
                    gym.spaces.Discrete(2),
                    gym.spaces.Discrete(self.dataset_book.price_levels),
                    gym.spaces.Box(low=0, high=1, shape=(1,)),
                )
            )
        )

        # Sampled data window; filled in by `reset` when resampling.
        self.df_book = None
        self.df_trades = None

        # Current row in `df_book` and the row index at which the episode ends.
        self.idx = None
        self.idx_end = None

        # Book snapshot for the current row and the agent's own volume per level.
        self.book_prices_ask = None
        self.book_prices_bid = None
        self.book_volumes_ask = None
        self.book_volumes_bid = None
        self.order_volumes_ask = None
        self.order_volumes_bid = None

        # Account state.
        self.midprice = None
        self.cash = None
        self.inventory = None
        self.portfolio_value = None

        # Order id counter and the frame of open orders.
        self.oid = None
        self.df_orders = None

        # Sample the first episode so the environment is usable immediately.
        self.reset(seed=self.seed, options={"resample": True})

In [None]:
params = {
    "_target_": "rlmm.envs.OrderBookEnv",
    # "dataset_book": {"_target_": "rlmm.datasets.DatasetBook"},
    # "dataset_trades": {"_target_": "rlmm.datasets.DatasetBook"},
}

hydra_nb(obj=OrderBookEnv, path="../conf/envs/order_book.yaml", params=params)

initial_cash: 1000000
initial_inventory: 100
min_value: 0
order_max_age: 10
episode_length: 100
seed: 42
render_mode: null
_target_: rlmm.envs.OrderBookEnv



In [None]:
with initialize(version_base=None, config_path="../conf"):
    cfg = compose(config_name="conf.yaml")

cfg

{'a': 1, 'dataset_book': {'time_start': '21_03_05-08_30_00', 'time_end': '21_03_05-10_30_00', 'time_format': '%y_%m_%d-%H_%M_%S', 'resample_interval': 'minute', 'resample_frequency': 1, 'path': '../data/test/parquet/book_snapshot_25/ETHUSDT', 'price_levels': 10, 'col_prices_ask_re': '(?=.*price)(?=.*ask)', 'col_prices_bid_re': '(?=.*price)(?=.*bid)', 'col_volumes_ask_re': '(?=.*amount)(?=.*ask)', 'col_volumes_bid_re': '(?=.*amount)(?=.*bid)', 'col_sort': '\\d+', '_target_': 'rlmm.datasets.DatasetBook'}, 'dataset_trades': {'time_start': '21_03_05-08_30_00', 'time_end': '21_03_05-10_30_00', 'time_format': '%y_%m_%d-%H_%M_%S', 'resample_interval': 'minute', 'resample_frequency': 1, 'path': '../data/test/parquet/trades/ETHUSDT', 'col_side': 'side', 'col_price': 'price', 'col_amount': 'amount', '_target_': 'rlmm.datasets.DatasetTrades'}, 'envs': {'initial_cash': 1000000, 'initial_inventory': 100, 'min_value': 0, 'order_max_age': 10, 'episode_length': 100, 'seed': 42, 'render_mode': None, '_

In [None]:
dataset_book = instantiate(cfg.dataset_book)
dataset_book.df.head()

Unnamed: 0_level_0,asks[0].price,asks[1].price,asks[2].price,asks[3].price,asks[4].price,asks[5].price,asks[6].price,asks[7].price,asks[8].price,asks[9].price,...,bids[0].amount,bids[1].amount,bids[2].amount,bids[3].amount,bids[4].amount,bids[5].amount,bids[6].amount,bids[7].amount,bids[8].amount,bids[9].amount
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-05 00:00:00,1539.24,1539.5,1539.63,1539.64,1539.65,1539.71,1539.75,1539.76,1539.78,1539.79,...,7.93114,6.45542,8.88544,0.375,1.0,0.3825,1.545,0.2475,1.9,2.59871
2021-03-05 00:01:00,1541.3,1541.32,1541.33,1541.41,1541.42,1541.43,1541.45,1541.46,1541.5,1541.53,...,9.37198,14.95115,5.00579,4.7,15.0,7.55308,2.74667,0.585,2.9,2.59544
2021-03-05 00:02:00,1533.44,1533.45,1533.46,1533.49,1533.56,1533.64,1533.66,1533.67,1533.7,1533.74,...,0.4,5.94615,0.58749,6.19893,0.31871,2.47229,0.026,1.0,2.61894,0.01362
2021-03-05 00:03:00,1528.99,1529.0,1529.12,1529.13,1529.24,1529.28,1529.35,1529.38,1529.49,1529.51,...,0.76963,0.95991,4.64718,21.6,1.0,2.0,14.11699,2.61644,7.84923,19.268
2021-03-05 00:04:00,1525.18,1525.22,1525.23,1525.3,1525.4,1525.41,1525.46,1525.48,1525.5,1525.51,...,8.03,0.00721,0.00776,0.02185,0.09188,0.02898,0.01067,1.38097,0.00844,0.01363


In [None]:
dataset_trades = instantiate(cfg.dataset_trades)
dataset_trades.df.head()

Unnamed: 0_level_0,side,price,amount
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-03-05 00:00:00,sell,1539.22,0.01318
2021-03-05 00:01:00,sell,1541.06,0.04885
2021-03-05 00:02:00,sell,1533.2,0.08495
2021-03-05 00:03:00,buy,1529.0,0.99997
2021-03-05 00:04:00,buy,1525.18,1.07


In [None]:
# Build an environment on the datasets instantiated above, overriding the
# default starting inventory (100) with 1,000 units.
env = OrderBookEnv(
    dataset_book=dataset_book,
    dataset_trades=dataset_trades,
    initial_cash=1_000_000,
    initial_inventory=1_000,
)

In [None]:
# Exists due to existing impl
env.reset()

In [None]:
# | export


@patch
def resample_book(
    self: OrderBookEnv, length: Union[int, None] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Pick a window of book rows and the trades spanning the same timestamps.

    Args:
        length: Number of consecutive book rows to sample. `None` selects the
            whole book dataset.

    Returns:
        A `(df_book, df_trades)` pair, where `df_trades` is the trades dataset
        sliced from the first to the last timestamp of `df_book`.

    Raises:
        ValueError: If `length` exceeds the number of rows in the book dataset.
    """
    if length is None:
        df_book = self.dataset_book[:]
    else:
        length_dataset = len(self.dataset_book)
        if length > length_dataset:
            raise ValueError(
                f"Length {length} is greater than dataset length {length_dataset}"
            )

        # `integers` excludes the upper bound, so `+ 1` makes the last valid
        # start position reachable and keeps `length == length_dataset` legal
        # (the only valid start is then 0).
        start = self.np_random.integers(0, length_dataset - length + 1)
        df_book = self.dataset_book[start : start + length]

    df_trades = self.dataset_trades[df_book.index[0] : df_book.index[-1]]

    return df_book, df_trades

In [None]:
df_book, df_trades = env.resample_book()
df_book.head()

Unnamed: 0_level_0,asks[0].price,asks[1].price,asks[2].price,asks[3].price,asks[4].price,asks[5].price,asks[6].price,asks[7].price,asks[8].price,asks[9].price,...,bids[0].amount,bids[1].amount,bids[2].amount,bids[3].amount,bids[4].amount,bids[5].amount,bids[6].amount,bids[7].amount,bids[8].amount,bids[9].amount
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-05 00:00:00,1539.24,1539.5,1539.63,1539.64,1539.65,1539.71,1539.75,1539.76,1539.78,1539.79,...,7.93114,6.45542,8.88544,0.375,1.0,0.3825,1.545,0.2475,1.9,2.59871
2021-03-05 00:01:00,1541.3,1541.32,1541.33,1541.41,1541.42,1541.43,1541.45,1541.46,1541.5,1541.53,...,9.37198,14.95115,5.00579,4.7,15.0,7.55308,2.74667,0.585,2.9,2.59544
2021-03-05 00:02:00,1533.44,1533.45,1533.46,1533.49,1533.56,1533.64,1533.66,1533.67,1533.7,1533.74,...,0.4,5.94615,0.58749,6.19893,0.31871,2.47229,0.026,1.0,2.61894,0.01362
2021-03-05 00:03:00,1528.99,1529.0,1529.12,1529.13,1529.24,1529.28,1529.35,1529.38,1529.49,1529.51,...,0.76963,0.95991,4.64718,21.6,1.0,2.0,14.11699,2.61644,7.84923,19.268
2021-03-05 00:04:00,1525.18,1525.22,1525.23,1525.3,1525.4,1525.41,1525.46,1525.48,1525.5,1525.51,...,8.03,0.00721,0.00776,0.02185,0.09188,0.02898,0.01067,1.38097,0.00844,0.01363


In [None]:
df_trades.head()

Unnamed: 0_level_0,side,price,amount
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-03-05 00:00:00,sell,1539.22,0.01318
2021-03-05 00:01:00,sell,1541.06,0.04885
2021-03-05 00:02:00,sell,1533.2,0.08495
2021-03-05 00:03:00,buy,1529.0,0.99997
2021-03-05 00:04:00,buy,1525.18,1.07


In [None]:
# | export


@patch
def update_book(self: OrderBookEnv) -> None:
    """Load the book prices and volumes of the current row (`self.idx`)."""
    snapshot_time = self.df_book.index[self.idx]

    # Target attribute -> dataset column group holding its values.
    column_groups = {
        "book_prices_bid": self.dataset_book.cols_prices_bid,
        "book_prices_ask": self.dataset_book.cols_prices_ask,
        "book_volumes_bid": self.dataset_book.cols_volumes_bid,
        "book_volumes_ask": self.dataset_book.cols_volumes_ask,
    }
    for attribute, columns in column_groups.items():
        setattr(self, attribute, self.df_book.loc[snapshot_time, columns].values)

In [None]:
# Point the environment at the sampled book and load its first snapshot.
env.df_book = df_book
env.idx = 0
env.update_book()
env.book_prices_bid, env.book_prices_ask, env.book_volumes_bid, env.book_volumes_ask

(array([1539.23, 1539.22, 1538.99, 1538.91, 1538.89, 1538.86, 1538.83,
        1538.82, 1538.79, 1538.77]),
 array([1539.24, 1539.5 , 1539.63, 1539.64, 1539.65, 1539.71, 1539.75,
        1539.76, 1539.78, 1539.79]),
 array([7.93114, 6.45542, 8.88544, 0.375  , 1.     , 0.3825 , 1.545  ,
        0.2475 , 1.9    , 2.59871]),
 array([2.9426 , 0.0236 , 7.37601, 1.45969, 5.     , 3.49522, 4.54795,
        1.96328, 9.00241, 6.4188 ]))

In [None]:
# | export


@patch
def get_midprice(self: OrderBookEnv) -> float:
    """Return the average of the first ask price and the first bid price."""
    first_ask = self.book_prices_ask[0]
    first_bid = self.book_prices_bid[0]
    return (first_ask + first_bid) / 2

In [None]:
env.get_midprice()

1539.2350000000001

In [None]:
# | export


@patch
def get_pv(
    self: OrderBookEnv,
) -> float:
    """Return the portfolio value: cash plus inventory valued at the midprice."""
    inventory_value = self.inventory * self.get_midprice()
    return self.cash + inventory_value

In [None]:
env.cash = 1_000_000
env.inventory = 1_000
env.get_pv()

2539235.0

In [None]:
# | export


@patch
def build_df_orders(
    self: OrderBookEnv, orders: Union[List[Order], None] = None
) -> pd.DataFrame:
    """Turn a list of orders into a frame with one row per order.

    Args:
        orders: Orders to tabulate; `None` produces an empty frame.

    Returns:
        A DataFrame with columns `idx`, `side`, `price`, `amount`, `ahead`
        and `age`, in that order.
    """
    fields = ("idx", "side", "price", "amount", "ahead", "age")
    rows = [] if orders is None else list(orders)

    columns = {name: [getattr(row, name) for row in rows] for name in fields}
    return pd.DataFrame(columns)

In [None]:
env.build_df_orders()

Unnamed: 0,idx,side,price,amount,ahead,age


In [None]:
env.df_orders = env.build_df_orders([o])
env.df_orders

Unnamed: 0,idx,side,price,amount,ahead,age
0,0,ask,2000,100,0.0,0.0


In [None]:
# | export


def _order_volume_per_level(df_orders, order_side, book_prices, n_levels):
    """Return the summed amount of `order_side` orders at each book price."""
    side_orders = df_orders.loc[df_orders["side"] == order_side, ["price", "amount"]]
    if side_orders.empty:
        return np.zeros(n_levels)

    amount_by_price = side_orders.groupby(by="price")["amount"].sum()
    # `reindex` aligns by price label, so each book level receives the amount
    # resting at exactly that price regardless of how the book side is sorted
    # (bid prices are listed in descending order, whereas the grouped prices
    # are ascending). Levels without an order get 0, and orders at prices
    # that are not in the book are ignored.
    return amount_by_price.reindex(book_prices, fill_value=0.0).to_numpy(dtype=float)


@patch
def update_order_volumes(self: OrderBookEnv):
    """Recompute the agent's own order volume at every visible book level.

    Sets `order_volumes_bid` and `order_volumes_ask` to arrays aligned with
    `book_prices_bid` and `book_prices_ask` respectively. Prices are matched
    by exact equality.
    """
    n_levels = self.dataset_book.price_levels

    self.order_volumes_bid = _order_volume_per_level(
        self.df_orders, "bid", self.book_prices_bid, n_levels
    )
    self.order_volumes_ask = _order_volume_per_level(
        self.df_orders, "ask", self.book_prices_ask, n_levels
    )

In [None]:
# env.df_orders = env.df_orders.append(
#     pd.DataFrame({
#         "idx": [1, 2],
#         "side": ["ask", "bid"],
#         "price": [env.book_prices_ask[3], env.book_prices_bid[3]],
#         "amount": [100, 200],
#         "ahead": [0, 0],
#         "age": [0, 0]}))
env.df_orders

Unnamed: 0,idx,side,price,amount,ahead,age
0,0,ask,2000,100,0.0,0.0


In [None]:
env.book_prices_bid, env.book_prices_ask

(array([1539.23, 1539.22, 1538.99, 1538.91, 1538.89, 1538.86, 1538.83,
        1538.82, 1538.79, 1538.77]),
 array([1539.24, 1539.5 , 1539.63, 1539.64, 1539.65, 1539.71, 1539.75,
        1539.76, 1539.78, 1539.79]))

In [None]:
env.update_order_volumes()
env.order_volumes_bid, env.order_volumes_ask

(array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))

In [None]:
# | export


@patch
def get_obs(self: OrderBookEnv) -> np.ndarray:
    """Assemble the flat observation vector.

    Layout: reversed bid book volumes, ask book volumes, reversed own bid
    volumes, own ask volumes, then cash and inventory.
    """
    book_bid = np.flip(self.book_volumes_bid)
    book_ask = self.book_volumes_ask
    own_bid = np.flip(self.order_volumes_bid)
    own_ask = self.order_volumes_ask
    account = np.array([self.cash, self.inventory])

    return np.concatenate([book_bid, book_ask, own_bid, own_ask, account])

In [None]:
env.get_obs()

array([2.59871e+00, 1.90000e+00, 2.47500e-01, 1.54500e+00, 3.82500e-01,
       1.00000e+00, 3.75000e-01, 8.88544e+00, 6.45542e+00, 7.93114e+00,
       2.94260e+00, 2.36000e-02, 7.37601e+00, 1.45969e+00, 5.00000e+00,
       3.49522e+00, 4.54795e+00, 1.96328e+00, 9.00241e+00, 6.41880e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       1.00000e+06, 1.00000e+03])

In [None]:
# | export


@patch
def get_info(self: OrderBookEnv):
    """Return a dict describing the current row and account state."""
    row = self.idx

    info = {"idx": row}
    info["timestamp"] = str(self.df_book.index[row])
    info["cash"] = self.cash
    info["inventory"] = self.inventory
    info["portfolio_value"] = self.get_pv()
    # Not reported yet: "vwap" and "total_reward".
    return info

In [None]:
env.idx = 0
env.get_info()

{'idx': 0,
 'timestamp': '2021-03-05 00:00:00',
 'cash': 1000000,
 'inventory': 1000,
 'portfolio_value': 2539235.0}

In [None]:
# | export


@patch
def reset(
    self: OrderBookEnv,
    seed: Union[int, None] = None,
    options: Union[Dict, None] = None,
):
    """Start a new episode and return its first observation.

    Args:
        seed: Forwarded to the base class `reset`, which seeds `np_random`.
        options: Optional settings. `{"resample": True}` draws a new data
            window with `resample_book`; a missing key or `None` keeps the
            current window.

    Returns:
        An `(observation, info)` pair for the first row of the window.
    """
    super(OrderBookEnv, self).reset(seed=seed)

    # `options` is `None` in the standard Gymnasium call, so read it
    # defensively instead of indexing into it.
    resample = bool(options.get("resample", False)) if options else False

    # A window is also needed when none has been sampled yet.
    if resample or self.df_book is None:
        self.df_book, self.df_trades = self.resample_book(length=self.episode_length)

    self.idx = 0
    if self.episode_length is not None:
        self.idx_end = self.idx + self.episode_length
    else:
        self.idx_end = len(self.df_book)

    self.oid = 0
    self.df_orders = self.build_df_orders()

    self.update_book()
    self.update_order_volumes()

    self.cash = self.initial_cash
    self.inventory = self.initial_inventory
    self.portfolio_value = self.get_pv()
    self.total_reward = 0

    obs = self.get_obs()
    info = self.get_info()

    self.state = obs
    self.info = info

    return obs, info

In [None]:
env.reset()

(array([2.59871e+00, 1.90000e+00, 2.47500e-01, 1.54500e+00, 3.82500e-01,
        1.00000e+00, 3.75000e-01, 8.88544e+00, 6.45542e+00, 7.93114e+00,
        2.94260e+00, 2.36000e-02, 7.37601e+00, 1.45969e+00, 5.00000e+00,
        3.49522e+00, 4.54795e+00, 1.96328e+00, 9.00241e+00, 6.41880e+00,
        0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
        0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
        0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
        0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
        1.00000e+06, 1.00000e+03]),
 {'idx': 0,
  'timestamp': '2021-03-05 00:00:00',
  'cash': 1000000,
  'inventory': 1000,
  'portfolio_value': 2539235.0})

In [None]:
env.reset(options={"resample": True})

(array([1.500000e+01, 3.150020e+00, 6.378770e+00, 1.503374e+01,
        4.091820e+00, 1.070000e+00, 3.210000e-01, 4.601000e+00,
        1.507110e+00, 1.298630e+00, 7.349200e-01, 5.729470e+00,
        2.474070e+00, 2.882000e-01, 5.542400e-01, 2.639190e+00,
        8.807530e+00, 3.251250e+01, 3.376740e+00, 2.746670e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        1.000000e+06, 1.000000e+03]),
 {'idx': 0,
  'timestamp': '2021-03-05 06:16:00',
  'cash': 1000000,
  'inventory': 1000,
  'portfolio_value': 2480655.0})

In [None]:
# | export


@patch
def build_order(self: OrderBookEnv, side: side, price_level: int, amount: float):
    """Create an order at a given book level.

    Args:
        side: `"bid"` or `"ask"`.
        price_level: Index into the current book prices of that side.
        amount: Fraction to commit. For a bid it is the fraction of cash,
            converted to units at the level's price; for an ask it is the
            fraction of inventory.

    Returns:
        An `Order` carrying the next order id, with `ahead` and `age` at 0.

    Raises:
        ValueError: If `side` is neither `"bid"` nor `"ask"`.
    """
    if side == "bid":
        price = self.book_prices_bid[price_level]
        amount = self.cash * amount / price
    elif side == "ask":
        price = self.book_prices_ask[price_level]
        amount = self.inventory * amount
    else:
        raise ValueError("Invalid side value")

    order = Order(idx=self.oid, side=side, price=price, amount=amount, ahead=0, age=0)
    # Advance the counter so every order built within an episode gets its own
    # id; it is reset to 0 by `reset` and was otherwise never incremented.
    self.oid += 1

    return order

In [None]:
o = env.build_order(side="ask", price_level=3, amount=0.2)
env.cash, env.inventory, o

(1000000,
 1000,
 Order(idx=0, side='ask', price=1480.97, amount=200.0, ahead=0, age=0))

In [None]:
# | export


@patch
def fill_effects(self: OrderBookEnv, order: Order):
    """Apply an order's effect on cash and inventory if the account covers it.

    A bid needs enough cash for `price * amount`; an ask needs enough
    inventory for `amount`. Returns `True` when the balances were updated and
    `False` when nothing was changed.
    """
    notional = order.price * order.amount

    if order.side == "bid" and self.cash >= notional:
        self.cash -= notional
        self.inventory += order.amount
        return True

    if order.side == "ask" and self.inventory >= order.amount:
        self.inventory -= order.amount
        self.cash += notional
        return True

    return False

In [None]:
env.cash, env.inventory, o

(1000000,
 1000,
 Order(idx=0, side='ask', price=1480.97, amount=200.0, ahead=0, age=0))

In [None]:
env.fill_effects(o)

True

In [None]:
env.cash, env.inventory

(1296194.0, 800.0)

In [None]:
# | export


@patch
def build_df_orders(
    self: OrderBookEnv, orders: Union[List[Order], None] = None
) -> pd.DataFrame:
    """Build the orders frame (`idx`, `side`, `price`, `amount`, `ahead`, `age`).

    NOTE(review): this repeats the `build_df_orders` patch defined earlier in
    this notebook with the same behaviour; one of the two can likely go.

    Args:
        orders: Orders to tabulate; `None` yields a frame without rows.
    """
    columns = ["idx", "side", "price", "amount", "ahead", "age"]
    records = {column: [] for column in columns}

    if orders is not None:
        for order in orders:
            for column in columns:
                records[column].append(getattr(order, column))

    return pd.DataFrame(records)

In [None]:
env.df_orders

Unnamed: 0,idx,side,price,amount,ahead,age


In [None]:
env.build_df_orders([o, o, o])

Unnamed: 0,idx,side,price,amount,ahead,age
0,0,ask,1480.97,200.0,0,0
1,0,ask,1480.97,200.0,0,0
2,0,ask,1480.97,200.0,0,0


In [None]:
# | export


@patch
def place_orders(self: OrderBookEnv, orders: List[Order]):
    """Apply each order's fill effects and record the ones that went through.

    Orders for which `fill_effects` returns `False` are dropped. The accepted
    orders are appended to `df_orders` and returned.
    """
    # Evaluated in input order, so earlier orders consume balance first.
    accepted = [order for order in orders if self.fill_effects(order)]

    new_rows = self.build_df_orders(accepted)
    combined = pd.concat([self.df_orders, new_rows])
    self.df_orders = combined.reset_index(drop=True)

    return accepted

In [None]:
env.cash, env.inventory

(1296194.0, 800.0)

In [None]:
out_orders = env.place_orders([o, o, o])
env.cash, env.inventory, out_orders

(2184776.0,
 200.0,
 [Order(idx=0, side='ask', price=1480.97, amount=200.0, ahead=0, age=0),
  Order(idx=0, side='ask', price=1480.97, amount=200.0, ahead=0, age=0),
  Order(idx=0, side='ask', price=1480.97, amount=200.0, ahead=0, age=0)])

In [None]:
# | export


@patch
def process_trades(self: OrderBookEnv):
    """Match the trades of the current interval against the open orders.

    The interval runs from the timestamp of the current book row up to, but
    not including, the timestamp of the next row, so a trade is never counted
    in two consecutive steps. On the last row, all remaining trades are used.

    Traded volume is summed per `(side, price)` after mapping the trade sides
    `"sell"`/`"buy"` to `"ask"`/`"bid"`, and is then consumed by the open
    orders at the same side and price, ordered by their `ahead` value:

    * volume not exceeding `ahead` only reduces `ahead`;
    * volume reaching into the order fills it partially and reduces `amount`;
    * volume covering the order fills it completely and removes it.

    NOTE(review): `place_orders` already calls `fill_effects` when an order is
    placed, so applying it again here on a fill may count the same order
    twice - confirm the intended accounting.
    NOTE(review): trades are matched to orders with the same mapped side
    label - confirm that this is the intended side convention of the data.
    """
    if self.df_trades.empty or self.df_orders.empty:
        return

    timestamps = self.df_book.index
    start_time = timestamps[self.idx]
    has_next = self.idx + 1 < len(timestamps)
    end_time = timestamps[self.idx + 1] if has_next else None

    trades = self.df_trades.loc[start_time:end_time]
    if has_next:
        # Label slicing includes `end_time`; leave those trades to the next step.
        trades = trades[trades.index < end_time]
    if trades.empty:
        return

    trades = trades.replace({"side": {"sell": "ask", "buy": "bid"}})
    traded_volume = trades.groupby(by=["side", "price"])["amount"].sum().to_dict()

    order_combos = self.df_orders[["side", "price"]].drop_duplicates()
    for order_side, price in zip(order_combos["side"], order_combos["price"]):
        remaining = traded_volume.get((order_side, price), 0.0)
        if remaining <= 0:
            continue

        at_level = (self.df_orders["side"] == order_side) & (
            self.df_orders["price"] == price
        )
        queue = self.df_orders[at_level].sort_values(by="ahead")

        filled_labels = []
        for label, order in queue.iterrows():
            if remaining <= 0:
                break

            ahead = order["ahead"]
            amount = order["amount"]

            if remaining <= ahead:
                # The volume is absorbed by the queue in front of the order.
                self.df_orders.at[label, "ahead"] = ahead - remaining
                remaining = 0
            elif remaining - ahead < amount:
                # Partial fill with whatever is left after the queue ahead.
                filled = remaining - ahead
                self.df_orders.at[label, "ahead"] = 0
                self.df_orders.at[label, "amount"] = amount - filled
                self.fill_effects(
                    Order(
                        idx=order["idx"],
                        side=order_side,
                        price=price,
                        amount=filled,
                        ahead=0,
                        age=order["age"],
                    )
                )
                remaining = 0
            else:
                # Complete fill; the order leaves the book.
                filled_labels.append(label)
                self.fill_effects(
                    Order(
                        idx=order["idx"],
                        side=order_side,
                        price=price,
                        amount=amount,
                        ahead=0,
                        age=order["age"],
                    )
                )
                remaining -= ahead + amount

        # Drop after the loop so row labels stay valid while iterating.
        if filled_labels:
            self.df_orders = self.df_orders.drop(filled_labels).reset_index(drop=True)

In [None]:
env.cash, env.inventory, env.df_orders

(2184776.0,
 200.0,
    idx side    price  amount  ahead  age
 0  0.0  ask  1480.97   200.0    0.0  0.0
 1  0.0  ask  1480.97   200.0    0.0  0.0
 2  0.0  ask  1480.97   200.0    0.0  0.0)

In [None]:
env.process_trades()
env.cash, env.inventory, env.df_orders

(2184776.0,
 200.0,
    idx side    price  amount  ahead  age
 0  0.0  ask  1480.97   200.0    0.0  0.0
 1  0.0  ask  1480.97   200.0    0.0  0.0
 2  0.0  ask  1480.97   200.0    0.0  0.0)

In [None]:
# | export


@patch
def gc_orders(self: OrderBookEnv):
    """Remove orders older than `order_max_age` and return what they held.

    For an expired bid, `price * amount` is added back to cash; for an
    expired ask, `amount` is added back to inventory. When `order_max_age`
    is `None`, orders never expire and nothing is done.
    """
    # `None` is an allowed setting; comparing ages against it would raise.
    if self.order_max_age is None or self.df_orders.empty:
        return

    expired = self.df_orders[self.df_orders["age"] > self.order_max_age]
    for _, order in expired.iterrows():
        if order["side"] == "bid":
            self.cash += order["price"] * order["amount"]
        elif order["side"] == "ask":
            self.inventory += order["amount"]

    self.df_orders = self.df_orders.drop(expired.index).reset_index(drop=True)

In [None]:
# env.place_orders([o]),
env.cash, env.inventory, env.df_orders

(2184776.0,
 200.0,
    idx side    price  amount  ahead  age
 0  0.0  ask  1480.97   200.0    0.0  0.0
 1  0.0  ask  1480.97   200.0    0.0  0.0
 2  0.0  ask  1480.97   200.0    0.0  0.0)

In [None]:
env.df_orders.at[1, "age"] = 10
env.df_orders

Unnamed: 0,idx,side,price,amount,ahead,age
0,0.0,ask,1480.97,200.0,0.0,0.0
1,0.0,ask,1480.97,200.0,0.0,10.0
2,0.0,ask,1480.97,200.0,0.0,0.0


In [None]:
env.gc_orders()
env.df_orders

Unnamed: 0,idx,side,price,amount,ahead,age
0,0.0,ask,1480.97,200.0,0.0,0.0
1,0.0,ask,1480.97,200.0,0.0,10.0
2,0.0,ask,1480.97,200.0,0.0,0.0


In [None]:
# | export


@patch
def convert_action(self: OrderBookEnv, action: Tuple[str, float, float]):
    """Unpack a three-element action and build the matching order.

    The elements are passed to `build_order` in order, as its side, price
    level and amount arguments.
    """
    order_side, level, fraction = action
    return self.build_order(order_side, level, fraction)

In [None]:
act = ("ask", 2, 0.2)
env.book_prices_ask, env.cash, env.inventory, env.convert_action(act)

(array([1480.66, 1480.67, 1480.82, 1480.97, 1480.98, 1480.99, 1481.  ,
        1481.01, 1481.08, 1481.11]),
 2184776.0,
 200.0,
 Order(idx=0, side='ask', price=1480.82, amount=40.0, ahead=0, age=0))

In [None]:
@patch
def render(self: OrderBookEnv):
    """Render a frame when any render mode is set; otherwise return `None`."""
    if not self.render_mode:
        return None
    return self.render_frame()

In [None]:
# | export


@patch
def render_frame(self: OrderBookEnv):
    """Draw one frame of the environment (not implemented yet)."""
    raise NotImplementedError()

In [None]:
# | export


@patch
def step(self: OrderBookEnv, actions: List[Tuple[str, float, float]]):
    """Advance the environment by one book row.

    Args:
        actions: Actions to convert into orders with `convert_action`;
            `None` entries are skipped.

    Returns:
        `(observations, reward, done, info)`. The reward is the change in
        portfolio value since the previous step, valued on the new book row.
        NOTE(review): Gymnasium's `step` contract is a 5-tuple with separate
        `terminated`/`truncated` flags; the 4-tuple is kept so existing
        callers are not broken - confirm before changing.
    """
    act_orders = [
        self.convert_action(action) for action in actions if action is not None
    ]

    # TODO: Review this first section
    self.place_orders(act_orders)

    self.gc_orders()
    self.process_trades()

    if not self.df_orders.empty:
        self.df_orders["age"] += 1

    self.idx += 1

    # Load the new row before valuing the portfolio, so the reward, the
    # termination test and the reported info all use the same prices.
    self.update_book()
    self.update_order_volumes()

    # TODO: What is the reward function
    current_value = self.get_pv()
    reward = current_value - self.portfolio_value
    self.portfolio_value = current_value
    self.total_reward += reward

    # TODO: Create new is_done function if more conditions for ending
    # The episode ends on the last row that exists in the window, so a further
    # step can never index past the end of `df_book`.
    last_idx = min(self.idx_end, len(self.df_book) - 1)
    done = bool(self.idx >= last_idx or current_value < self.min_value)

    observations = self.get_obs()
    info = self.get_info()
    self.state = observations
    self.info = info

    if self.render_mode:
        self.render()

    return observations, reward, done, info

In [None]:
env.reset()

(array([1.500000e+01, 3.150020e+00, 6.378770e+00, 1.503374e+01,
        4.091820e+00, 1.070000e+00, 3.210000e-01, 4.601000e+00,
        1.507110e+00, 1.298630e+00, 7.349200e-01, 5.729470e+00,
        2.474070e+00, 2.882000e-01, 5.542400e-01, 2.639190e+00,
        8.807530e+00, 3.251250e+01, 3.376740e+00, 2.746670e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        1.000000e+06, 1.000000e+03]),
 {'idx': 0,
  'timestamp': '2021-03-05 06:16:00',
  'cash': 1000000,
  'inventory': 1000,
  'portfolio_value': 2480655.0})

In [None]:
# Each action is (side, price level, fraction); `build_order` applies the
# fraction to cash for bids and to inventory for asks.
a1 = ("ask", 2, 0.1)
a2 = ("bid", 3, 0.2)
a3 = ("ask", 1, 0.3)
env.step([a1, a2, a3])
env.info

[('ask', 2, 0.1), ('bid', 3, 0.2), ('ask', 1, 0.3)]


{'idx': 1,
 'timestamp': '2021-03-05 06:17:00',
 'cash': 1392283.0,
 'inventory': 735.0867594712704,
 'portfolio_value': 2482199.81370185}

In [None]:
# | export


@patch
def render_frame(self: OrderBookEnv):
    """Produce a single rendered frame; rendering is not implemented yet."""
    raise NotImplementedError()

In [None]:
# | export


@patch
def render(self: OrderBookEnv):
    """Render a frame when the render mode is `"human"`; always returns `None`."""
    wants_human = self.render_mode == "human"
    if not wants_human:
        return
    self.render_frame()

In [None]:
# | export


@patch
def close(self: OrderBookEnv):
    """Close the environment.

    Nothing in this class opens files or other handles, so there is nothing
    to release. The method returns quietly because `close` is part of the
    normal teardown of an environment and must be safe to call.
    """
    return None

In [None]:
# | hide
import nbdev

nbdev.nbdev_export()