In [1]:
import numpy as np
import torch
from tqdm.notebook import trange
from tradezoo.agent import Action, Actor, Agent, Critic, Observation
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import balance_plot, decision_plot, trades_plot, training_plot, utility_plot
from tradezoo.trainer import Experience, Trainer

In [2]:
# Seed the random number generators before anything stochastic happens so
# that "Restart Kernel -> Run All" reproduces the same run. NumPy drives the
# mocked experiences generated further down; torch is presumably the source of
# randomness for network initialisation and action sampling (TODO confirm
# that tradezoo draws from no other RNG, e.g. Python's `random`).
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

actor = Actor()
critic = Critic()
agent = Agent(
    actor=actor,
    # The actor is optimised with a 10x smaller learning rate than the critic.
    actor_optimizer=torch.optim.Adam(actor.parameters(), lr=1e-4),
    critic=critic,
    critic_optimizer=torch.optim.Adam(critic.parameters(), lr=1e-3),
    discount_factor=0.99,
    uncertainty=1e-3,
)

In [3]:
# The trader starts with finite balances, while its client counterparty holds
# infinite cash and assets.
trader_account = Account(cash_balance=64, asset_balance=64)
client_account = Account(cash_balance=np.inf, asset_balance=np.inf)

# Reference price: the exponential of a sine wave with period 16.
log_price_process = SineWave(period=16) * 1
price_process = log_price_process.exp()

# The client quotes around the reference price: its ask is the price
# multiplied by 1.1 and its bid is the price divided by 1.1.
trader_client = Client(
    account=client_account,
    for_account=trader_account,
    ask_process=price_process * 1.1,
    bid_process=price_process / 1.1,
)
trader = Trader(agent=agent, account=trader_account, client=trader_client)

In [4]:
def mock_experience() -> Experience:
    """Fabricate one single-step transition without running the game.

    Reads the notebook-level ``trader``, ``trader_account`` and ``agent``.

    A random time step in ``[0, 1024)`` and random balances (uniform between
    zero and twice the trader account's current balances) make up the old
    observation; the best ask/bid come from the client's quote processes at
    that step. The agent samples one action for it and the fill is emulated:

    * if the agent's ask is at or below the best bid, up to one unit is sold
      at the agent's ask (limited by the assets held);
    * if the agent's bid is at or above the best ask, up to one unit is
      bought at the agent's bid (limited by the cash held).

    The new observation carries the resulting balances together with the
    quotes of the following step.

    Returns:
        Experience: old observation, sampled action, the trader's utility of
        the new observation as the reward, and the new observation.
    """
    quotes = trader.client
    tick = np.random.randint(0, 1024)
    before = Observation(
        cash_balance=np.random.uniform(0, trader_account.cash_balance * 2),
        asset_balance=np.random.uniform(0, trader_account.asset_balance * 2),
        best_ask=quotes.ask_process.value(tick),
        best_bid=quotes.bid_process.value(tick),
    )
    # The observation is passed as a batch, so exactly one action comes back.
    (action,) = agent.decide(before.batch).sample()

    cash = before.cash_balance
    assets = before.asset_balance
    if action.ask <= before.best_bid:
        # Sell: never more than one unit, never more than is held.
        quantity = min(assets, 1)
        cash += action.ask * quantity
        assets -= quantity
    if action.bid >= before.best_ask:
        # Buy: never more than one unit, never more than the cash affords.
        quantity = min(1, cash / action.bid)
        cash -= action.bid * quantity
        assets += quantity

    after = Observation(
        cash_balance=cash,
        asset_balance=assets,
        best_ask=quotes.ask_process.value(tick + 1),
        best_bid=quotes.bid_process.value(tick + 1),
    )
    return Experience(
        old_observation=before,
        action=action,
        reward=trader.utility(after),
        new_observation=after,
    )


# Smoke check: build one mocked experience and let the cell display it.
mock_experience()


Experience(old_observation=Observation(cash_balance=7.014281490936355, asset_balance=114.73152860033167, best_ask=0.43667356550782893, best_bid=0.3608872442213462), action=Action(log_mid_price=-0.09116575121879578, log_spread=0.15776464343070984), reward=4.167979184358379, new_observation=Observation(cash_balance=7.014281490936355, asset_balance=114.73152860033167, best_ask=0.5423755605347599, best_bid=0.4482442649047602))

Note: using the difference between the new and old utility as the reward in the cell above (instead of the utility of the new observation alone) does not improve the results.

In [5]:
# Run 4096 training steps, each on a fresh batch of 32 mocked experiences,
# and keep whatever every Trainer.train_ call returns for plotting later.
train_results = []
for training_step in trange(4096):
    batch = [mock_experience() for _ in range(32)]
    train_results.append(Trainer.train_(agent, experiences=batch))

  0%|          | 0/4096 [00:00<?, ?it/s]

In [6]:
# Visualise the per-step results collected from Trainer.train_ above.
training_plot(train_results)

In [7]:
# Put the trader and its client on one market and play 1024 turns with the
# trained agent, recording the result of every turn.
market = Market.from_accounts([trader_account, client_account])
game = Game.new(market=market, traders=[trader])

turn_results = []
for turn in trange(1024):
    turn_results.append(game.turn_())

  0%|          | 0/1024 [00:00<?, ?it/s]

In [8]:
# Trades view of the recorded game turns.
trades_plot(turn_results)

In [9]:
# Balance view of the same recorded game turns.
balance_plot(turn_results)

In [10]:
# Utility view for the trader; trader.utility is what mock_experience uses as
# the reward. NOTE(review): exactly what is drawn is defined in tradezoo.plots.
utility_plot(trader)

In [11]:
# Decision view for the trained agent.
# NOTE(review): exactly what is drawn is defined in tradezoo.plots.
decision_plot(agent)