In [1]:
import numpy as np
import torch
from tqdm.notebook import trange
from tradezoo.agent import Action, Actor, Agent, Critic, Observation
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import balance_plot, decision_plot, reward_plot, trades_plot, uncertainty_plot, utility_plot
from tradezoo.trainer import Experience, Trainer

In [2]:
# Create the actor and critic first so each optimizer can be bound to the
# parameters of the model it updates (the critic uses a larger learning rate).
actor = Actor()
critic = Critic()
actor_optimizer = torch.optim.Adam(actor.parameters(), lr=1e-4)
critic_optimizer = torch.optim.Adam(critic.parameters(), lr=1e-2)

agent = Agent(
    actor=actor,
    actor_optimizer=actor_optimizer,
    critic=critic,
    critic_optimizer=critic_optimizer,
    discount_factor=0.99,
    uncertainty=1e-3,
)

In [3]:
def mock_experience() -> Experience:
    """Sample one synthetic single-step trading experience.

    The old observation holds random cash/asset balances and a best ask/bid
    placed around a random mid price (``mid * (1 + spread)`` and
    ``mid / (1 + spread)``). A random action is then applied against that
    unchanged order book:

    * if the action's ask is at or below the best bid, one asset unit is sold
      for the action's ask;
    * if the action's bid is at or above the best ask, one asset unit is
      bought for the action's bid.

    Returns:
        An ``Experience`` whose reward is the change in utility, where utility
        is the cash balance plus the asset balance valued at the sampled mid
        price.
    """
    mid_price = np.random.uniform(0.5, 1.5)
    spread = np.random.uniform(0, 1)
    old_observation = Observation(
        cash_balance=np.random.uniform(2, 2048),
        asset_balance=np.random.uniform(2, 2048),
        best_ask=mid_price * (1 + spread),
        best_bid=mid_price / (1 + spread),
    )

    def utility(observation: Observation) -> float:
        # Assets are marked at the sampled mid price, not at the action's price.
        return observation.cash_balance + observation.asset_balance * mid_price

    action_mid_price = np.random.uniform(0.5, 1.5)
    # np.random.uniform samples the half-open interval [0, 1), so it can return
    # exactly 0 and np.log(0) is -inf. Using 1 - U moves the sample to (0, 1],
    # which keeps the same distribution while keeping the logarithm finite.
    action_spread = 1.0 - np.random.uniform(0, 1)
    action = Action(
        log_mid_price=np.log(action_mid_price),
        log_spread=np.log(action_spread),
    )

    new_cash_balance = old_observation.cash_balance
    new_asset_balance = old_observation.asset_balance
    # The two checks are independent: an action may trigger a sale, a
    # purchase, both, or neither.
    if action.ask <= old_observation.best_bid:
        new_cash_balance += action.ask
        new_asset_balance -= 1
    if action.bid >= old_observation.best_ask:
        new_cash_balance -= action.bid
        new_asset_balance += 1

    # The order book is carried over unchanged; only the balances move.
    new_observation = Observation(
        cash_balance=new_cash_balance,
        asset_balance=new_asset_balance,
        best_ask=old_observation.best_ask,
        best_bid=old_observation.best_bid,
    )

    return Experience(
        old_observation=old_observation,
        action=action,
        reward=utility(new_observation) - utility(old_observation),
        new_observation=new_observation,
    )


# Draw one sample as the cell's last expression so an example Experience is displayed.
mock_experience()


Experience(old_observation=Observation(cash_balance=1634.4065251750826, asset_balance=1428.7611766734067, best_ask=1.8347039403114582, best_bid=1.0992246236418652), action=Action(log_mid_price=-0.2845353680841478, log_spread=-1.1265750829232966), reward=-0.4238877798293288, new_observation=Observation(cash_balance=1635.4027612448815, asset_balance=1427.7611766734067, best_ask=1.8347039403114582, best_bid=1.0992246236418652))

In [4]:
# Train the agent on synthetic data before it plays the game: 4096 update
# steps, each on a freshly sampled batch of 16 mock experiences.
for _ in trange(4096):
    batch = [mock_experience() for _ in range(16)]
    Trainer.train_(agent, experiences=batch)

  0%|          | 0/4096 [00:00<?, ?it/s]

In [5]:
# The agent trades from a finite account; its counterparty has unlimited
# cash and assets.
trader_account = Account(cash_balance=1024, asset_balance=1024)
client_account = Account(cash_balance=float("inf"), asset_balance=float("inf"))

# Reference price: a SineWave with period 256, scaled by 0.2 and offset by 1.
# The client's ask and bid processes sit 10% above and below it.
price_process = 1 + SineWave(period=256) * 0.2
client = Client(
    account=client_account,
    for_account=trader_account,
    ask_process=price_process * 1.1,
    bid_process=price_process * 0.9,
)
trader = Trader(agent=agent, account=trader_account, client=client)

market = Market.from_accounts([trader_account, client_account])
game = Game.new(market=market, traders=[trader])

# Play 4096 turns and keep every turn's result for the plots below.
turn_results = [game.turn_() for _ in trange(4096)]

  0%|          | 0/4096 [00:00<?, ?it/s]

In [6]:
# Pass the recorded per-turn results of the game to tradezoo's trades_plot helper.
trades_plot(turn_results)

In [7]:
# Pass the same recorded per-turn results to tradezoo's reward_plot helper.
reward_plot(turn_results)

In [8]:
# utility_plot takes the agent itself rather than the turn results.
utility_plot(agent)

In [9]:
# decision_plot also takes the agent itself rather than the turn results.
decision_plot(agent)