In [1]:
import torch
from tqdm.notebook import trange
from tradezoo.agent import Actor, Agent, Critic
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import balance_plot, decision_plot, trades_plot, uncertainty_plot, utility_plot
from tradezoo.trainer import Trainer

In [2]:
def make_agent():
    """Build a new Agent backed by its own actor and critic networks.

    Every call constructs a fresh ``Actor`` and ``Critic`` and gives each one
    a dedicated Adam optimizer (learning rate 2e-5 for the actor, 1e-3 for the
    critic), so agents produced by separate calls share no objects created here.

    Returns:
        The ``Agent`` assembled from those networks and optimizers.
    """
    policy_net = Actor()
    value_net = Critic()
    policy_optimizer = torch.optim.Adam(policy_net.parameters(), lr=2e-5)
    value_optimizer = torch.optim.Adam(value_net.parameters(), lr=1e-3)
    agent_settings = {
        "actor": policy_net,
        "actor_optimizer": policy_optimizer,
        "critic": value_net,
        "critic_optimizer": value_optimizer,
        "discount_factor": 0.99,
        "uncertainty": 0.0,
        # NOTE(review): an infinite max_grad_norm presumably disables gradient
        # clipping; the recorded run reports critic gradient norms around 3e7.
        # Confirm that leaving the gradients unclipped is intended.
        "max_grad_norm": float("inf"),
    }
    return Agent(**agent_settings)


# --- Experiment configuration -------------------------------------------------
# Tunable values are gathered here instead of being inlined in the calls below.
SEED = 0
NUM_TURNS = 4096
INITIAL_BALANCE = 4096
PRICE_PERIOD = 256
REPLAY_BUFFER_CAPACITY = 16
BATCH_SIZE = 16

# Seed torch's global RNG before any agent is built so that a fresh
# "Restart & Run All" starts from the same state every time.
# NOTE(review): this only covers torch; if tradezoo also draws from `random`
# or numpy, those generators need seeding as well — confirm.
torch.manual_seed(SEED)

num_traders = 1

# One finite account per trader, plus one unlimited counterparty account per
# trader that is handed to that trader's Client.
trader_accounts = [
    Account(cash_balance=INITIAL_BALANCE, asset_balance=INITIAL_BALANCE)
    for _ in range(num_traders)
]
client_accounts = [
    Account(cash_balance=float("inf"), asset_balance=float("inf"))
    for _ in range(num_traders)
]

# Price process: 1 plus a sine wave scaled by 0.2. The client's ask process is
# this price scaled by 1.1 and its bid process is the price scaled by 0.9.
price_process = 1 + SineWave(period=PRICE_PERIOD) * 0.2
traders = [
    Trader(
        agent=make_agent(),
        account=trader_account,
        client=Client(
            account=client_account,
            for_account=trader_account,
            ask_process=price_process * 1.1,
            bid_process=price_process * 0.9,
        ),
    )
    for trader_account, client_account in zip(trader_accounts, client_accounts)
]

# The market is built from every account, trader and client alike.
game = Game.new(
    market=Market.from_accounts(trader_accounts + client_accounts),
    traders=traders,
)
trainer = Trainer.new(
    game=game,
    replay_buffer_capacity=REPLAY_BUFFER_CAPACITY,
    batch_size=BATCH_SIZE,
)

# Run the turns one by one, keeping each turn's result for the plots below.
turn_results = [trainer.turn_() for _ in trange(NUM_TURNS)]

  0%|          | 0/4096 [00:00<?, ?it/s]

Critic grad norm: 31906121.902870998
Actor grad norm: 18770.247672944744
Critic grad norm: 31862274.103929702
Actor grad norm: 45032.39310378248
Critic grad norm: 32375559.902221855
Actor grad norm: 65258.8660106887
Critic grad norm: 32458412.02971591
Actor grad norm: 57523.942329526086
Critic grad norm: 33186048.910772238
Actor grad norm: 76527.81202729042
Critic grad norm: 33506385.74856564
Actor grad norm: 22682.32638028273
Critic grad norm: 34128162.40127966
Actor grad norm: 98487.05229836002
Critic grad norm: 35201082.645742044
Actor grad norm: 21777.910470605428
Critic grad norm: 32790118.53357935
Actor grad norm: 26532.832326944055
Critic grad norm: 34131074.59097207
Actor grad norm: 41259.728936340005
Critic grad norm: 35347799.36603972
Actor grad norm: 62173.60348847357
Critic grad norm: 37230267.77990041
Actor grad norm: 72015.58785267246
Critic grad norm: 38659650.87505204
Actor grad norm: 68710.82249873823
Critic grad norm: 40034715.09862756
Actor grad norm: 69009.788290622

In [3]:
# Plot built from the per-turn results returned by trainer.turn_().
# NOTE(review): presumably shows the trades made over the run — confirm in tradezoo.plots.
trades_plot(turn_results)

In [4]:
# Plot built from the per-turn results returned by trainer.turn_().
# NOTE(review): presumably tracks the agent's uncertainty over the run — confirm in tradezoo.plots.
uncertainty_plot(turn_results)

In [5]:
# Plot built from the per-turn results returned by trainer.turn_().
# NOTE(review): presumably shows account balances over the run — confirm in tradezoo.plots.
balance_plot(turn_results)

In [6]:
# Inspect the agent of the first trader (the only one while num_traders is 1).
# NOTE(review): presumably visualizes the critic's utility estimate — confirm in tradezoo.plots.
utility_plot(traders[0].agent)

In [7]:
# Inspect the agent of the first trader (the only one while num_traders is 1).
# NOTE(review): presumably visualizes the actor's decisions — confirm in tradezoo.plots.
decision_plot(traders[0].agent)