In [1]:
import math
import random

import torch

from tradezoo.agent import Critic
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.training import Experiment, LearningAgent, ReplayBuffer

In [2]:
def make_agent():
    """Build the LearningAgent used by this notebook.

    Components are created in this order: a fresh ``Critic`` (the network
    being optimised), an empty ``ReplayBuffer`` with capacity 64, an Adam
    optimizer over that critic's parameters (lr=1e-4), and a second,
    separately constructed ``Critic`` that is passed as ``target``.

    The exploration schedule maps a step count to ``1024 / (step + 1024)``,
    i.e. 1.0 at step 0 and 0.5 at step 1024.

    NOTE(review): the target ``Critic`` is constructed independently of the
    trained one, so the two start from different parameters unless
    ``LearningAgent`` copies weights itself — confirm.

    Returns:
        A ``LearningAgent`` with ``steps_completed=0``.
    """

    def exploration_schedule(step):
        # Hyperbolic decay of the exploration value with the step count.
        return 1024 / (step + 1024)

    # Construction order is kept deliberately: each Critic() may draw from
    # the global RNG, so reordering could change the initial weights.
    online_critic = Critic()
    replay_buffer = ReplayBuffer.empty(capacity=64)
    optimizer = torch.optim.Adam(online_critic.parameters(), lr=1e-4)
    target_critic = Critic()

    return LearningAgent(
        # Networks and optimisation
        critic=online_critic,
        target=target_critic,
        optimizer=optimizer,
        steps_per_target_update=4096,
        # Experience replay
        replay_buffer=replay_buffer,
        batch_size=16,
        train_steps_per_turn=64,
        # Decision making
        decision_resolution=8,
        max_desperation=0.0,
        horizon=2,
        exploration_schedule=exploration_schedule,
        # Objective
        utility_function=math.log,
        discount_factor=0.99,
        # Bookkeeping
        steps_completed=0,
    )


# Seed the random number generators before any network is built so that
# Restart Kernel -> Run All reproduces the same run and the same plots.
# NOTE(review): if tradezoo draws from another RNG (e.g. NumPy), seed that
# as well — not visible from this notebook.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

# The trader starts with equal, finite cash and asset balances.
trader_account = Account(cash_balance=2048, asset_balance=2048)
# The client is given infinite cash and assets.
client_account = Account(cash_balance=float("inf"), asset_balance=float("inf"))

# Reference price: 1 plus a period-16 sine wave scaled by 0.2.
# Presumably this oscillates within [0.8, 1.2] if SineWave has unit
# amplitude — TODO confirm.
price_process = 1 + SineWave(period=16) * 0.2

trader = Trader(
    agent=make_agent(),
    account=trader_account,
    client=Client(
        account=client_account,
        for_account=trader_account,
        # Client quotes are the reference price scaled up (ask) and down (bid) by 10%.
        ask_process=price_process * 1.1,
        bid_process=price_process * 0.9,
    ),
)

# A single-trader game on a market built from the two accounts above.
game = Game.new(
    market=Market.from_accounts([trader_account, client_account]),
    traders=[trader],
)

# Run the experiment for 1024 steps; the result feeds the plotting cells below.
experiment = Experiment.run_(game=game, num_steps=1024)

  0%|          | 0/1024 [00:00<?, ?it/s]

In [3]:
# Training diagnostics for the trader's learning agent; what is drawn is
# defined by Experiment.training_plot.
experiment.training_plot(trader.agent)

In [4]:
# Trades recorded for this trader during the experiment (presumably plotted
# over time — see Experiment.trades_plot).
experiment.trades_plot(trader)

In [5]:
# Balance view for this trader (presumably the cash and asset balances of
# its account — see Experiment.balance_plot).
experiment.balance_plot(trader)

In [6]:
# Reward view for this trader; how reward is derived is defined by
# Experiment.reward_plot and the agent's utility function — TODO confirm.
experiment.reward_plot(trader)