In [None]:
import math
import numpy as np
import torch
from tradezoo.agent import Critic
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.training import Experiment, LearningAgent, ReplayBuffer

In [None]:
def make_agent():
    """Build a fresh ``LearningAgent`` configured with this notebook's hyperparameters.

    Two separate ``Critic`` instances are constructed: the first is the online
    network whose parameters are handed to the Adam optimizer, the second is
    passed as ``target``.

    Returns:
        A ``LearningAgent`` with an empty replay buffer and ``steps_completed=0``.
    """
    online_critic = Critic()

    def exploration_schedule(step):
        # Equals 1 at step 0 and 0.5 at step 4096, decaying hyperbolically.
        return 4096 / (step + 4096)

    # Two allocation choices: 0.0 and 1.0.
    allocations = np.linspace(0, 1, num=2)
    # 64 multipliers spaced geometrically between 1/1.2 and 1.2.
    relative_prices = 1.2 ** np.linspace(-1, 1, num=64)

    buffer = ReplayBuffer.empty(capacity=64)
    adam = torch.optim.Adam(online_critic.parameters(), lr=2e-4)
    # Constructed after the optimizer, exactly as in the keyword order of the call.
    target_critic = Critic()

    return LearningAgent(
        critic=online_critic,
        horizon=2,
        allocation_space=allocations,
        relative_price_space=relative_prices,
        exploration_schedule=exploration_schedule,
        utility_function=math.log,
        discount_factor=0.99,
        replay_buffer=buffer,
        batch_size=16,
        train_steps_per_turn=64,
        optimizer=adam,
        target=target_critic,
        steps_per_target_update=2048,
        steps_completed=0,
    )


# Seed the NumPy and PyTorch global generators before anything is constructed,
# so that a "Restart Kernel -> Run All" starts from the same RNG state each time.
# NOTE(review): whether tradezoo draws all of its randomness from these two
# generators is not visible here -- confirm, and also seed the stdlib `random`
# module if the library uses it.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# The trader starts with finite cash and asset balances; the client account is
# given infinite balances on both sides.
trader_account = Account(cash_balance=2048, asset_balance=2048)
client_account = Account(cash_balance=float("inf"), asset_balance=float("inf"))

# Reference price expression: 1 + 0.2 * SineWave(period=16).
# Presumably this oscillates around 1 with amplitude 0.2 -- confirm against SineWave.
price_process = 1 + SineWave(period=16) * 0.2

# The client's ask is the reference price times 1.1 and its bid is the reference
# price divided by 1.1.
trader = Trader(
    agent=make_agent(),
    account=trader_account,
    client=Client(
        account=client_account,
        for_account=trader_account,
        ask_process=price_process * 1.1,
        bid_process=price_process / 1.1,
    ),
)

# One market holding both accounts, with a single trader in the game.
game = Game.new(
    market=Market.from_accounts([trader_account, client_account]),
    traders=[trader],
)

# Run the experiment for 1024 steps; the result object is used by the plotting cells.
experiment = Experiment.run_(game=game, num_steps=1024)

In [None]:
# Call the experiment's training plot for the trader's LearningAgent. The call is
# the cell's last expression, so whatever it returns becomes the cell output.
experiment.training_plot(trader.agent)

In [None]:
# Call the experiment's trades plot for the single trader in this game. The call
# is the cell's last expression, so whatever it returns becomes the cell output.
experiment.trades_plot(trader)

In [None]:
# Call the experiment's balance plot for the trader. The call is the cell's last
# expression, so whatever it returns becomes the cell output.
experiment.balance_plot(trader)

In [None]:
# Call the experiment's reward plot for the trader. The call is the cell's last
# expression, so whatever it returns becomes the cell output.
experiment.reward_plot(trader)