In [1]:
import hyperopt
import torch
from tradezoo.agent import Actor, Agent, Critic
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.trainer import Trainer

In [2]:
def minimize_me(hyperparameters):
    """Hyperopt objective: train a fresh agent and return its negated final wealth.

    Args:
        hyperparameters: 5-sequence unpacked positionally as
            (actor_lr, critic_lr, uncertainty, max_critic_grad_norm,
            max_actor_grad_norm). Note that the critic's gradient-norm cap
            comes BEFORE the actor's in this tuple.

    Returns:
        0 if any training turn raises an exception; otherwise the negated sum
        of the cash and asset balances found in the last test observation
        (so that a minimizer favours larger final balances).
    """
    actor_lr, critic_lr, uncertainty, max_critic_grad_norm, max_actor_grad_norm = hyperparameters
    actor = Actor()
    critic = Critic()
    agent = Agent(
        actor=actor,
        actor_optimizer=torch.optim.Adam(actor.parameters(), lr=actor_lr),
        max_actor_grad_norm=max_actor_grad_norm,
        critic=critic,
        critic_optimizer=torch.optim.Adam(critic.parameters(), lr=critic_lr),
        max_critic_grad_norm=max_critic_grad_norm,
        discount_factor=0.99,
        uncertainty=uncertainty,
    )

    train_game = make_game(agent)
    trainer = Trainer.new(
        game=train_game,
        replay_buffer_capacity=32,
        batch_size=16,
    )
    for _ in range(1024):
        try:
            trainer.turn_()
        except Exception:
            # A configuration that crashes training is useless, so score it 0.
            # Catch Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate and a long search can be interrupted.
            return 0

    test_results = test_agent(agent)
    return -(
        test_results[-1].observation.cash_balance
        + test_results[-1].observation.asset_balance
    )


def test_agent(agent: Agent):
    """Play 1024 turns of a freshly built game with `agent`.

    Returns:
        A list holding the result of each `turn_()` call, in play order.
    """
    evaluation_game = make_game(agent)
    turn_results = []
    for _ in range(1024):
        turn_results.append(evaluation_game.turn_())
    return turn_results


def make_game(agent: Agent):
    """Build a single-trader game in which `agent` trades against one client.

    The trader's account starts with 512 cash and 512 assets; the client's
    account has infinite cash and assets. The client's ask and bid processes
    are the shared price process scaled by 1.1 and 0.9 respectively.
    """
    own_account = Account(cash_balance=512, asset_balance=512)
    counterparty_account = Account(
        cash_balance=float("inf"), asset_balance=float("inf")
    )

    # Price process: 1 + 0.2 * SineWave with a period of 64.
    base_price = 1 + SineWave(period=64) * 0.2
    ask_quotes = base_price * 1.1
    bid_quotes = base_price * 0.9

    counterparty = Client(
        account=counterparty_account,
        for_account=own_account,
        ask_process=ask_quotes,
        bid_process=bid_quotes,
    )
    trader = Trader(agent=agent, account=own_account, client=counterparty)

    market = Market.from_accounts([own_account, counterparty_account])
    return Game.new(market=market, traders=[trader])


# Smoke-test the objective once with a hand-picked configuration before searching.
# Tuple positions follow minimize_me's unpacking:
# (actor_lr, critic_lr, uncertainty, max_critic_grad_norm, max_actor_grad_norm).
minimize_me((1e-4, 1e-3, 1e-2, 1024, 1024))


-1002.6503519985122

In [3]:
# Search space handed to hyperopt. The sampled values reach `minimize_me` as a
# positional tuple, so the ORDER of this list must match the unpacking there:
# (actor_lr, critic_lr, uncertainty, max_critic_grad_norm, max_actor_grad_norm).
# The critic's clip therefore has to come before the actor's; otherwise the
# labels in the reported best hyperparameters are swapped relative to how the
# values were actually used.
hyperspace = [
    hyperopt.hp.lognormal("actor_lr", mu=-9, sigma=5),
    hyperopt.hp.lognormal("critic_lr", mu=-4, sigma=5),
    hyperopt.hp.lognormal("uncertainty", mu=-2, sigma=2),
    hyperopt.hp.lognormal("max_critic_grad_norm", mu=10, sigma=5),
    hyperopt.hp.lognormal("max_actor_grad_norm", mu=10, sigma=5),
]
# Draw one random sample to sanity-check the shape and scale of the space.
hyperopt.pyll.stochastic.sample(hyperspace)


(0.036956254959784496,
 0.3586177088361633,
 0.011819335191728028,
 10192.883861179851,
 6204.7783542631)

In [4]:
# Run 256 TPE-guided evaluations of the objective over the search space and
# display the best parameter assignment found (keyed by hyperparameter label).
best_hyperparameters = hyperopt.fmin(
    fn=minimize_me,
    space=hyperspace,
    algo=hyperopt.tpe.suggest,
    max_evals=256,
)
best_hyperparameters


100%|██████████| 256/256 [25:41<00:00,  6.02s/trial, best loss: -1024.3835671251977]


{'actor_lr': 0.0007012256205846683,
 'critic_lr': 1.0915912618673516,
 'max_actor_grad_norm': 358438978.7098799,
 'max_critic_grad_norm': 360.9946692827808,
 'uncertainty': 0.08562574545151436}