In [1]:
import hyperopt
import torch
from tradezoo.agent import Actor, Agent, Critic
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.trainer import Trainer

In [2]:
def minimize_me(hyperparameters):
    """Objective function for the hyperparameter search.

    Builds a fresh actor/critic agent, trains it for 1024 trainer turns on a
    newly created game, then evaluates it on another fresh game via
    ``test_agent``.

    Args:
        hyperparameters: A 3-sequence ``(actor_lr, critic_lr,
            replay_buffer_capacity)``. The capacity may be a float (hyperopt's
            quantized distributions yield floats); it is truncated with
            ``int`` before being handed to the trainer.

    Returns:
        ``float("inf")`` if any training turn raises an ``Exception``;
        otherwise the negated sum of ``cash_balance`` and ``asset_balance``
        taken from the observation of the last test turn (lower is better).

    Note:
        Errors raised during the evaluation phase are not caught and
        propagate to the caller.
    """
    actor_lr, critic_lr, replay_buffer_capacity = hyperparameters
    actor = Actor()
    critic = Critic()
    agent = Agent(
        actor=actor,
        actor_optimizer=torch.optim.Adam(actor.parameters(), lr=actor_lr),
        critic=critic,
        critic_optimizer=torch.optim.Adam(critic.parameters(), lr=critic_lr),
        discount_factor=0.99,
    )

    train_game = make_game(agent)
    trainer = Trainer.new(
        game=train_game,
        replay_buffer_capacity=int(replay_buffer_capacity),
        batch_size=16,
    )
    try:
        for _ in range(1024):
            trainer.turn_()
    except Exception:
        # A configuration that crashes during training is useless, so give it
        # the worst possible loss. Catch only Exception (not a bare except) so
        # that KeyboardInterrupt / SystemExit still stop the whole search
        # instead of being silently scored as a failed trial.
        return float("inf")

    test_results = test_agent(agent)
    final_observation = test_results[-1].observation
    # NOTE(review): cash and asset balances are added directly, i.e. the asset
    # is implicitly valued at a unit price -- confirm this is intended.
    return -(final_observation.cash_balance + final_observation.asset_balance)


def test_agent(agent: Agent):
    """Play 1024 turns of a freshly built game with ``agent``.

    Returns the list of values returned by each ``turn_()`` call, in order.
    """
    evaluation_game = make_game(agent)
    turn_results = []
    for _ in range(1024):
        turn_results.append(evaluation_game.turn_())
    return turn_results


def make_game(agent: Agent):
    """Create a single-trader game in which ``agent`` faces one client.

    The trader's account starts with 512 cash and 512 asset; the client's
    account has infinite cash and asset balances. The client quotes around a
    sinusoidal price process (``1 + 0.2 * SineWave(period=64)``): its ask
    process is 1.1x that price and its bid process is 0.9x.
    """
    agent_account = Account(cash_balance=512, asset_balance=512)
    unlimited = float("inf")
    counterparty_account = Account(cash_balance=unlimited, asset_balance=unlimited)

    mid_price = 1 + SineWave(period=64) * 0.2
    ask = mid_price * 1.1
    bid = mid_price * 0.9

    counterparty = Client(
        account=counterparty_account,
        for_account=agent_account,
        ask_process=ask,
        bid_process=bid,
    )
    agent_trader = Trader(agent=agent, account=agent_account, client=counterparty)

    market = Market.from_accounts([agent_account, counterparty_account])
    return Game.new(market=market, traders=[agent_trader])


# Smoke test: evaluate the objective once with hand-picked values, given in
# the order (actor_lr, critic_lr, replay_buffer_capacity).
baseline_actor_lr, baseline_critic_lr, baseline_capacity = 1e-4, 1e-3, 16
minimize_me((baseline_actor_lr, baseline_critic_lr, baseline_capacity))


-974.2647989444093

In [3]:
# Search space, listed in the order minimize_me unpacks it:
# (actor_lr, critic_lr, replay_buffer_capacity).
# Per the hyperopt docs, lognormal draws exp(normal(mu, sigma)) and
# qloguniform draws round(exp(uniform(low, high)) / q) * q, so the capacity
# is a float multiple of 8 (minimize_me converts it with int()).
actor_lr_space = hyperopt.hp.lognormal("actor_lr", mu=-10, sigma=3)
critic_lr_space = hyperopt.hp.lognormal("critic_lr", mu=-9, sigma=3)
replay_buffer_capacity_space = hyperopt.hp.qloguniform(
    "replay_buffer_capacity", low=3, high=5, q=8
)
hyperspace = [actor_lr_space, critic_lr_space, replay_buffer_capacity_space]

# Draw one random point to sanity-check the space definition.
hyperopt.pyll.stochastic.sample(hyperspace)


(2.1976344410027015e-05, 0.0011976732118096046, 88.0)

In [4]:
# Random search over the space: 128 independent evaluations of minimize_me.
# Each evaluation trains and tests a fresh agent, so this cell is slow.
best_hyperparameters = hyperopt.fmin(
    minimize_me,
    hyperspace,
    algo=hyperopt.rand.suggest,
    max_evals=128,
)
best_hyperparameters


100%|██████████| 128/128 [16:01<00:00,  7.51s/trial, best loss: -1026.6935992703238]


{'actor_lr': 0.00015984502023655585,
 'critic_lr': 0.01738586299947761,
 'replay_buffer_capacity': 40.0}