In [1]:
import hyperopt
import torch
from tradezoo.agent import Actor, Agent, Critic
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import trades_plot, balance_plot, decision_plot
from tradezoo.trainer import Trainer

In [2]:
def minimize_me(hyperparameters):
    """Objective function for the hyperparameter search (lower is better).

    Trains an agent with the given hyperparameters, runs it through a test
    game, and scores it by the balances reported in the last test turn.

    Args:
        hyperparameters: Sequence unpacked positionally into ``train``,
            i.e. ``(actor_lr, critic_lr)``.

    Returns:
        ``-(cash_balance + asset_balance)`` read from the observation of the
        final test turn, or ``0`` if training raised an exception.
    """
    try:
        agent = train(*hyperparameters)
    except Exception:
        # We have no use for a configuration that crashes, so give it a loss
        # of 0. Only ``Exception`` is caught: a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and make a long search
        # impossible to interrupt.
        return 0

    final_observation = test_agent(agent)[-1].observation
    # NOTE(review): cash and assets are simply added, i.e. one asset unit is
    # counted like one cash unit — confirm this is the intended valuation.
    return -(final_observation.cash_balance + final_observation.asset_balance)


def train(actor_lr, critic_lr):
    """Create a new agent and train it for 1024 trainer turns.

    Args:
        actor_lr: Learning rate for the Adam optimizer of the actor.
        critic_lr: Learning rate for the Adam optimizer of the critic.

    Returns:
        The ``Agent`` after training.
    """
    actor_net, critic_net = Actor(), Critic()
    actor_adam = torch.optim.Adam(actor_net.parameters(), lr=actor_lr)
    critic_adam = torch.optim.Adam(critic_net.parameters(), lr=critic_lr)
    learner = Agent(
        actor=actor_net,
        actor_optimizer=actor_adam,
        critic=critic_net,
        critic_optimizer=critic_adam,
        discount_factor=0.99,
        uncertainty=1e-3,
    )

    # The trainer drives a game that is built around the agent being trained.
    session = Trainer.new(
        game=make_game(learner),
        replay_buffer_capacity=32,
        batch_size=16,
    )
    turns_left = 1024
    while turns_left > 0:
        session.turn_()
        turns_left -= 1
    return learner


def test_agent(agent: Agent):
    """Run the agent through a freshly built game for 1024 turns.

    Args:
        agent: The agent to evaluate.

    Returns:
        A list with the return value of every ``turn_()`` call, in order.
    """
    arena = make_game(agent)
    outcomes = []
    for _ in range(1024):
        outcomes.append(arena.turn_())
    return outcomes


def make_game(agent: Agent):
    """Build a one-trader game in which the agent trades against a client.

    The trader's account starts with 512 cash and 512 assets; the client's
    account has infinite balances of both. The client's ask and bid processes
    are the shared price process scaled by 1.1 and 0.9 respectively.

    Args:
        agent: The agent that controls the single trader.

    Returns:
        The game created by ``Game.new`` over a market holding both accounts.
    """
    own_account = Account(cash_balance=512, asset_balance=512)
    unlimited = float("inf")
    counterparty_account = Account(cash_balance=unlimited, asset_balance=unlimited)

    # Base price: 1 plus a scaled sine wave with period 64.
    base_price = 1 + SineWave(period=64) * 0.2
    counterparty = Client(
        account=counterparty_account,
        for_account=own_account,
        ask_process=base_price * 1.1,
        bid_process=base_price * 0.9,
    )
    sole_trader = Trader(agent=agent, account=own_account, client=counterparty)

    market = Market.from_accounts([own_account, counterparty_account])
    return Game.new(market=market, traders=[sole_trader])


# Smoke-test the objective once with hand-picked learning rates
# (actor_lr=1e-4, critic_lr=1e-3) before running the full search.
minimize_me((1e-4, 1e-3))


-1022.7944970472219

In [3]:
# Search space: one log-normal prior per learning rate.
# The list order must match train(actor_lr, critic_lr), because minimize_me
# unpacks a sample positionally; the labels must match train's parameter names,
# because the best result is later passed on as keyword arguments.
hyperspace = [
    hyperopt.hp.lognormal("actor_lr", mu=-7, sigma=3),
    hyperopt.hp.lognormal("critic_lr", mu=-5, sigma=3),
]
# Draw a single sample to sanity-check the space.
hyperopt.pyll.stochastic.sample(hyperspace)


(5.9183574377211e-06, 0.009118256187888519)

In [4]:
# Search for the hyperparameters that minimize minimize_me, using hyperopt's
# TPE suggestion algorithm and 256 evaluations. Every evaluation trains and
# tests an agent, so this cell is slow (the recorded run took roughly 24 minutes).
# NOTE(review): no random seed is set for hyperopt or torch, so a re-run will
# presumably not reproduce the recorded numbers exactly — consider seeding.
best_hyperparameters = hyperopt.fmin(
    fn=minimize_me, space=hyperspace, algo=hyperopt.tpe.suggest, max_evals=256
)
best_hyperparameters


100%|██████████| 256/256 [23:38<00:00,  5.54s/trial, best loss: -1026.1306168863161]


{'actor_lr': 0.0001525527841565081, 'critic_lr': 0.026012333017656812}

In [5]:
# Train an agent with the best hyperparameters found; the dict keys
# ("actor_lr", "critic_lr") line up with train's parameter names.
# NOTE(review): this is a new training run, not the agent instance that was
# scored during the search.
best_agent = train(**best_hyperparameters)
decision_plot(best_agent)

In [6]:
# Run the retrained agent through a fresh test game and pass the per-turn
# results to trades_plot.
example_trades = test_agent(best_agent)
trades_plot(example_trades)

In [7]:
# Visualize the same test run with balance_plot.
balance_plot(example_trades)