In [1]:
import hyperopt
import numpy as np
import torch
from tradezoo.agent import Actor, Agent, Critic
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import balance_plot, decision_plot, trades_plot, training_plot
from tradezoo.trainer import Trainer

In [2]:
def minimize_me(hyperparameters):
    """Hyperopt objective: negated mean final balance over repeated train/test runs.

    Args:
        hyperparameters: Sequence ``(actor_lr, critic_lr)``; it is unpacked
            positionally into ``train``.

    Returns:
        The negative mean, over 4 independent train-then-test attempts, of
        ``cash_balance + asset_balance`` read from the observation of the last
        test step. The sign is flipped so that minimizing this value maximizes
        the final balance. Returns ``0`` if any attempt raises an ``Exception``.
    """
    import warnings  # function-scope import: the notebook's import cell stays untouched

    total_balances = []
    try:
        for _ in range(4):
            agent, _history = train(*hyperparameters)
            final_observation = test_agent(agent)[-1].observation
            # NOTE(review): asset units are added to cash as-is, without
            # converting at a price -- confirm this is the intended score.
            total_balances.append(
                final_observation.cash_balance + final_observation.asset_balance
            )
    except Exception as error:
        # Deliberately `Exception`, not a bare `except:`: KeyboardInterrupt and
        # SystemExit must propagate so a long search can actually be stopped.
        # A crashing configuration is still scored 0 (we have no use for it),
        # but the failure is surfaced instead of being swallowed silently.
        warnings.warn(
            f"Trial with hyperparameters {hyperparameters!r} failed "
            f"({error!r}); scoring it as 0."
        )
        return 0

    return -np.mean(total_balances)


def train(actor_lr, critic_lr):
    """Create a fresh actor-critic agent and train it online on a new game.

    Args:
        actor_lr: Learning rate of the Adam optimizer over the actor's parameters.
        critic_lr: Learning rate of the Adam optimizer over the critic's parameters.

    Returns:
        A tuple ``(agent, train_results)``, where ``train_results`` lists the
        values returned by 1024 consecutive ``trainer.train_online_()`` calls.
    """
    actor_net = Actor()
    critic_net = Critic()
    actor_adam = torch.optim.Adam(actor_net.parameters(), lr=actor_lr)
    critic_adam = torch.optim.Adam(critic_net.parameters(), lr=critic_lr)

    # Discount factor and uncertainty are fixed; only the two learning rates vary.
    agent = Agent(
        actor=actor_net,
        actor_optimizer=actor_adam,
        critic=critic_net,
        critic_optimizer=critic_adam,
        discount_factor=0.99,
        uncertainty=1e-3,
    )

    trainer = Trainer.new(
        game=make_game(agent),
        replay_buffer_capacity=32,
        batch_size=16,
    )

    history = []
    for _ in range(1024):
        history.append(trainer.train_online_())
    return agent, history


def test_agent(agent: Agent):
    """Play 1024 turns of a newly built game with ``agent``.

    Returns:
        The list of values returned by each ``turn_()`` call, in play order.
    """
    game = make_game(agent)
    turn_results = []
    for _ in range(1024):
        turn_results.append(game.turn_())
    return turn_results


def make_game(agent: Agent):
    """Build a single-trader game in which ``agent`` trades against one client.

    The trader starts with 64 cash and 64 assets; the client's account holds
    infinite cash and assets. The client's ask is the price process times 1.1
    and its bid is the price process divided by 1.1.

    Returns:
        A ``Game`` whose market is built from both accounts and whose only
        trader is the one driven by ``agent``.
    """
    own_account = Account(cash_balance=64, asset_balance=64)
    unlimited = float("inf")
    counterparty_account = Account(cash_balance=unlimited, asset_balance=unlimited)

    # presumably an exponentiated sine wave with period 16 -- confirm in tradezoo.game
    base_price = SineWave(period=16).exp()

    counterparty = Client(
        account=counterparty_account,
        for_account=own_account,
        ask_process=base_price * 1.1,
        bid_process=base_price / 1.1,
    )
    trader = Trader(agent=agent, account=own_account, client=counterparty)

    market = Market.from_accounts([own_account, counterparty_account])
    return Game.new(market=market, traders=[trader])


# Smoke-test the objective on one hand-picked pair (actor_lr=1e-4, critic_lr=1e-3)
# before launching the search; the displayed value is the negated mean final balance.
minimize_me((1e-4, 1e-3))


-330.0583771210039

In [3]:
# Search space, listed in the positional order `train` expects: (actor_lr, critic_lr).
# hp.lognormal draws exp(Normal(mu, sigma)), so the medians are exp(-7) (about 9e-4)
# for the actor and exp(-5) (about 7e-3) for the critic; sigma=3 spreads each over
# several orders of magnitude.
hyperspace = [
    hyperopt.hp.lognormal("actor_lr", mu=-7, sigma=3),
    hyperopt.hp.lognormal("critic_lr", mu=-5, sigma=3),
]
# Draw one random point to sanity-check what `minimize_me` will receive.
hyperopt.pyll.stochastic.sample(hyperspace)


(0.0003486937630901391, 0.08731967679162186)

In [4]:
# Run 64 TPE-guided trials minimizing `minimize_me` over `hyperspace`.
# The result is a dict keyed by the hp labels ("actor_lr", "critic_lr").
best_hyperparameters = hyperopt.fmin(
    fn=minimize_me, space=hyperspace, algo=hyperopt.tpe.suggest, max_evals=64
)
best_hyperparameters


100%|██████████| 64/64 [23:12<00:00, 21.76s/trial, best loss: -375.1930926196611] 


{'actor_lr': 1.8960251623376217e-07, 'critic_lr': 0.024836865732543387}

In [5]:
# Retrain with the best learning rates; the dict keys match `train`'s parameter
# names, hence the ** unpacking. This is a freshly initialised agent, not one of
# those scored during the search.
best_agent, training_history = train(**best_hyperparameters)
training_plot(training_history)

In [6]:
# Plot the retrained agent's decisions (helper from tradezoo.plots).
decision_plot(best_agent)

In [7]:
# Play one 1024-turn test game with the retrained agent and plot the turn results.
example_trades = test_agent(best_agent)
trades_plot(example_trades)

In [8]:
# Plot balances from the same test game (helper from tradezoo.plots).
balance_plot(example_trades)