In [1]:
import hyperopt
import numpy as np
import torch
from tqdm.notebook import trange
from tradezoo.agent import Action, Actor, Agent, Critic, Observation
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import balance_plot, decision_plot, trades_plot, uncertainty_plot, utility_plot
from tradezoo.trainer import Experience, Trainer

In [2]:
def mock_experience(agent: Agent) -> Experience:
    """Create one synthetic experience by letting ``agent`` act on a random observation.

    A fresh mock observation is drawn, the agent's decision for it is sampled
    (the first element of the sample is used as the action), and the action is
    scored with ``mock_reward``.

    Args:
        agent: The agent whose ``decide`` method produces the action.

    Returns:
        An ``Experience`` whose old and new observation are the same object,
        i.e. the mock environment does not react to the action.
    """
    observation = mock_observation()
    sampled_action = agent.decide(observation.batch).sample()[0]
    reward = mock_reward(observation, sampled_action)
    return Experience(
        old_observation=observation,
        action=sampled_action,
        reward=reward,
        # The observation is reused unchanged as the follow-up state.
        new_observation=observation,
    )


def mock_observation() -> Observation:
    """Draw a random ``Observation`` from NumPy's global random state.

    Both balances are uniform on [1, 4096) and both quotes are uniform on
    [0.5, 1.5). The ask and the bid are sampled independently of each other,
    so the bid may come out above the ask.

    Returns:
        A freshly sampled ``Observation``.
    """
    balance_low, balance_high = 1, 4096
    quote_low, quote_high = 0.5, 1.5

    # Draw order: cash, asset, ask, bid.
    cash = np.random.uniform(balance_low, balance_high)
    asset = np.random.uniform(balance_low, balance_high)
    ask = np.random.uniform(quote_low, quote_high)
    bid = np.random.uniform(quote_low, quote_high)

    return Observation(
        cash_balance=cash,
        asset_balance=asset,
        best_ask=ask,
        best_bid=bid,
    )


def mock_reward(observation: Observation, action: Action):
    """Score an action by how close its quotes are to a fixed target.

    The reward is the negated sum of squared errors of ``action.ask`` from 1.5
    and ``action.bid`` from 0.5, so it is at most 0 and reaches 0 exactly at
    ask = 1.5, bid = 0.5.

    Args:
        observation: Accepted for signature compatibility; not used.
        action: Object exposing ``ask`` and ``bid``.

    Returns:
        The negated squared distance of (ask, bid) from (1.5, 0.5).
    """
    target_ask, target_bid = 1.5, 0.5
    ask_error = (action.ask - target_ask) ** 2
    bid_error = (action.bid - target_bid) ** 2
    return -(ask_error + bid_error)


In [3]:
def train(actor_lr, critic_lr, num_steps=1024, batch_size=16):
    """Build a fresh actor-critic agent and train it on mock experiences.

    A new ``Actor`` and ``Critic`` are created on every call, each with its own
    Adam optimizer. Gradient clipping is effectively disabled (both maximum
    gradient norms are infinite).

    Args:
        actor_lr: Learning rate of the actor's Adam optimizer.
        critic_lr: Learning rate of the critic's Adam optimizer.
        num_steps: Number of ``Trainer.train_`` calls.
        batch_size: Number of mock experiences generated for each training
            step; expected to be a positive integer. Defaults to 16.

    Returns:
        The ``Agent`` after training.
    """
    actor = Actor()
    critic = Critic()
    agent = Agent(
        actor=actor,
        actor_optimizer=torch.optim.Adam(actor.parameters(), lr=actor_lr),
        max_actor_grad_norm=float("inf"),
        critic=critic,
        critic_optimizer=torch.optim.Adam(critic.parameters(), lr=critic_lr),
        max_critic_grad_norm=float("inf"),
        discount_factor=0.99,
        uncertainty=1e-3,
    )

    for _ in range(num_steps):
        # Each step uses experiences sampled from the agent as it currently is.
        # NOTE(review): Trainer.train_ presumably updates `agent` in place -- confirm.
        batch = [mock_experience(agent) for _ in range(batch_size)]
        Trainer.train_(agent, experiences=batch)
    return agent

# Smoke test with hand-picked learning rates; as the cell's last expression,
# the returned agent is what the cell displays.
train(actor_lr=5e-2, critic_lr=1e-2)

Agent(actor=Actor(), actor_optimizer=Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.05
    weight_decay: 0
), max_actor_grad_norm=inf, critic=Critic(
  (network): Sequential(
    (0): Linear(in_features=4, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=16, out_features=1, bias=True)
  )
), critic_optimizer=Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0
), max_critic_grad_norm=inf, discount_factor=0.99, uncertainty=0.001)

In [4]:
def minimize_me(hyperparameters):
    """Objective for the hyperparameter search: mean loss of a freshly trained agent.

    Args:
        hyperparameters: Sequence unpacked positionally into ``train``, so it
            must be ordered as (actor_lr, critic_lr).

    Returns:
        The negated mean ``mock_reward`` of one sampled action per observation
        in the global ``test_observations`` (lower is better), or the sentinel
        999 when training raises ``ValueError``.
    """
    try:
        trained_agent = train(*hyperparameters)
    except ValueError:
        # Large finite penalty for a failed run; infinity might be problematic
        # for the optimizer.
        return 999

    sampled_rewards = []
    for observation in test_observations:
        sampled_action = trained_agent.decide(observation.batch).sample()[0]
        sampled_rewards.append(mock_reward(observation, sampled_action))
    return -np.mean(sampled_rewards)

# Fixed evaluation set; minimize_me reads it as a global, so it must exist
# before minimize_me is first called.
test_observations = [mock_observation() for _ in range(256)]
# Sanity-check the objective on one hand-picked (actor_lr, critic_lr) pair.
minimize_me((1e-3, 1e-2))


0.25425815209766317

In [5]:
# Search space: log-normal priors over the two learning rates.
# Order matters: minimize_me unpacks the sampled values positionally into
# train(actor_lr, critic_lr).
hyperspace = [
    hyperopt.hp.lognormal("actor_lr", mu=-8, sigma=2),
    hyperopt.hp.lognormal("critic_lr", mu=-6, sigma=2),
]
# Draw one sample to eyeball the scale of the proposed learning rates.
hyperopt.pyll.stochastic.sample(hyperspace)


(5.8607807036548907e-05, 0.0012959522612875263)

In [6]:
# Minimise the objective with TPE over 64 trials. Every trial trains a fresh
# agent, so this cell is slow (the recorded run took about ten minutes).
best_hyperparameters = hyperopt.fmin(
    fn=minimize_me, space=hyperspace, algo=hyperopt.tpe.suggest, max_evals=64
)
# Display the result: a dict keyed by the hp labels ("actor_lr", "critic_lr").
best_hyperparameters


100%|██████████| 64/64 [10:17<00:00,  9.65s/trial, best loss: 0.0005078567659869645]


{'actor_lr': 0.015669481936786304, 'critic_lr': 0.04719171413137972}

In [7]:
# Retrain from scratch with the best hyperparameters; the dict keys match
# train's keyword argument names.
best_agent = train(**best_hyperparameters)
# Mean loss (negated mock reward) over the fixed test observations, computed
# the same way as inside minimize_me. Actions are sampled, so the value varies
# between runs.
-np.mean(
    [
        mock_reward(observation, best_agent.decide(observation.batch).sample()[0])
        for observation in test_observations
    ]
)

0.0006260456695396067

In [8]:
# Plot helper imported from tradezoo.plots, applied to the retrained agent.
decision_plot(best_agent)

In [9]:
# Plot helper imported from tradezoo.plots, applied to the retrained agent.
utility_plot(best_agent)