In [1]:
import hyperopt
import numpy as np
import torch
from tqdm.notebook import trange
from tradezoo.agent import Action, Actor, Agent, Critic, Observation
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import balance_plot, decision_plot, trades_plot, uncertainty_plot, utility_plot
from tradezoo.trainer import Experience, Trainer

In [2]:
def mock_experience(agent: Agent) -> Experience:
    """Build one synthetic Experience for ``agent`` from a random observation.

    A fresh mock observation is drawn, the agent's decision for it is sampled,
    and the first sampled action is scored with ``mock_reward``. The same
    observation object is used as both the old and the new observation, so
    the mock environment never changes state.
    """
    observation = mock_observation()
    decision = agent.decide(observation.batch)
    # Only the first element of the sampled result is used.
    action = decision.sample()[0]
    reward = mock_reward(observation, action)
    return Experience(
        old_observation=observation,
        action=action,
        reward=reward,
        new_observation=observation,
    )


def mock_observation() -> Observation:
    """Return an Observation whose four fields are independent uniform draws.

    Balances are drawn from [1, 4096) and the best ask / best bid from
    [0.5, 1.5), all via NumPy's global random state.
    """
    # Draw in field order so the global RNG is consumed in a fixed sequence.
    cash = np.random.uniform(1, 4096)
    asset = np.random.uniform(1, 4096)
    ask = np.random.uniform(0.5, 1.5)
    bid = np.random.uniform(0.5, 1.5)
    return Observation(
        cash_balance=cash,
        asset_balance=asset,
        best_ask=ask,
        best_bid=bid,
    )


def mock_reward(observation: Observation, action: Action):
    """Score an action by how close it is to ask=1.5 and bid=0.5.

    The reward is the negated sum of the squared deviations of ``action.ask``
    from 1.5 and ``action.bid`` from 0.5, so it is at most 0 and reaches 0
    exactly at those targets. ``observation`` is accepted but not used.
    """
    ask_error = action.ask - 1.5
    bid_error = action.bid - 0.5
    return -(ask_error ** 2 + bid_error ** 2)


In [3]:
def minimize_me(hyperparameters):
    """Objective for the hyperparameter search: lower is better.

    ``hyperparameters`` is unpacked positionally into ``train``. The trained
    agent then acts once on every entry of the module-level
    ``test_observations`` and the negated mean ``mock_reward`` is returned.
    If training raises ``ValueError`` the trial is scored with the fixed
    penalty 999.
    """
    try:
        agent = train(*hyperparameters)
    except ValueError:
        # A large finite penalty is used instead of infinity, which the
        # original author suspected might be problematic for the optimiser.
        return 999

    rewards = []
    for observation in test_observations:
        action = agent.decide(observation.batch).sample()[0]
        rewards.append(mock_reward(observation, action))
    return -np.mean(rewards)


def train(
    actor_lr,
    critic_lr,
    num_steps=1024,
    *,
    batch_size=16,
    discount_factor=0.99,
    uncertainty=1e-3,
):
    """Train a fresh actor-critic Agent on mock experiences and return it.

    Parameters
    ----------
    actor_lr, critic_lr:
        Learning rates for the Adam optimisers of the actor and the critic.
    num_steps:
        Number of calls to ``Trainer.train_``.
    batch_size:
        Number of mock experiences generated for each training step
        (keyword-only; previously hard-coded to 16).
    discount_factor, uncertainty:
        Forwarded unchanged to ``Agent`` (keyword-only; previously hard-coded
        to 0.99 and 1e-3).

    Returns
    -------
    The trained ``Agent``.

    Notes
    -----
    ``minimize_me`` treats a ``ValueError`` escaping from this function as a
    failed trial; which call raises it is not visible in this notebook.
    The extra parameters are keyword-only so that ``train(*hyperparameters)``
    can never bind a search value to them by position.
    """
    actor = Actor()
    critic = Critic()
    agent = Agent(
        actor=actor,
        actor_optimizer=torch.optim.Adam(actor.parameters(), lr=actor_lr),
        critic=critic,
        critic_optimizer=torch.optim.Adam(critic.parameters(), lr=critic_lr),
        discount_factor=discount_factor,
        uncertainty=uncertainty,
    )

    for _ in range(num_steps):
        # Each step trains on a freshly generated batch of mock experiences.
        experiences = [mock_experience(agent) for _ in range(batch_size)]
        Trainer.train_(agent, experiences=experiences)
    return agent


# Seed the global RNGs before anything random happens so that
# "Restart Kernel -> Run All" yields the same test set and training runs.
# mock_observation draws from NumPy's global random state; torch is seeded
# because the optimisers (and presumably the networks and action sampling)
# are torch-based -- TODO confirm against tradezoo.agent.
# NOTE(review): hyperopt's search may use its own RNG; check fmin's `rstate`
# argument if the search itself must also be reproducible.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Fixed evaluation set shared by every call to minimize_me.
test_observations = [mock_observation() for _ in range(256)]
# Smoke-test the objective with hand-picked learning rates (actor, critic).
minimize_me((1e-3, 1e-2))


0.03711109405987695

In [4]:
# Search space: one log-normal prior per learning rate, in the positional
# order expected by train(actor_lr, critic_lr). Both priors use sigma=2.
hyperspace = [
    hyperopt.hp.lognormal(label, mu=mu, sigma=2)
    for label, mu in (("actor_lr", -7), ("critic_lr", -5))
]
# Draw one sample to sanity-check the magnitudes the priors produce.
hyperopt.pyll.stochastic.sample(hyperspace)


(0.00021707788830128416, 0.19175997056264074)

In [5]:
# Run 8 TPE-guided trials of minimize_me over hyperspace. The result is
# displayed below and later unpacked as keyword arguments into train().
best_hyperparameters = hyperopt.fmin(
    minimize_me,
    hyperspace,
    algo=hyperopt.tpe.suggest,
    max_evals=8,
)
best_hyperparameters


100%|██████████| 8/8 [02:28<00:00, 18.56s/trial, best loss: 0.004633321316961223]


{'actor_lr': 0.0011696440597733964, 'critic_lr': 0.0810843158878982}

In [6]:
# Retrain from scratch with the selected learning rates, then score the new
# agent on the shared test set with the same metric as minimize_me
# (negated mean mock reward; lower is better).
best_agent = train(**best_hyperparameters)
best_agent_rewards = [
    mock_reward(observation, best_agent.decide(observation.batch).sample()[0])
    for observation in test_observations
]
-np.mean(best_agent_rewards)

0.08897894469843168

In [7]:
# Show the decision plot for the agent retrained with the best hyperparameters
# (decision_plot comes from tradezoo.plots; what it draws is not shown here).
decision_plot(best_agent)

In [8]:
# Show the utility plot for the same retrained agent
# (utility_plot comes from tradezoo.plots; what it draws is not shown here).
utility_plot(best_agent)