In [1]:
import hyperopt
import numpy as np
import torch
from tqdm.notebook import trange
from tradezoo.agent import Action, Actor, Agent, Critic, Observation
from tradezoo.game import Game, Client, SineWave, Trader
from tradezoo.market import Account, Market
from tradezoo.plots import balance_plot, decision_plot, trades_plot, uncertainty_plot, utility_plot
from tradezoo.trainer import Experience, Trainer

In [2]:
def mock_experience() -> Experience:
    old_observation = mock_observation()
    action = Action(
        mid_price=np.random.uniform(0.5, 1.5), spread=np.random.uniform(0, 1)
    )
    return Experience(
        old_observation=old_observation,
        action=action,
        reward=mock_reward(old_observation, action),
        new_observation=old_observation,
    )


def mock_observation() -> Observation:
    return Observation(
        cash_balance=np.random.uniform(1, 4096),
        asset_balance=np.random.uniform(1, 4096),
        best_ask=np.random.uniform(0.5, 1.5),
        best_bid=np.random.uniform(0.5, 1.5),
    )


def mock_reward(observation: Observation, action: Action):
    return -(
        (action.ask - 1.5) ** 2
        + (action.bid - 0.5) ** 2
    )


mock_experience()


Experience(old_observation=Observation(cash_balance=329.0765463642522, asset_balance=2803.503761912187, best_ask=0.691846145317189, best_bid=1.0547728696569534), action=Action(mid_price=1.3207983987748582, spread=0.08179399496956685), reward=-0.5248098544982219, new_observation=Observation(cash_balance=329.0765463642522, asset_balance=2803.503761912187, best_ask=0.691846145317189, best_bid=1.0547728696569534))

In [3]:
def minimize_me(hyperparameters):
    try:
        agent = train(*hyperparameters)
    except:
        return float("inf")
    return -np.mean(
        [
            mock_reward(observation, agent.decide(observation.batch).sample()[0])
            for observation in test_observations
        ]
    )

def train(actor_lr, max_actor_grad_norm, critic_lr, max_critic_grad_norm, uncertainty):
    actor = Actor()
    critic = Critic()
    agent = Agent(
        actor=actor,
        actor_optimizer=torch.optim.Adam(actor.parameters(), lr=actor_lr),
        max_actor_grad_norm=max_actor_grad_norm,
        critic=critic,
        critic_optimizer=torch.optim.Adam(critic.parameters(), lr=critic_lr),
        max_critic_grad_norm=max_critic_grad_norm,
        discount_factor=0.99,
        uncertainty=uncertainty,
    )

    for _ in range(1024):
        Trainer.train_(agent, experiences=[mock_experience() for _ in range(16)])
    return agent

test_observations = [mock_observation() for _ in range(256)]
minimize_me((1e-3, 1e-2))


inf

In [4]:
hyperspace = [
    hyperopt.hp.lognormal("actor_lr", mu=-8, sigma=3),
    hyperopt.hp.lognormal("max_actor_grad_norm", mu=5, sigma=3),
    hyperopt.hp.lognormal("critic_lr", mu=-8, sigma=3),
    hyperopt.hp.lognormal("max_critic_grad_norm", mu=5, sigma=3),
    hyperopt.hp.lognormal("uncertainty", mu=-4, sigma=2),
]
hyperopt.pyll.stochastic.sample(hyperspace)


(0.00019481160118529212,
 24992.62055075102,
 0.00010507817998303261,
 1905.077021330425,
 0.1448395133560849)

In [5]:
best_hyperparameters = hyperopt.fmin(
    fn=minimize_me, space=hyperspace, algo=hyperopt.tpe.suggest, max_evals=64
)
best_hyperparameters


100%|██████████| 64/64 [03:59<00:00,  3.74s/trial, best loss: 1.8793413708118585]


{'actor_lr': 0.0009490500438035371,
 'critic_lr': 8.153746823532911e-06,
 'max_actor_grad_norm': 700.1554595859798,
 'max_critic_grad_norm': 3.7389421482500427,
 'uncertainty': 0.1619064668856621}

In [6]:
best_agent = train(**best_hyperparameters)
decision_plot(best_agent)

In [7]:
utility_plot(best_agent)