In [1]:
import numpy as np
import plotly.graph_objects as go
import torch
from tqdm.notebook import tqdm, trange
from tradezoo.agent import Action, DecisionBatch

In [2]:
class Normal(torch.nn.Module):
    """Learnable normal distribution whose standard deviation never drops below a floor.

    Args:
        uncertainty: Constant added to the absolute value of the learned scale.
    """

    def __init__(self, uncertainty: float):
        super().__init__()
        self.uncertainty = uncertainty
        # One-element float parameters: the mean starts at 0, the raw scale at 1.
        self.loc = torch.nn.Parameter(torch.zeros(1, dtype=torch.float))
        self.scale = torch.nn.Parameter(torch.ones(1, dtype=torch.float))

    def torch_distribution(self):
        """Build a torch Normal from the current parameter values."""
        # abs() keeps the standard deviation non-negative even if the raw
        # scale parameter is driven below zero during optimisation.
        std = torch.abs(self.scale) + self.uncertainty
        return torch.distributions.Normal(self.loc, std)

# Sanity check: draw one sample from a freshly initialised Normal
# (loc starts at 0, raw scale at 1, plus the 0.001 uncertainty floor).
Normal(uncertainty=0.001).torch_distribution().sample()

tensor([1.4617])

In [3]:
class MockAgent(torch.nn.Module):
    def __init__(self, uncertainty: float):
        super().__init__()
        self.log_mid_price = Normal(uncertainty=uncertainty)
        self.log_spread = Normal(uncertainty=uncertainty)

    def decide(self):
        return DecisionBatch(
            log_mid_price=self.log_mid_price.torch_distribution(),
            log_spread=self.log_spread.torch_distribution(),
        )
    
    def loss(self, action: Action):
        return -self.reward(action) * self.decide().log_probabilities([action])
    
    def mean_reward(self, num_samples=256):
        return np.mean([self.reward(self.decide().sample()[0]) for _ in range(num_samples)])
    
    @classmethod
    def reward(cls, action: Action):
        return -((action.ask - 1.5) ** 2 + (action.bid - 0.5) ** 2)


# Sanity check: sample from an untrained agent's decision and show the first action.
MockAgent(uncertainty=0.001).decide().sample()[0]


Action(log_mid_price=0.26585787534713745, log_spread=0.932205080986023)

In [4]:
def train(learning_rate, num_steps):
    """Train a fresh MockAgent with Adam and return it.

    Each step samples one action from the agent's current decision and takes
    a single optimiser step on the agent's loss for that action.

    Args:
        learning_rate: Learning rate passed to Adam.
        num_steps: Number of sample-and-update iterations.

    Returns:
        The trained MockAgent.
    """
    learner = MockAgent(uncertainty=0.001)
    adam = torch.optim.Adam(learner.parameters(), lr=learning_rate)
    for _ in range(num_steps):
        # The unpacking pattern insists that the sample holds exactly one action.
        [sampled_action] = learner.decide().sample()
        adam.zero_grad()
        step_loss = learner.loss(sampled_action)
        step_loss.backward()
        adam.step()
    return learner


In [5]:
# Train one agent, then histogram the mid price, ask and bid of actions sampled from it.
example_agent = train(learning_rate=2e-2, num_steps=4096)
example_actions = [example_agent.decide().sample()[0] for _ in range(4096)]
go.Figure(
    data=[
        go.Histogram(
            name=trace_name,
            x=[getattr(action, attribute) for action in example_actions],
        )
        # One trace per quantity, in the same order: mid, ask, bid.
        for trace_name, attribute in (
            ("Mid", "mid_price"),
            ("Ask", "ask"),
            ("Bid", "bid"),
        )
    ]
)

In [6]:
# Grid search: train a fresh agent for every (learning rate, step count) pair
# and record the log of its negated mean reward.
grid_learning_rates = [1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 1e-1, 2e-1, 5e-1]
grid_num_steps = [256, 1024, 4096]
# Outer index follows learning rates (surface y), inner index follows step counts (surface x).
grid_log_losses = [
    [
        np.log(-train(learning_rate=rate, num_steps=steps).mean_reward())
        for steps in grid_num_steps
    ]
    for rate in tqdm(grid_learning_rates)
]
go.Figure(
    data=[go.Surface(x=grid_num_steps, y=grid_learning_rates, z=grid_log_losses)],
    layout={
        "scene": {
            "xaxis_title": "Number of steps",
            "xaxis_type": "log",
            "yaxis_title": "Learning rate",
            "yaxis_type": "log",
            "zaxis_title": "Log mean loss",
        }
    },
)

  0%|          | 0/9 [00:00<?, ?it/s]