In [1]:
import logging
import multiprocessing as mp
from datetime import datetime
from pathlib import Path

import numpy as np
import torch
import torch.optim as optim
from scipy import stats

from uniswapv3_simulator.pool import Uniswapv3Pool
from uniswapv3_simulator.optimization.environments import OneStepEnvironment, ScaleWrapper
from uniswapv3_simulator.optimization.ddpg.ddpg import (
    DDPG,
    DDPGTrainer,
    DeepActorModel,
    TrainArgs
)
from uniswapv3_simulator.optimization.ddpg.exploration_noise import GaussianProcess
from uniswapv3_simulator.optimization.ddpg.schedulers import ExponentialScheduler

# One log file per run of this cell, named after the wall-clock start time
# (yymmddHHMMSS).
timestamp = datetime.now().strftime('%y%m%d%H%M%S')

# basicConfig(filename=...) opens the log file right away and raises
# FileNotFoundError when the target directory is missing, so make sure
# ./logs exists before configuring logging.
log_dir = Path('./logs')
log_dir.mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    filename=str(log_dir / f'rl_test_{timestamp}.log')
)
# More verbose output for the logger named 'optimization'.
# NOTE(review): if the library creates its loggers with __name__, their names
# would start with 'uniswapv3_simulator.' and this line would not affect
# them - confirm the logger name.
logging.getLogger('optimization').setLevel(logging.DEBUG)

In [2]:
# --- Reproducibility ---------------------------------------------------------
# A single master seed is expanded into eight seed values: indices 0-4 seed
# the parameter distributions below, index 5 the environment, and indices 6
# and 7 are consumed later (torch and the trainer).
SEED = 1234
seed_seq = np.random.SeedSequence(entropy=SEED)
seeds = seed_seq.generate_state(8)

# --- Pool layout -------------------------------------------------------------
init_price = 100
liquidity_bins = list(range(80, 120 + 1, 5))  # bin edges 80, 85, ..., 120

# --- Parameter distributions -------------------------------------------------
# Fully randomised setting, kept for reference:
#   fees  = stats.uniform(1e-4, 0.01 - 1e-4)
#   mu    = stats.uniform(-0.05, 0.1)
#   sigma = stats.uniform(1e-4, 0.1 - 1e-4)
#   alpha = stats.randint(1, 100 + 1)
#   beta  = stats.randint(100, 1000 + 1)
#
# Current experiment: only beta varies. The uniforms are given a width (scale)
# of 0.0 and alpha spans a single integer, which is intended to pin them to a
# constant. Side effect visible in the summary cell: mean()/std() of the
# zero-width uniforms are reported as nan.
fees = stats.uniform(0.01, 0.0)
mu = stats.uniform(0.00, 0.0)
sigma = stats.uniform(0.05, 0.0)
alpha = stats.randint(50, 50 + 1)
beta = stats.randint(500, 10000 + 1)  # vary beta

# Give every distribution its own seed (seeds[0] .. seeds[4], in this order).
for _dist, _seed in zip((fees, mu, sigma, alpha, beta), seeds):
    _dist.random_state = _seed

# --- Simulation settings -----------------------------------------------------
n_sims_per_step = 500
n_jobs = 1  # single worker; alternative was int(mp.cpu_count() / 1) - 1

env = OneStepEnvironment(
    init_price,
    liquidity_bins,
    fees,
    mu,
    sigma,
    alpha,
    beta,
    n_sims_per_step=n_sims_per_step,
    n_jobs=n_jobs,
    seed=seeds[5]
)

In [3]:
# Report mean and standard deviation of each parameter distribution in a
# single print call; every positional argument lands on its own output line.
# NOTE: in the recorded run the zero-width uniforms (fees, mu, sigma) show
# nan here, so nan for those three is expected with the current setup.
print(
    'Random Variables',
    f'fees:  mean={fees.mean():,.4f}, std={fees.std():,.4f}',
    f'mu:    mean={mu.mean():,.4f}, std={mu.std():,.4f}',
    f'sigma: mean={sigma.mean():,.4f}, std={sigma.std():,.4f}',
    f'alpha: mean={alpha.mean():,.2f}, std={alpha.std():,.2f}',
    f'beta:  mean={beta.mean():,.2f}, std={beta.std():,.2f}',
    sep='\n'
)

Random Variables
fees:  mean=nan, std=nan
mu:    mean=nan, std=nan
sigma: mean=nan, std=nan
alpha: mean=50.00, std=0.00
beta:  mean=5,250.00, std=2,742.70


  g2 = -6.0/5.0 * (d*d + 1.0) / (d*d - 1.0)


In [4]:
def obs_scale_fn(obs):
    """Reduce an observation to its element at index 4 and standardise it.

    The element is shifted by 5250.00 and divided by 2742.70, the mean and
    standard deviation printed for ``beta`` in the summary cell (index 4 is
    presumably beta, given the order the distributions are passed to the
    environment - confirm against the environment's observation layout).

    Parameters
    ----------
    obs : numpy.ndarray
        Raw observation; must support list-style indexing ``obs[[4]]``.

    Returns
    -------
    numpy.ndarray
        One-element array holding the standardised value.
    """
    # An earlier variant standardised all five observation entries with
    #   mean = [0.0051, 0.0000, 0.0501, 50.50, 550.00]
    #   std  = [0.0029, 0.0289, 0.0288, 28.87, 260.10]
    center = np.array([5250.00])
    spread = np.array([2742.70])

    selected = obs[[4]]
    return (selected - center) / spread

def action_scale_fn(action):
    """Multiply an action by 1e+5 and return the result."""
    factor = 1e+5
    return action * factor

def reward_scale_fn(reward):
    """Multiply a reward by 1e+2 and return the result."""
    factor = 1e+2
    return reward * factor

# Wrap the environment with the three scaling functions, but only once:
# `env` is rebound to its own wrapper, so re-running this cell without the
# guard would stack a second ScaleWrapper and apply the scaling twice.
# After editing a scale function, re-run the cell that builds the raw
# environment first, then this one.
if not isinstance(env, ScaleWrapper):
    env = ScaleWrapper(env, obs_scale_fn, action_scale_fn, reward_scale_fn)

In [5]:
# Seed torch before the networks are constructed.
torch.manual_seed(seeds[6])

# One action component per interval between consecutive liquidity bin edges.
action_size = len(liquidity_bins) - 1

# First argument 1: the scaled observation has a single entry (obs_scale_fn
# keeps one element). The same (128, 64) hidden sizes are passed twice.
hidden_sizes = (128, 64)
model = DeepActorModel(1, action_size, hidden_sizes, hidden_sizes)

# Optimiser settings: the critic uses a 10x larger learning rate than the actor.
actor_opt_kwargs = dict(lr=1e-4, weight_decay=1e-5)
critic_opt_kwargs = dict(lr=1e-3, weight_decay=1e-5)

agent = DDPG(
    model=model,
    gamma=0.99,
    tau=1e-3,
    optimizer=optim.Adam,
    actor_optimizer_kwargs=actor_opt_kwargs,
    critic_optimizer_kwargs=critic_opt_kwargs,
    clip_gradients=10.0
)

# Exploration noise with one component per action dimension.
# NOTE(review): the scheduler arguments are presumably (start, floor, decay)
# - confirm against ExponentialScheduler.
exploration_kwargs = dict(
    size=(action_size, ),
    std=ExponentialScheduler(0.2, 0.01, 0.9995)
)

train_args = TrainArgs(
    train_steps=3000,
    batch_size=64,
    memory_size=100000,
    exploration_noise=GaussianProcess,
    noise_kwargs=exploration_kwargs,
    update_start=50,
    update_freq=2,
    clip_actions=(1e-6, np.inf),  # lower bound 1e-6, no upper bound
    seed=seeds[7]
)
trainer = DDPGTrainer(agent, env, train_args)

In [6]:
# Display the model's layer layout to check the input/output sizes of the
# actor and critic networks.
model

DeepActorModel(
  (critic_layers): Sequential(
    (0): Linear(in_features=9, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=1, bias=True)
  )
  (actor_layers): Sequential(
    (0): Linear(in_features=1, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=8, bias=True)
  )
)

In [7]:
%%time
# Run the training loop and keep what trainer.train() returns in `rewards`.
# NOTE(review): the recorded run of this cell stopped with
# "mat1 and mat2 shapes cannot be multiplied (1x2 and 1x128)": the actor's
# first layer takes 1 input feature but was fed 2 values. Confirm the size of
# the observation that reaches the model before re-running.
rewards = trainer.train()

[1784.]
[889.5          2.06543692]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x2 and 1x128)