In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import gym
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import logging
from datetime import datetime

from uniswapv3_simulator.pool import Uniswapv3Pool
from uniswapv3_simulator.optimization.environments import OneStepEnvironment
from uniswapv3_simulator.optimization.liquidity_fns import sech2_fn
from uniswapv3_simulator.math import tick_to_sqrt_price, sqrt_price_to_tick

from uniswapv3_simulator.optimization.ddpg.ddpg import (
    DDPG,
    DDPGTrainer,
    DeepActorModel,
    TrainArgs
)
from uniswapv3_simulator.optimization.ddpg.exploration_noise import OrnsteinUhlenbeckProcess, GaussianProcess
from uniswapv3_simulator.optimization.ddpg.schedulers import ExponentialScheduler

import os

# Per-run timestamp (yymmddHHMMSS) so every fresh kernel run writes to its
# own log file instead of appending to a previous run's file.
timestamp = datetime.now().strftime('%y%m%d%H%M%S')

# logging.basicConfig opens the target file immediately and does not create
# missing parent directories, so make sure ./logs exists before configuring.
os.makedirs('./logs', exist_ok=True)

# NOTE: basicConfig does nothing if the root logger already has handlers
# (for example when this cell is re-run in the same kernel), in which case
# the earlier log file keeps being used.
logging.basicConfig(
    level=logging.INFO,
    filename=f'./logs/rl_test_{timestamp}.log'
)

# Root level is INFO; the 'optimization' logger (and its children) also emit
# DEBUG records.
# NOTE(review): this only matches loggers whose name starts with
# 'optimization'; if the library names its loggers by full module path
# ('uniswapv3_simulator.optimization...') this line has no effect - confirm.
logging.getLogger('optimization').setLevel(logging.DEBUG)

In [2]:
# --- Scalar settings passed to the environment ------------------------------
init_price, pool_fees = 100, 0.01
tick_width, max_price = 1, 500

# --- Simulation workload and environment seed -------------------------------
n_sims_per_step, n_jobs = 3000, 7
seed = 6

# --- Frozen scipy distributions handed to the environment -------------------
# stats.uniform(loc, scale) is uniform on [loc, loc + scale];
# stats.randint(low, high) is a discrete uniform on the integers [low, high).
# NOTE(review): what each of mu / sigma / alpha / beta / q represents is
# defined by OneStepEnvironment, not here - confirm against that class.
mu = stats.uniform(-0.025, 0.05)    # [-0.025, 0.025]
sigma = stats.uniform(0.0, 0.1)     # [0.0, 0.1]
alpha = stats.randint(5, 10)        # integers 5..9
beta = stats.randint(50, 150)       # integers 50..149
q = stats.uniform(0.5, 0.0)         # zero-width interval at 0.5

# Give every distribution its own fixed seed (1..5, in the order listed) so
# that draws from each one are repeatable.
for _dist, _dist_seed in zip((mu, sigma, alpha, beta, q), range(1, 6)):
    _dist.random_state = _dist_seed
del _dist, _dist_seed

# Build the environment from the configuration values defined above in this
# cell. `tick_width` is passed by name instead of repeating the literal 1, so
# that editing the configuration value actually changes the environment.
env = OneStepEnvironment(
    init_price, pool_fees, sech2_fn,
    mu, sigma, alpha, beta, q,
    n_jobs=n_jobs,
    n_sims_per_step=n_sims_per_step,
    tick_width=tick_width,
    max_price=max_price,
    seed=seed
)

In [3]:
# Network dimensions, named once so the model and the exploration noise
# cannot drift apart. The model repr shows the actor mapping 7 input features
# to 3 outputs and the critic consuming 7 + 3 = 10 features.
state_dim = 7
action_dim = 3

# One seed for both torch and the trainer.
train_seed = 123

# Seed torch's global RNG before the networks are constructed so the initial
# layer weights are the same on every Restart & Run All.
# NOTE(review): assumes DeepActorModel initialises its layers from torch's
# global RNG and does not seed internally - confirm.
torch.manual_seed(train_seed)

model = DeepActorModel(
    state_dim,
    action_dim,
    # Two hidden-layer specs of 128 and 64 units; both networks in the model
    # repr use 128 -> 64.
    # NOTE(review): which tuple belongs to the actor and which to the critic
    # is not visible here - confirm.
    (128, 64),
    (128, 64),
    # NOTE(review): presumably a per-action output scale - confirm against
    # DeepActorModel.
    np.array([200, 100, 10000])
)

agent = DDPG(
    model=model,
    gamma=0.99,
    tau=1e-3,
    optimizer=optim.Adam,
    # The critic uses a 10x larger learning rate than the actor.
    actor_optimizer_kwargs={
        'lr': 1e-4,
        'weight_decay': 1e-5
    },
    critic_optimizer_kwargs={
        'lr': 1e-3,
        'weight_decay': 1e-5
    },
    clip_gradients=None
)

train_args = TrainArgs(
    train_steps=10000,
    batch_size=32,
    memory_size=100000,
    exploration_noise=GaussianProcess,
    noise_kwargs={
        # One noise component per action.
        'size': (action_dim, ),
        # NOTE(review): presumably the std decays from the first array toward
        # the second at rate 0.9995 - confirm against ExponentialScheduler.
        'std': ExponentialScheduler(
            np.array([30, 30, 1000]),
            np.array([5, 5, 100]),
            0.9995
        )
    },
    update_start=200,
    update_freq=5,
    # (lower, upper) bounds per action; every upper bound is +inf.
    clip_actions=(
        np.array([0, 1, 1]),
        np.full(action_dim, np.inf)
    ),
    seed=train_seed
)

trainer = DDPGTrainer(agent, env, train_args)

In [4]:
# Display the module repr to check the critic and actor layer sizes.
model

DeepActorModel(
  (critic_layers): Sequential(
    (0): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=10, out_features=128, bias=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Linear(in_features=64, out_features=1, bias=True)
  )
  (actor_layers): Sequential(
    (0): BatchNorm1d(7, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=7, out_features=128, bias=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [5]:
%%time
# Run training and keep whatever trainer.train() returns in `rewards`.
# %%time must stay on the first line of the cell; it times the whole cell.
# NOTE: if the run is interrupted (as in the saved output below), the
# assignment never completes and `rewards` is left undefined.
rewards = trainer.train()

Episode:    50 | Time Steps:     1 | Mean Score: -8,806.80
Episode:   100 | Time Steps:     1 | Mean Score: -7,176.43
Episode:   150 | Time Steps:     1 | Mean Score: -12,049.99
Episode:   200 | Time Steps:     1 | Mean Score: -10,989.17
Episode:   250 | Time Steps:     1 | Mean Score: -73,998.76
Episode:   300 | Time Steps:     1 | Mean Score: -30,585.65
Episode:   350 | Time Steps:     1 | Mean Score: -45,116.09
Episode:   400 | Time Steps:     1 | Mean Score: -28,436.15
Episode:   450 | Time Steps:     1 | Mean Score: -38,704.52
Episode:   500 | Time Steps:     1 | Mean Score: -23,163.24
Episode:   550 | Time Steps:     1 | Mean Score: -22,566.96
Episode:   600 | Time Steps:     1 | Mean Score: -16,987.82
Episode:   650 | Time Steps:     1 | Mean Score: -21,071.98
Episode:   700 | Time Steps:     1 | Mean Score: -15,069.99
Episode:   750 | Time Steps:     1 | Mean Score: -13,086.29
Episode:   800 | Time Steps:     1 | Mean Score: -14,663.68
Episode:   850 | Time Steps:     1 | Mean 

Process SpawnPoolWorker-33123:
Process SpawnPoolWorker-33121:
Process SpawnPoolWorker-33124:
Process SpawnPoolWorker-33119:
Process SpawnPoolWorker-33118:
Process SpawnPoolWorker-33122:
Process SpawnPoolWorker-33120:


KeyboardInterrupt: 