## Messing around with reinforcement learning and multi-agent trading

In [1]:
import numpy as np
import torch
from itertools import product, combinations, permutations
from collections import defaultdict

### Q class

In [5]:
class Q_table:
    """
    Tabular action-value store keyed by discrete state.

    A state is a pair ``(i, j)`` with ``i in range(Np)`` and
    ``j in range(Nv)``; each state owns a length-``Nx`` float array holding
    one value per action index.

    Parameters:
    Np (int): Number of values taken by the first state coordinate.
    Nv (int): Number of values taken by the second state coordinate.
    Nx (int): Number of actions per state.
    initial_values (mapping, optional): ``{state: sequence of Nx numbers}``
        used to seed the table. States that are not listed start at zero.
        The rows are copied, so the caller's data is never mutated.

    Raises:
    KeyError: If ``initial_values`` names a state outside the grid.
    ValueError: If a seeded row does not contain exactly ``Nx`` entries.
    """

    def __init__(self, Np=31, Nv=10, Nx=15, initial_values=None):
        self.Np = range(Np)
        self.Nv = range(Nv)

        # Every state starts with an all-zero row of action values.
        self.Q = {state: np.zeros(Nx) for state in product(self.Np, self.Nv)}

        # Previously this argument was accepted but ignored; honour it now.
        if initial_values is not None:
            for state, row in initial_values.items():
                if state not in self.Q:
                    raise KeyError(f"unknown state in initial_values: {state!r}")
                # np.array copies, keeping the table independent of the input.
                seeded = np.array(row, dtype=float)
                if seeded.shape != (Nx,):
                    raise ValueError(
                        f"initial_values[{state!r}] must have shape ({Nx},), "
                        f"got {seeded.shape}"
                    )
                self.Q[state] = seeded

    def get_Q_value(self, state, action):
        """Return the stored value for ``action`` in ``state``."""
        return self.Q[state][action]

    def get_best_action(self, state):
        """Return the index of the highest-valued action (first one on ties)."""
        return np.argmax(self.Q[state])

    def get_best_value(self, state):
        """Return the largest action value stored for ``state``."""
        return np.max(self.Q[state])

    def update(self, state, action, value):
        """Overwrite the stored value for ``action`` in ``state``."""
        self.Q[state][action] = value

### Fixed-point iteration methods for solving discretization

In [26]:
def solve_chiN(I, xi, sigma_u, sigma_v, theta, tol=1e-12, max_iter=10000):
    """
    Compute the noncollusive slope chi^N of the Kyle-type model.

    The value is the fixed point of the three relations that the notebook
    attributes to Section 3.2 of the paper:
      gamma^N  = (I*chi^N) / [(I*chi^N)^2 + (sigma_u/sigma_v)^2]
      lambda^N = [theta * gamma^N + xi] / (theta + xi^2)
      chi^N    = 1 / [(I+1)*lambda^N]

    Iteration starts from chi = 0.1 and stops as soon as two successive
    iterates differ by less than `tol`.

    Returns:
      float: chi^N (the latest iterate at the moment of convergence)

    Raises:
      RuntimeError: if `max_iter` iterations pass without convergence
    """
    def next_chi(current):
        # One pass through gamma^N -> lambda^N -> chi^N.
        demand = I * current
        gamma = demand / (demand**2 + (sigma_u / sigma_v)**2)
        lam = (theta * gamma + xi) / (theta + xi**2)
        return 1.0 / ((I + 1) * lam)

    estimate = 0.1  # Arbitrary starting point
    for _ in range(max_iter):
        previous, estimate = estimate, next_chi(estimate)
        if abs(estimate - previous) < tol:
            return estimate

    raise RuntimeError("solve_chiN did not converge within max_iter")


def solve_chiM(I, xi, sigma_u, sigma_v, theta, tol=1e-12, max_iter=10000):
    """
    Compute the *perfect-collusion* slope chi^M of the Kyle-type model.

    The value is the fixed point of the three relations that the notebook
    attributes to Section 3.3 of the paper:
      gamma^M  = (I*chi^M) / [(I*chi^M)^2 + (sigma_u/sigma_v)^2]
      lambda^M = [theta * gamma^M + xi] / (theta + xi^2)
      chi^M    = 1 / [2*I * lambda^M]

    Iteration starts from chi = 0.1 and stops as soon as two successive
    iterates differ by less than `tol`.

    Returns:
      float: chi^M (the latest iterate at the moment of convergence)

    Raises:
      RuntimeError: if `max_iter` iterations pass without convergence
    """
    def next_chi(current):
        # One pass through gamma^M -> lambda^M -> chi^M.
        demand = I * current
        gamma = demand / (demand**2 + (sigma_u / sigma_v)**2)
        lam = (theta * gamma + xi) / (theta + xi**2)
        return 1.0 / (2.0 * I * lam)

    estimate = 0.1  # Arbitrary starting point
    for _ in range(max_iter):
        previous, estimate = estimate, next_chi(estimate)
        if abs(estimate - previous) < tol:
            return estimate

    raise RuntimeError("solve_chiM did not converge within max_iter")

### Informed (potentially) Collusive Agents

In [None]:
# Number of informed agents. Read as a module-level global by
# InformedAgent.__init__ (forwarded to solve_chiN / solve_chiM) and by
# simulate() when it builds the list of informed agents.
I = 3
class InformedAgent:
    """
    Tabular Q-learning agent with per-state decaying exploration.

    Construction builds a Q_table, a visit counter per state, a discrete
    grid for v and a discrete grid of actions x. The action grid is derived
    from the noncollusive and collusive slopes returned by solve_chiN and
    solve_chiM, both evaluated with the module-level agent count ``I``.

    Parameters:
    Np, Nv (int): State-grid sizes; passed to Q_table and used for n_states.
    Nx (int): Number of discrete actions.
    rho (float): Weight on the best next-state value in `update`.
    alpha (float): Weight on new information in `update`.
    beta (float): Decay rate of the exploration probability.
    sigma_v, v_bar (float): Scale and location of the v grid.
    sigma_u, xi, theta (float): Forwarded to solve_chiN / solve_chiM.
    iota (float): Fraction of |chiN - chiM| added beyond each end of the
        action grid.
    """

    def __init__(self, Np=31, Nv=10, Nx=15,
                 rho=0.95, alpha=0.01, beta=1e-5,
                 sigma_v=1, v_bar=1, sigma_u=0.1,
                 xi=500, theta=0.1, iota=0.1):
        # Sizes of the action and state spaces.
        self.n_actions = Nx
        self.Np, self.Nv = Np, Nv
        self.n_states = Np * Nv

        # Learning hyper-parameters.
        self.rho, self.alpha, self.beta = rho, alpha, beta

        # Location and scale used by the v grid.
        self.sigma_v, self.v_bar = sigma_v, v_bar

        # Action-value table used for learning.
        self.Q = Q_table(Np=Np, Nv=Nv, Nx=Nx)

        # Per-state visit counts; they drive the exploration decay.
        self.state_count = defaultdict(int)

        # Grid for v.
        self.v_discrete = self.get_grid_point_values_v()

        # Grid for x: symmetric around zero and wide enough to contain both
        # benchmark slopes (taking v - v_bar = 1), plus a margin on each side.
        model = dict(I=I, xi=xi, sigma_u=sigma_u, sigma_v=sigma_v, theta=theta)
        chi_noncollusive = solve_chiN(**model)
        chi_collusive = solve_chiM(**model)
        widest = max(chi_noncollusive, chi_collusive)
        margin = iota * abs(chi_noncollusive - chi_collusive)
        self.x_discrete = np.linspace(-widest - margin, widest + margin, Nx)

        # TODO: the grid for p is not built yet.

    def get_epsilon(self, state):
        """
        Return the exploration probability for `state` and record the visit.

        The probability is exp(-beta * n), where n is the number of earlier
        calls made for this state.
        """
        visits = self.state_count[state]
        self.state_count[state] = visits + 1
        return np.exp(-self.beta * visits)

    def get_action(self, state):
        """
        Pick an action index for `state`.

        With the probability given by `get_epsilon` a uniformly random
        action is drawn; otherwise the best action in the Q-table is used.
        """
        epsilon = self.get_epsilon(state)
        explore = np.random.rand() < epsilon
        if not explore:
            return self.Q.get_best_action(state)
        return np.random.randint(self.n_actions)

    def update(self, state, action, reward, next_state):
        """
        Blend the stored value of (state, action) with a new target.

        The new value is
        alpha * (reward + rho * best value of next_state)
        + (1 - alpha) * old value.
        """
        target = reward + self.rho * self.Q.get_best_value(next_state)
        previous = self.Q.get_Q_value(state, action)
        self.Q.update(
            state,
            action,
            self.alpha * target + (1 - self.alpha) * previous,
        )

    def get_grid_point_values_v(self):
        """
        Build the zero-indexed dictionary of grid values for v.

        Entry k is v_bar + sigma_v * z_k, where z_k is the standard normal
        quantile at probability (2k + 1) / (2 * Nv) for k = 0 .. Nv - 1.
        """
        probabilities = torch.tensor(
            [(2 * k + 1) / (2 * self.Nv) for k in range(self.Nv)]
        )
        quantiles = torch.distributions.Normal(0, 1).icdf(probabilities)
        grid = {}
        for idx, quantile in enumerate(quantiles):
            grid[idx] = float(self.v_bar + self.sigma_v * quantile)
        return grid


In [18]:
# Quick check: build an agent with a 10-point v grid and print that grid.
a = InformedAgent(Nv = 10)
print(a.v_discrete)

{0: -0.6448533535003662, 1: -0.03643333911895752, 2: 0.32551026344299316, 3: 0.6146795153617859, 4: 0.8743386268615723, 5: 1.1256613731384277, 6: 1.3853204250335693, 7: 1.6744897365570068, 8: 2.036433696746826, 9: 2.644853353500366}


### Preferred Habitat Investor

In [None]:
class PreferredHabitatAgent:
    """
    Investor whose order is linear in the gap between price and v_bar.

    Parameters:
    xi (float): Slope of the response to the price gap.
    v_bar (float): Reference value the price is compared with.
    """

    def __init__(self, xi=500, v_bar=1):
        self.xi, self.v_bar = xi, v_bar

    def get_action(self, pt):
        """Return the order -xi * (pt - v_bar) for the price `pt`."""
        return -self.xi * (pt - self.v_bar)
    
    

### Adaptive Market Makers

In [None]:
class CircularBuffer:
    """
    Fixed-length ring of floats that overwrites its oldest entry.

    The storage starts as `size` zeros, so slots that have not been written
    yet read back as 0.0.
    """

    def __init__(self, size):
        self.size = size
        self.buffer = np.zeros(size)
        self.index = 0  # slot that the next `add` will overwrite

    def add(self, value):
        """Store `value` in the current slot and advance, wrapping at the end."""
        slot = self.index
        self.buffer[slot] = value
        self.index = (slot + 1) % self.size

    def get(self):
        """Return a copy of the contents ordered from oldest to newest."""
        # Rotating left by `index` moves the oldest slot to position 0.
        return np.roll(self.buffer, -self.index)

class AdaptiveMarketMaker:
    """
    Market maker that prices order flow from regressions on recent history.

    Parameters:
    theta (float): Weight used when combining the two regression slopes.
    Tm (int): Length of each rolling history buffer.
    """

    def __init__(self, theta, Tm):
        self.theta = theta
        self.Tm = Tm

        # One rolling buffer per tracked series.
        self.vars_ = ['v', 'p', 'z', 'y']
        self.historical_data = {}
        for name in self.vars_:
            self.historical_data[name] = CircularBuffer(size=self.Tm)

    def OLS(self, y, X):
        """
        Fit y = slope * X + intercept by least squares.
        Parameters:
        y (CircularBuffer): The dependent variable.
        X (CircularBuffer): The independent variable.
        Returns:
        coef_ (ndarray): The fitted coefficients, ordered [slope, intercept].
        """
        target = y.get()
        regressor = X.get()

        # Design matrix: the regressor in column 0, a constant in column 1.
        design = np.vstack([regressor, np.ones(len(regressor))]).T
        return np.linalg.lstsq(design, target, rcond=None)[0]

    def determine_price(self, yt):
        """
        Determines the price for a given input from the stored history.
        Two OLS fits are run: `z` on `p` (slope xi_1) and `v` on `y`
        (slope gamma_1, intercept gamma_0). The price is
        gamma_0 + lambda * yt, with
        lambda = (xi_1 + theta * gamma_1) / (xi_1**2 + theta).
        Parameters:
        yt (float): The input value for which the price needs to be determined.
        Returns:
        float: The determined price based on the input `yt`.
        """
        # NOTE(review): PreferredHabitatAgent.get_action produces
        # z = -xi * (p - v_bar), so the fitted slope of z on p would be
        # negative. Confirm the sign convention expected for xi_1 here.
        history = self.historical_data
        xi_1, _ = self.OLS(history['z'], history['p'])
        gamma_1, gamma_0 = self.OLS(history['v'], history['y'])
        lambda_ = (xi_1 + self.theta * gamma_1) / (xi_1**2 + self.theta)
        return gamma_0 + lambda_ * yt

    def update(self, vt, pt, zt, yt):
        """
        Appends one observation to each history buffer.
        Parameters:
        vt (float): The value of `v` at time `t`.
        pt (float): The value of `p` at time `t`.
        zt (float): The value of `z` at time `t`.
        yt (float): The value of `y` at time `t`.
        """
        # The tuple order must match self.vars_.
        for name, sample in zip(self.vars_, (vt, pt, zt, yt)):
            self.historical_data[name].add(sample)

### Noise Trader

In [None]:
class NoiseAgent:
    """
    Trader whose order is a zero-mean normal draw.

    Parameters:
    sigma (float): Standard deviation of the draw.
    """

    def __init__(self, sigma=0.1):
        self.sigma = sigma

    def get_action(self):
        """Return one draw from N(0, sigma^2) using numpy's global RNG."""
        return np.random.normal(0.0, self.sigma)

In [None]:
def simulate(T = 1000000):
    market_maker = AdaptiveMarketMaker()
    noise_agent = NoiseAgent()
    preferred_habitat_agent = PreferredHabitatAgent()
    informed_agents = [InformedAgent() for _ in range(I)]
    _pt = 1
    for t in range(T):

        ut = noise_agent.get_action()
        yt = []
        # state = (_pt, )
        for agent in informed_agents:
            yt.append(agent.get_action())