## Messing around with reinforcement learning and multi-agent trading

In [1]:
import numpy as np
import torch as torch

### Q class

In [2]:
class Q_table:
    """Array-backed table of Q-values.

    The table is stored in ``self.Q`` as a NumPy array. Methods that take a
    ``state`` index the first axis; methods that also take an ``action``
    index the first two axes.
    """

    def __init__(self,
                 size,
                 initial_values=None):
        """Create the table.

        Args:
            size: Shape of the table, either an int or a tuple of ints
                such as ``(n_states, n_actions)``.
            initial_values: Optional array-like of starting Q-values. When
                omitted the table is filled with uniform samples from
                ``[0, 1)``.
        """
        self.size = size
        if initial_values is None:
            # random_sample accepts an int or a tuple shape, whereas
            # np.random.rand requires each dimension as its own argument
            # and raises TypeError when handed a tuple.
            self.Q = np.random.random_sample(size)
        else:
            # Coerce to a float ndarray so nested lists support
            # ``Q[state, action]`` indexing and float updates are not
            # truncated by an integer dtype.
            self.Q = np.asarray(initial_values, dtype=float)

    def get_Q(self, state, action=None):
        """Return the entry for ``state``, or for ``(state, action)``.

        With ``action`` omitted this returns ``self.Q[state]``; otherwise it
        returns the single entry ``self.Q[state, action]``.
        """
        if action is None:
            return self.Q[state]
        return self.Q[state, action]

    def set_Q(self, state, value):
        """Overwrite the entry (or whole row) stored for ``state``."""
        self.Q[state] = value

    def get_best_action(self, state):
        """Return the index of the largest Q-value stored for ``state``."""
        return np.argmax(self.Q[state])

    def get_best_value(self, state):
        """Return the largest Q-value stored for ``state``."""
        return np.max(self.Q[state])

    def get_Q_values(self):
        """Return the underlying table (not a copy)."""
        return self.Q

    def set_Q_values(self, values):
        """Replace the underlying table with ``values``."""
        self.Q = values

    def update(self, state, action, value):
        """Store ``value`` as the Q-value of the ``(state, action)`` pair."""
        self.Q[state, action] = value

In [None]:
class Agent:
    """Tabular Q-learning agent with an epsilon-greedy action choice."""

    def __init__(self,
                 n_states,
                 n_actions,
                 rho=0.9,
                 alpha=0.1,
                 epsilon=0.1):
        """Set up the agent and its Q-table.

        Args:
            n_states: Number of states (first dimension of the Q-table).
            n_actions: Number of actions (second dimension of the Q-table).
            rho: Weight applied to the best next-state value in the
                update target.
            alpha: Step size applied to the error in ``update``.
            epsilon: Probability of picking a uniformly random action in
                ``get_action``.
        """
        self.n_states = n_states
        self.n_actions = n_actions
        self.rho = rho
        self.alpha = alpha
        self.epsilon = epsilon
        self.Q = Q_table((n_states, n_actions))

    def get_action(self, state):
        """Return a random action with probability ``epsilon``, else the
        action with the highest Q-value for ``state``."""
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.n_actions)
        return self.Q.get_best_action(state)

    def update(self, state, action, reward, next_state):
        """Move Q(state, action) towards the one-step bootstrapped target.

        The target is ``reward + rho * max_a Q(next_state, a)`` and the
        stored value is shifted towards it by a fraction ``alpha`` of the
        error.
        """
        # Fetch the state's row and index the action: the table's getter is
        # keyed by state, so passing the action as a second argument fails.
        current = self.Q.get_Q(state)[action]
        target = reward + self.rho * self.Q.get_best_value(next_state)
        error = target - current
        self.Q.update(state, action, current + self.alpha * error)
