In [13]:
import datetime
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential, load_model
# Seed Python's RNG from the current wall-clock time.
# random.seed() only accepts None/int/float/str/bytes/bytearray; passing a
# datetime object raises TypeError on Python 3.11+, so use its POSIX timestamp.
random.seed(datetime.datetime.now().timestamp())

In [6]:
# Discrete action grid: 21 values stepping by 0.1 from -1.0 up to 1.0 inclusive.
actions = [0.1 * step for step in range(-10, 11)]

In [9]:
# Show the action grid; some entries carry binary floating-point artefacts
# (e.g. -0.7000000000000001) because they are computed by multiplying by 0.1.
actions

[-1.0,
 -0.9,
 -0.8,
 -0.7000000000000001,
 -0.6000000000000001,
 -0.5,
 -0.4,
 -0.30000000000000004,
 -0.2,
 -0.1,
 0.0,
 0.1,
 0.2,
 0.30000000000000004,
 0.4,
 0.5,
 0.6000000000000001,
 0.7000000000000001,
 0.8,
 0.9,
 1.0]

In [10]:
class RLAgent:
    """Q-learning agent backed by a small fully connected Keras network.

    The agent keeps two networks with the same architecture: ``model`` is the
    one being trained and queried for actions, ``target_model`` supplies the
    next-state Q-values during ``replay`` and is synchronised with ``model``
    by ``updateModel``. Transitions are stored in a fixed-size replay buffer.
    """

    ALPHA = 0.1 # Step size used when moving a Q-value towards its target
    EPOCHS = 1000 # Epochs passed to model.fit on every replay() call
    BATCH_SIZE = 4096 # Batch size passed to model.fit
    INPUT_DIM = 5 # portfolio_amount, stock_price, negative_score, neutral_score, positive_score
    OUTPUT_DIM = len(actions) # see actions above
    MEMORY_SIZE = 20000 # Number of state transitions to store
    NUM_ACTIONS = OUTPUT_DIM
    GAMMA = 0.99 # Discount factor

    # Replay buffer; allocated per instance in __init__ so that agents do not
    # share (and overwrite) each other's transitions.
    memory = None # List of state transitions, not stored in order of trajectory
    model = None # Q learning model
    target_model = None # Q learning model
    modelPath = None
    fig_count = 0 # Index used to name the next saved loss figure

    def __init__(self, modelPath = None):
        """Create the agent, loading a saved model when a path is given.

        Args:
            modelPath: Path of a saved Keras model to load. When None, a new
                model is created and "model.h5" is used as the save path.
                When loading fails, a new model is created and the given path
                is kept as the save path.
        """
        self.memory = [None] * self.MEMORY_SIZE
        self.model = None
        self.modelPath = modelPath
        if self.modelPath is not None:
            try:
                self.model = load_model(modelPath)
                print("Model Successfully Loaded")
            except Exception as exc:
                # Best effort: fall back to a new model, but say why the load failed.
                print(f"Could not load model from {modelPath!r} ({exc}); creating a new model")
                self.model = None
        else:
            self.modelPath = "model.h5"
        if self.model is None:
            self.model = self.createModel()
        self.target_model = self.createModel()
        self.target_model.set_weights(self.model.get_weights())

    def createModel(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Sequential model with two 32-unit ReLU layers and a
            linear output layer of OUTPUT_DIM units, using mean squared error
            loss and the Adam optimizer.
        """
        init = tf.keras.initializers.HeUniform()
        model = Sequential()
        model.add(Dense(32, input_dim = self.INPUT_DIM, activation='relu', kernel_initializer=init))
        model.add(Dense(32, activation='relu', kernel_initializer=init))
        model.add(Dense(self.OUTPUT_DIM, kernel_initializer=init))

        model.compile(loss = tf.keras.losses.MeanSquaredError(), optimizer='adam')
        return model

    def replay(self):
        """Fit ``model`` on every stored transition and save the loss curve.

        Q-value targets are built from the current predictions of ``model``,
        with the entry of the taken action moved by ALPHA towards
        ``reward`` (terminal) or ``reward + GAMMA * max(target Q(next state))``
        (non-terminal). The training loss per epoch is plotted to
        ``figures/fig_<n>.png``. Does nothing when no transition is stored.
        """
        batch = [m for m in self.memory if m is not None]
        if not batch:
            # Nothing recorded yet; predict/fit would fail on an empty array.
            return
        random.shuffle(batch)
        # each element in memory is state0, action, state1, reward, terminal
        s0 = np.array([m[0] for m in batch])
        s1 = np.array([m[2] for m in batch])
        q0 = self.model.predict(s0)
        q1 = self.target_model.predict(s1)

        # Apply the Bellman (Q-learning) update to the taken action's Q-value
        for i, (_, action, _, reward, terminal) in enumerate(batch):
            if terminal:
                target = reward
            else:
                target = reward + self.GAMMA * np.max(q1[i])
            q0[i][action] += self.ALPHA * (target - q0[i][action])

        history = self.model.fit(s0, q0, batch_size = self.BATCH_SIZE, epochs=self.EPOCHS, verbose=0)
        losses = history.history['loss']

        # Make sure the output directory exists so a long fit is not lost to
        # a FileNotFoundError when saving the figure.
        os.makedirs("figures", exist_ok=True)
        fig, ax = plt.subplots()
        sns.lineplot(x = range(len(losses)), y = losses, ax = ax)
        fig.savefig(f"figures/fig_{self.fig_count}.png")
        plt.close(fig) # release the figure; replay() may be called many times
        self.fig_count += 1

    def updateModel(self):
        """Copy the weights of ``model`` into ``target_model`` and save it to ``modelPath``."""
        self.target_model.set_weights(self.model.get_weights())

        # save model
        self.target_model.save(self.modelPath)

    def parseState(self, gameState):
        """Convert a game state into the network input; currently returns it unchanged."""
        return gameState

    def addStateTransition(self, stateTransition):
        """Store a transition in the replay buffer.

        The first empty slot is used; when the buffer is full a randomly
        chosen stored transition is overwritten.
        """
        for i, slot in enumerate(self.memory):
            if slot is None:
                self.memory[i] = stateTransition
                return
        # Buffer full: replace a random element. Position in the buffer does
        # not matter because replay() shuffles the transitions.
        self.memory[random.randint(0, len(self.memory) - 1)] = stateTransition

    def stateTransition(self, gs0, action, gs1, reward, terminal):
        """Record one step as ``[state0, action, state1, reward, terminal]``.

        Args:
            gs0: Game state before the action.
            action: Index of the action taken.
            gs1: Game state after the action.
            reward: Reward received for the step.
            terminal: Truthy when gs1 ends the episode.
        """
        st = [self.parseState(gs0), action, self.parseState(gs1), reward, terminal]
        self.addStateTransition(st)

    def getAction(self, gs, ep = 0):
        """Choose an action index with an epsilon-greedy policy.

        Args:
            gs: Current game state.
            ep: Probability of returning a uniformly random action index
                instead of the greedy one.

        Returns:
            An int index in ``range(NUM_ACTIONS)``.
        """
        if random.random() < ep:
            return random.randrange(0, self.NUM_ACTIONS)
        s = self.parseState(gs)
        # Pass an explicit batch of one as an array, as replay() does.
        q = self.model.predict(np.array([s]))

        return int(np.argmax(q[0]))

    def parseAction(self, action):
        """Translate an action index into ``[code, magnitude]``.

        ``code`` is 0 when the action value is negative, 1 when it is exactly
        zero and 2 when it is positive; ``magnitude`` is its absolute value.
        NOTE(review): the codes presumably mean sell/hold/buy - confirm
        against the environment that consumes them.
        """
        a = actions[action]
        return [0 if a < 0 else (1 if a == 0 else 2), abs(a)]