In [108]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
import math
import numpy as np
import random
from collections import deque
import matplotlib.pyplot as plt  
from tqdm import tqdm

In [109]:
class Agent:
    def __init__(self, state_size, is_eval=False, model_name=""):
        self.state_size = state_size # normalized previous days
        self.action_size = 3 # sit, buy, sell
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = load_model(model_name) if is_eval else self._model()
    def _model(self):
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        return model
    def act(self, state):
        if not self.is_eval and random.random()<= self.epsilon:
            return random.randrange(self.action_size)
        options = self.model.predict(state)
        return np.argmax(options[0])
    def expReplay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma *
                        np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            state = np.reshape(state, (1, window_size, num_features))  # Reshape as needed
            loss = self.model.fit(state, target_f, epochs=1, verbose=0).history['loss'][0]
            losses.append(loss)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [110]:
def formatPrice(n):
    return("-$" if n<0 else "$")+"{0:.2f}".format(abs(n))

def getStockDataVec(key):
    vec = []
    lines = open(key+".csv","r").read().splitlines()
    for line in lines[1:]:
        #print(line)
        #print(float(line.split(",")[4]))
        vec.append(float(line.split(",")[4]))
        #print(vec)
    return vec 

def sigmoid(x):
    return 1/(1+math.exp(-x))

def getState(data, t, n):
    d = t - n + 1
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
    res = []
    for i in range(n - 1):
        res.append(sigmoid(block[i + 1] - block[i]))
    return np.array([res]) 

In [117]:
import sys
import os
import json

stock_name = "data/AMZN"
window_size = 10
episode_count = 2000
stock_name = str(stock_name)
window_size = int(window_size)
episode_count = int(episode_count)
agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32
losses = []
model_dir = "saved_models"
losses_dir = "loss_file"

def find_latest_checkpoint():
    checkpoint_files = [int(f.split('.')[0]) for f in os.listdir(model_dir) if f.endswith('.keras')]
    return max(checkpoint_files) if checkpoint_files else None

latest_checkpoint = 0

latest_checkpoint = find_latest_checkpoint()

start_episode = 0
if latest_checkpoint:
    agent.model = load_model(os.path.join(model_dir, f"{latest_checkpoint}.keras"))
    with open(os.path.join(losses_dir, f"losses_{latest_checkpoint}.json"), 'r') as f:
        losses = json.load(f)
    start_episode = latest_checkpoint + 1
    print(f"Resuming from episode {start_episode}")

In [None]:
for e in tqdm(range(start_episode, episode_count + 1)):
    print("Episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []
    episode_loss = 0
    for t in range(l):
        action = agent.act(state)
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0
        if action == 1:  # buy
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))
        elif action == 2 and len(agent.inventory) > 0:  # sell
            bought_price = agent.inventory.pop(0)
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
        done = True if t == l - 1 else False
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
        if len(agent.memory) > batch_size:
            # Track loss during training
            loss = agent.expReplay(batch_size)
            episode_loss += loss

    average_episode_loss = episode_loss / l
    losses.append(average_episode_loss)
    
    if e % 10 == 0:  # Save every 10 episodes
        agent.model.save(os.path.join(model_dir, f"{e}.keras"))
        
        loss_file = os.path.join(losses_dir, f"losses_{e}.json")
        with open(loss_file, 'w') as f:
            json.dump(losses, f)


In [None]:
plt.plot(range(episode_count + 1), losses, marker='o')
plt.xlabel('Episode')
plt.ylabel('Loss')
plt.title('Training Loss per Episode')
plt.show()

In [121]:
stock_name = "data/AMZN"
model_name = "./saved_models/60.keras"
model = load_model(model_name)
window_size = model.layers[0].input.shape[1]
agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
print(data)
l = len(data) - 1
batch_size = 32
state = getState(data, 0, window_size + 1)
print(state)
total_profit = 0
agent.inventory = []
print(l)
for t in range(l):
    action = agent.act(state)
    print(action)
    # sit
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0
    if action == 1: # buy
        agent.inventory.append(data[t])
        print("Buy: " + formatPrice(data[t]))
    elif action == 2 and len(agent.inventory) > 0: # sell
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
    done = True if t == l - 1 else False
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state
    if done:
        print("--------------------------------")
        print(stock_name + " Total Profit: " + formatPrice(total_profit))
        print("--------------------------------")
        print ("Total profit is:",formatPrice(total_profit))

[137.850006, 138.559998, 138.410004, 140.570007, 137.669998, 135.070007, 133.979996, 133.220001, 134.679993, 134.25, 135.520004, 131.839996, 133.259995, 133.139999, 134.910004, 135.070007, 138.009995, 138.119995, 137.270004, 135.360001, 137.850006, 138.229996, 143.100006, 141.229996, 144.850006, 144.720001, 140.389999, 139.979996, 137.630005, 135.289993, 129.330002, 129.119995, 131.270004, 125.980003, 125.980003, 125.980003, 127.120003, 129.460007, 124.720001, 127.0, 125.959999, 127.959999, 128.259995, 129.479996, 131.830002, 132.330002, 129.789993, 132.550003, 131.470001, 128.130005, 128.399994, 125.169998, 126.559998, 128.559998, 121.389999, 119.57, 127.739998, 132.710007, 133.089996]
[[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]]
58
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
1
Buy: $138.41
[1m1/1[0m [32m━━━━━