<a href="https://colab.research.google.com/github/siddharthksingh/stock-pred-models/blob/main/assign_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
import math
import numpy as np
import random
from collections import deque

In [10]:
class Agent:
    def _model(self):
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.statesize, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        model.add(Dense(self.actionsize, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        return model

    def __init__(self, statesize, is_eval=False, model_name=""):
        self.statesize = statesize 
        self.actionsize = 3 
        self.mem = deque(maxlen=1000)
        self.stockprice = []
        self.bought = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = load_model(model_name) if is_eval else self._model()
    

    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.mem)
        for i in range(l - batch_size + 1, l):
            mini_batch.append(self.mem[i])
        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def act(self, state):
        if not self.is_eval and random.random()<= self.epsilon:
            return random.randrange(self.actionsize)
        options = self.model.predict(state)
        return np.argmax(options[0])

In [11]:
def formatPrice(n):
    return("-$" if n<0 else "$")+"{0:.2f}".format(abs(n))

def sigmoidfn(t):
    return 1/(1+math.exp(-t))

def getStockDataVec():
    lines = open("SONY.csv","r").read().splitlines()
    A = []
    for line in lines[1:]:
        A.append(float(line.split(",")[4]))
    return A 

def getState(data, t, n):
    d = t - n + 1
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
    res = []
    for i in range(n - 1):
        res.append(sigmoidfn(block[i + 1] - block[i]))
    return np.array([res])

In [12]:
import sys
window_size = input("Window Size,Episode Count,Maximum Transactions,Net Money")
episode_count = input()
max_transactions = input()
total_money = input()
window_size = int(window_size)
episode_count = int(episode_count)
max_transactions = int(max_transactions)
total_money = int(total_money)
current_bought = 0
current_transaction_count = 0
agent = Agent(window_size)
data = getStockDataVec()
l = len(data) - 1
batch_size = 4
for k in range(episode_count + 1):
    print("Episode " + str(k) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.stockprice = []
    agent.stock_bought = []
    current_bought = 0
    temp_money = total_money
    current_transaction_count = 0
    for t in range(l):
        action = agent.act(state)
        # sit
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0
        if action == 1 and current_transaction_count < max_transactions and temp_money > 0: # buy
            x = temp_money/(max_transactions-current_transaction_count)
            n = x/data[t]
            temp_money = temp_money-x
            agent.bought.append(n)
            agent.stockprice.append(data[t])
            current_transaction_count+=1
            print("Buy: " + formatPrice(data[t]))
        elif action == 2 and len(agent.stockprice) > 0 : # sell
            bought_price = 0
            stocks_count = 0
            for i in range(len(agent.stockprice)):
              bought_price+=agent.bought[i]*agent.stockprice[i];
              stocks_count+=agent.bought[i]
            reward = stocks_count*data[t] - bought_price
            total_profit += reward
            temp_money+=stocks_count*data[t]
            current_transaction_count = 0
            agent.stockprice.clear()
            agent.bought.clear()
            print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(reward))
        else:
            bought_price = 0
            stocks_count = 0
            for i in range(len(agent.stockprice)):
              bought_price+=agent.bought[i]*agent.stockprice[i];
              stocks_count+=agent.bought[i]
            pf1 = stocks_count*data[t] - bought_price
            reward = pf1/100
        done = True if t == l - 1 else False
        agent.mem.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("--------*****--------------*****----------")
            print("Episode Profit- " + formatPrice(total_profit))
            print("--------*****--------------*****----------")
        if len(agent.mem) > batch_size:
            agent.expReplay(batch_size)
    if k % 10 == 0:
        agent.model.save(str(k))

Window Size,Episode Count,Maximum Transactions,Net Money20
10
50
10000
Episode 0/10
Buy: $21.90
Buy: $21.43
Sell: $22.85 | Profit: $21.93
Buy: $19.33
Sell: $20.32 | Profit: $10.27
Buy: $19.46
Sell: $19.89 | Profit: $4.43
Buy: $20.44
Sell: $19.32 | Profit: -$11.00
Buy: $17.21
Sell: $16.98 | Profit: -$2.68
Buy: $17.06
Sell: $16.56 | Profit: -$5.88
Buy: $16.46
Buy: $17.58
Buy: $18.31
Sell: $19.38 | Profit: $67.76
Buy: $21.69
Buy: $21.40
Sell: $22.86 | Profit: $24.64
Buy: $22.27
Sell: $20.63 | Profit: -$14.89
Buy: $24.35
Buy: $24.23
Sell: $23.83 | Profit: -$7.64
Buy: $26.37
Buy: $25.70
Buy: $26.23
Sell: $25.53 | Profit: -$13.14
Buy: $26.14
Buy: $25.86
Buy: $27.23
Sell: $27.25 | Profit: $19.53
Buy: $26.95
Sell: $26.84 | Profit: -$0.82
Buy: $25.85
Sell: $26.26 | Profit: $3.20
Buy: $26.11
Sell: $25.78 | Profit: -$2.55
Buy: $28.00
Buy: $27.37
Sell: $27.71 | Profit: $0.42
Buy: $27.42
Sell: $27.29 | Profit: -$0.96
Buy: $25.96
Sell: $26.39 | Profit: $3.34
Buy: $25.98
Sell: $26.23 | Profit: $1.94


In [13]:
model = agent.model
window_size = model.layers[0].input.shape.as_list()[1]
agent = Agent(window_size, True, "")
data = getStockDataVec()
print(data)
l = len(data) - 1
batch_size = 4
state = getState(data, 0, window_size + 1)
print(state)
total_profit = 0
current_stocks_bought = 0
current_transaction_count = 0
agent.inventory = []
agent.stocks_bought = []
print(l)
for t in range(l):
    action = agent.act(state)
    # print(action)
    # sit
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0
    if action == 1 and current_transaction_count < max_transactions and t_money > 0: # buy
      x = t_money/(max_transactions-current_transaction_count)
      n = x/data[t]
      t_money = t_money-x
      agent.stocks_bought.append(n)
      agent.inventory.append(data[t])
      current_transaction_count+=1
      print("Buy: " + formatPrice(data[t]))
    elif action == 2 and len(agent.inventory) > 0 : # sell
      bought_price = 0
      stocks_count = 0
      for i in range(len(agent.inventory)):
        bought_price+=agent.stocks_bought[i]*agent.inventory[i];
        stocks_count+=agent.stocks_bought[i]
      reward = stocks_count*data[t] - bought_price
      total_profit += reward
      t_money+=stocks_count*data[t]
      current_transaction_count = 0
      agent.inventory.clear()
      agent.stocks_bought.clear()
      print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(reward))
    else:
      bought_price = 0
      stocks_count = 0
      for i in range(len(agent.inventory)):
        bought_price+=agent.stocks_bought[i]*agent.inventory[i];
        stocks_count+=agent.stocks_bought[i]
      profit1 = stocks_count*data[t] - bought_price
      reward = -1*profit1/100
    done = True if t == l - 1 else False
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state
    if done:
        print("--------------------------------")
        print(" Total Profit: " + formatPrice(total_profit))
        print("--------------------------------")
        print ("Total profit is:",formatPrice(total_profit))

OSError: ignored