In [None]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
import math
import numpy as np
import random
from collections import deque

In [None]:
# Mount Google Drive at /content/drive. The data and model paths used by the
# training and evaluation cells of this notebook live under
# /content/drive/MyDrive, so this cell has to run before them.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
class Agent:
  """Deep Q-learning agent that chooses between sit (0), buy (1) and sell (2).

  The Q-function is approximated by a small fully connected Keras network
  that maps a state vector of length ``state_size`` to one Q-value per action.

  Args:
    state_size: Length of the state vector fed to the network.
    is_eval: When True the network is loaded from ``model_name`` and
      ``act`` never explores; when False a fresh network is built.
    model_name: Path handed to ``load_model`` (only read when ``is_eval``).
  """
  def __init__(self, state_size, is_eval=False, model_name=""):
    self.state_size = state_size # normalized previous days
    self.action_size = 3 # sit, buy, sell
    # Bounded replay buffer: once 1000 transitions are stored, the oldest
    # one is dropped for every new append.
    self.memory = deque(maxlen=1000)
    self.inventory = []  # prices of positions bought and not yet sold
    self.model_name = model_name
    self.is_eval = is_eval
    self.gamma = 0.95  # discount factor applied to the next state's best Q-value
    self.epsilon = 1.0  # current exploration probability
    self.epsilon_min = 0.01  # epsilon is no longer decayed once it is <= this
    self.epsilon_decay = 0.995  # multiplicative decay applied per expReplay call
    self.model = load_model(model_name) if is_eval else self._model()

  def _model(self):
    """Build and compile the Q-network (64-32-8 ReLU layers, linear output)."""
    model = Sequential()
    model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
    model.add(Dense(units=32, activation="relu"))
    model.add(Dense(units=8, activation="relu"))
    model.add(Dense(self.action_size, activation="linear"))
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by recent Keras releases.
    model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
    return model

  def act(self, state):
    """Return the index of the action to take in ``state``.

    While training, a uniformly random action is returned with probability
    ``epsilon``; otherwise the action with the highest predicted Q-value.
    """
    if not self.is_eval and random.random() <= self.epsilon:
      return random.randrange(self.action_size)
    # verbose=0 keeps Keras from printing a progress bar on every step.
    options = self.model.predict(state, verbose=0)
    return np.argmax(options[0])

  def expReplay(self, batch_size):
    """Fit the network on the ``batch_size`` most recent transitions.

    Each transition's target for the taken action is ``reward`` when the
    episode ended there, else ``reward + gamma * max_a Q(next_state, a)``.
    Afterwards epsilon is decayed once if it is still above ``epsilon_min``.
    """
    memory_len = len(self.memory)
    # Start exactly `batch_size` items from the end (the previous
    # `memory_len - batch_size + 1` replayed one transition too few) and
    # clamp at 0 so a short memory cannot yield negative, wrapping indices.
    start = max(memory_len - batch_size, 0)
    mini_batch = [self.memory[i] for i in range(start, memory_len)]
    for state, action, reward, next_state, done in mini_batch:
      target = reward
      if not done:
        next_q = self.model.predict(next_state, verbose=0)[0]
        target = reward + self.gamma * np.amax(next_q)
      # Only the taken action's Q-value is moved toward the target; the
      # other outputs keep the network's own predictions.
      target_f = self.model.predict(state, verbose=0)
      target_f[0][action] = target
      self.model.fit(state, target_f, epochs=1, verbose=0)
    if self.epsilon > self.epsilon_min:
      self.epsilon *= self.epsilon_decay

In [None]:
def formatPrice(n):
  """Render ``n`` as a rupee amount with two decimals, e.g. ``-Rs.3.25``."""
  # The sign goes in front of the currency tag, so format the magnitude only.
  amount = "{0:.2f}".format(abs(n))
  if n < 0:
    return "-Rs." + amount
  return "Rs." + amount
def getStockDataVec(key):
  """Read ``key + ".csv"`` and return the fifth column as a list of floats.

  The first line is treated as a header and skipped. Blank lines (such as a
  trailing empty line at the end of the file) are ignored.

  Args:
    key: Path of the CSV file without the ``.csv`` extension.

  Returns:
    list[float]: One value per data row, taken from column index 4
    (presumably the closing price of a Date/Open/High/Low/Close file —
    confirm against the actual CSV layout).

  Raises:
    IndexError: If a non-blank row has fewer than five comma-separated fields.
    ValueError: If the fifth field of a row is not a valid float.
  """
  # The context manager closes the file even if parsing fails later.
  with open(key+".csv","r") as csv_file:
    lines = csv_file.read().splitlines()
  vec = []
  for line in lines[1:]:
    if not line.strip():
      continue  # an empty line has no column 4; skip it instead of crashing
    vec.append(float(line.split(",")[4]))
  return vec
def sigmoid(x):
  """Return the logistic function ``1 / (1 + e**-x)`` of ``x``.

  For very negative ``x`` (below roughly -709) ``math.exp(-x)`` overflows a
  float; the mathematical limit 0.0 is returned in that case instead of
  letting ``OverflowError`` propagate.
  """
  try:
    return 1/(1+math.exp(-x))
  except OverflowError:
    # e**-x is too large to represent, so the quotient is effectively zero.
    return 0.0
def getState(data, t, n):
  """Build the state ending at index ``t`` from the last ``n`` values.

  The window ``data[t-n+1 : t+1]`` is taken; when it would start before
  index 0 it is left-padded with copies of ``data[0]``. The state is the
  sigmoid of each of the ``n - 1`` consecutive differences in that window.

  ``data`` is expected to be a Python list, since the padding relies on list
  repetition and concatenation.

  Returns:
    numpy array of shape ``(1, n - 1)``.
  """
  start = t - n + 1
  if start < 0:
    # pad with t0
    window = [data[0]] * (-start) + data[0:t + 1]
  else:
    window = data[start:t + 1]
  deltas = [sigmoid(window[k + 1] - window[k]) for k in range(n - 1)]
  return np.array([deltas])

# **Training the Agent**

In [None]:
import sys

# --- Run configuration --------------------------------------------------------
# Values are hard-coded for this run; the commented lines are the interactive
# alternative, which is why the values are cast with str()/int() below.
#stock_name = input("Enter stock_name, window_size, Episode_count")
#window_size = input()
#episode_count = input()
stock_name='/content/drive/MyDrive/ThuatToan/DeTai_code/DRL_test_moi/data_AQI_2'
window_size =4
episode_count = 2

stock_name = str(stock_name)
window_size = int(window_size)
episode_count = int(episode_count)

# --- Setup --------------------------------------------------------------------
agent = Agent(window_size)
# NOTE(review): the saved output of this cell shows an IndexError. A possible
# source is getStockDataVec indexing column 4 of each CSV row, or getState
# reading data[0] from an empty series — confirm the CSV layout of this file.
data = getStockDataVec(stock_name)
l = len(data) - 1  # number of steps per episode; the last price is only a next_state
batch_size = 32

# --- Training -----------------------------------------------------------------
# range(episode_count + 1) runs episodes 0..episode_count inclusive.
for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    # getState is given window_size + 1 prices so it yields window_size differences.
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []
    for t in range(l):
        action = agent.act(state)
        # sit (action 0, or a sell with nothing in inventory): reward stays 0
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0
        if action == 1: # buy
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))
        elif action == 2 and len(agent.inventory) > 0: # sell
            # Positions are closed first-in, first-out.
            bought_price = agent.inventory.pop(0)
            # Losses are clipped to a reward of 0; total_profit keeps the sign.
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
        done = t == l - 1
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
        # Start learning once the replay memory holds more than one batch.
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)
    # Checkpoint every second episode, named after the episode number.
    if e % 2 == 0:
        agent.model.save('/content/drive/MyDrive/ThuatToan/DeTai_code/DRL_test_moi/'+str(e))

IndexError: ignored

# Evaluation of the **model**

In [None]:
# --- Run configuration --------------------------------------------------------
# Values are hard-coded for this run; the commented lines are the interactive
# alternative.
#stock_name = input("Enter Stock_name, Model_name")
#model_name = input()
stock_name='/content/drive/MyDrive/ThuatToan/DeTai_code/DRL_test_moi/data_AQI'
# NOTE(review): the training cell saves checkpoints to '<this directory>/<episode>',
# while this path is the directory itself — confirm it points at a saved model.
model_name = '/content/drive/MyDrive/ThuatToan/DeTai_code/DRL_test_moi/'
model = load_model(model_name)
# Recover the state length from the network's input shape (batch, state_size).
# `input_shape` is a plain tuple, so this does not depend on the layer's input
# tensor exposing a TensorShape with `.as_list()`.
window_size = model.input_shape[1]
agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
# Show only a preview: printing the full series floods the cell output.
print(data[:5], "...", "(" + str(len(data)) + " values)")
l = len(data) - 1  # number of steps; the last price is only used as a next_state
batch_size = 32
state = getState(data, 0, window_size + 1)
print(state)
total_profit = 0
agent.inventory = []
print(l)
for t in range(l):
    # In eval mode the agent always picks the action with the highest Q-value.
    action = agent.act(state)
    print(action)
    # sit (action 0, or a sell with nothing in inventory): reward stays 0
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0
    if action == 1: # buy
        agent.inventory.append(data[t])
        print("Buy: " + formatPrice(data[t]))
    elif action == 2 and len(agent.inventory) > 0: # sell
        # Positions are closed first-in, first-out.
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
    done = t == l - 1
    # Transitions are recorded for parity with training; nothing is fitted here.
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state
    if done:
        print("--------------------------------")
        print(stock_name + " Total Profit: " + formatPrice(total_profit))
        print("--------------------------------")
        print ("Total profit is:",formatPrice(total_profit))

[540.849976, 539.924988, 540.450012, 532.400024, 527.299988, 526.025024, 530.474976, 530.400024, 524.299988, 534.450012, 535.424988, 533.025024, 532.174988, 531.974976, 539.700012, 538.875, 541.5, 538.900024, 538.375, 529.450012, 530.974976, 524.625, 523.724976, 530.299988, 523.174988, 527.525024, 529.724976, 533.650024, 540.125, 536.5, 537.700012, 533.224976, 537.049988, 537.0, 538.625, 538.974976, 537.150024, 541.075012, 544.375, 535.25, 531.200012, 533.549988, 528.099976, 531.174988, 529.299988, 523.474976, 530.075012, 524.875, 521.075012, 512.924988, 517.900024, 509.149994, 511.5, 515.150024, 520.575012, 517.825012, 515.849976, 524.924988, 529.875, 526.900024, 521.924988, 524.599976, 527.625, 517.924988, 513.025024, 506.475006, 487.649994, 485.700012, 486.799988, 486.725006, 487.450012, 494.725006, 494.649994, 494.375, 485.024994, 474.799988, 471.325012, 480.5, 485.924988, 492.075012, 503.600006, 507.924988, 510.274994, 507.075012, 512.400024, 510.625, 514.349976, 513.775024, 513.3