## Etapa 1: Instalação da bibliotecas


In [3]:
!conda install tensorflow-gpu==2.0.0

Collecting package metadata (repodata.json): done
Solving environment: - ^C
failed with initial frozen solve. Retrying with flexible solve.

CondaError: KeyboardInterrupt



In [4]:
!conda install -f pandas-datareader

^C

CondaError: KeyboardInterrupt



## Etapa 2: Importação das bibliotecas

In [5]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque

In [6]:
tf.__version__

'1.14.0'

## Etapa 3: Construção da IA para negociação de ações

In [7]:
class AI_Trader():
  
  def __init__(self, state_size, action_space = 3, model_name = "AITrader"):
    # O estado que esta vindo do ambiente
    self.state_size = state_size
    # Acoes possiveis que podemos tomar
    self.action_space = action_space
    # +- O numero de acoes para depois fazer o treinamento.
    self.memory = deque(maxlen = 2000)

    self.model_name = model_name
    
    self.gamma = 0.95
    # param que determina se o agente vai seguir a rede ou escolher aleatorio. 1 quer dizer que vai ser 100% aleatorio, o motivo disso, pois no comeco a rede ainda nao esta treinada, entao precisa ser feito escolhas randomicas
    self.epsilon = 1.0
    # como esse param sera reduzido, a gente precisa coloca um minimo, obs que nao eh 0, entao em determinados momentos ele ira escolher pontos aleatorios
    self.epsilon_final = 0.01
    # esse valor determina o quanto o param de aleatoridade ira cair.
    self.epsilon_decay = 0.995
    self.model = self.model_builder()
    
  def model_builder(self):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units = 32, activation = "relu", input_dim = self.state_size))
    model.add(tf.keras.layers.Dense(units = 64, activation = "relu"))
    model.add(tf.keras.layers.Dense(units = 128, activation = "relu"))
    model.add(tf.keras.layers.Dense(units = self.action_space, activation = "linear"))
    model.compile(loss = "mse", optimizer = tf.keras.optimizers.Adam(lr = 0.001))
    return model
  
  def trade(self, state):
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  def batch_train(self, batch_size):
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
      
    for state, action, reward, next_state, done in batch:
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay
      

In [8]:
random.randrange(3)

2

## Etapa 4: Pré-processamento da base de dados

### Definição de funções auxiliares

#### Sigmoid

In [9]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

In [10]:
sigmoid(0.5)

0.6224593312018546

#### Formatação de preços

In [11]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

In [12]:
stocks_price_format(100)

'$ 100.000000'

#### Carregador da base de dados

In [13]:
dataset = data_reader.DataReader("AAPL", data_source = "yahoo")

In [14]:
dataset.tail()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-09,270.799988,264.910004,270.0,266.920013,32010600.0,266.920013
2019-12-10,270.070007,265.859985,268.600006,268.480011,22605100.0,268.480011
2019-12-11,271.100006,268.5,268.809998,270.769989,19689200.0,270.769989
2019-12-12,272.559998,267.320007,267.779999,271.459991,34327600.0,271.459991
2019-12-13,275.299988,270.929993,271.459991,275.149994,33396900.0,275.149994


In [15]:
str(dataset.index[0]).split()[0]

'2014-12-17'

In [16]:
dataset.index[-1]

Timestamp('2019-12-13 00:00:00')

In [17]:
dataset['Close']

Date
2014-12-17    109.410004
2014-12-18    112.650002
2014-12-19    111.779999
2014-12-22    112.940002
2014-12-23    112.540001
2014-12-24    112.010002
2014-12-26    113.989998
2014-12-29    113.910004
2014-12-30    112.519997
2014-12-31    110.379997
2015-01-02    109.330002
2015-01-05    106.250000
2015-01-06    106.260002
2015-01-07    107.750000
2015-01-08    111.889999
2015-01-09    112.010002
2015-01-12    109.250000
2015-01-13    110.220001
2015-01-14    109.800003
2015-01-15    106.820000
2015-01-16    105.989998
2015-01-20    108.720001
2015-01-21    109.550003
2015-01-22    112.400002
2015-01-23    112.980003
2015-01-26    113.099998
2015-01-27    109.139999
2015-01-28    115.309998
2015-01-29    118.900002
2015-01-30    117.160004
                 ...    
2019-11-01    255.820007
2019-11-04    257.500000
2019-11-05    257.130005
2019-11-06    257.239990
2019-11-07    259.429993
2019-11-08    260.140015
2019-11-11    262.200012
2019-11-12    261.959991
2019-11-13    264.47

In [18]:
def dataset_loader(stock_name):
  dataset = data_reader.DataReader(stock_name, data_source = "yahoo")
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  close = dataset['Close']
  return close

### Criador de estados

In [19]:
0 - 5 + 1

-4

In [20]:
20 - 5 + 1

16

In [21]:
dataset[16:21]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-12,112.629997,108.800003,112.599998,109.25,49650800.0,100.380791
2015-01-13,112.800003,108.910004,111.43,110.220001,67091900.0,101.272049
2015-01-14,110.489998,108.5,109.040001,109.800003,48956600.0,100.886154
2015-01-15,110.059998,106.660004,110.0,106.82,60014000.0,98.148056
2015-01-16,107.580002,105.199997,107.029999,105.989998,78513300.0,97.385445


In [22]:
def state_creator(data, timestep, window_size):
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep + 1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep + 1])
    
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i + 1] - windowed_data[i]))
    
  return np.array([state]), windowed_data

### Carregando a base de dados

In [23]:
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [24]:
s, w = state_creator(data, 1000, 5)

In [25]:
s

array([[9.98053950e-01, 2.94480018e-04, 1.22388756e-01, 1.96558916e-03]])

In [26]:
w

Date
2018-11-30    178.580002
2018-12-03    184.820007
2018-12-04    176.690002
2018-12-06    174.720001
2018-12-07    168.490005
Name: Close, dtype: float64

## Etapa 5: Treinando a IA

### Configuração dos hyper parâmetros

In [40]:
window_size = 20
episodes = 1000
batch_size = 32
data_samples = len(data) - 1
data_samples

1256

### Definição do modelo

In [41]:
trader = AI_Trader(window_size)

In [42]:
trader.model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 32)                672       
_________________________________________________________________
dense_5 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_6 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 387       
Total params: 11,491
Trainable params: 11,491
Non-trainable params: 0
_________________________________________________________________


### Loop de treinamento

In [43]:
from IPython.display import clear_output, display

In [44]:
def print_registry(buy, sell, total):
    clear_output(wait=True)
    print("AI Trader bought: ", stocks_price_format(buy))
    print("AI Trader sold: ", stocks_price_format(sell), " Profit: " + stocks_price_format(sell - buy))
    print("Total profit: {}".format(total))

In [None]:
for episode in range(1, episodes + 1):
  print("Episode: {}/{}".format(episode, episodes))
  state = state_creator(data, window_size + 1, window_size + 1)
  total_profit = 0
  trader.inventory = []
  for t in range(window_size + 1, data_samples):
    action = trader.trade(state)
    next_state = state_creator(data, t + 1, window_size + 1)
    reward = 0
    
    if action == 1 and len(trader.inventory) == 0: # Comprando uma ação
      trader.inventory.append(data[t])
      
    elif action == 2 and len(trader.inventory) > 0: # Vendendo uma ação  
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print_registry(buy_price, data[t], total_profit)


    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("Total profit: {}".format(total_profit))
      print("########################")
      
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
     
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

AI Trader bought:  $ 212.240005
AI Trader sold:  $ 218.860001  Profit: $ 6.619995
Total profit: 23.240005493164062
