<a href="https://colab.research.google.com/github/mantoan-thi/-Aprendizagem-por-Refor-o-Profunda-para-Negocia-o-no-Mercado-de-A-es/blob/main/Aprendizagem_por_Refor%C3%A7o_Profunda_para_Negocia%C3%A7%C3%A3o_no_Mercado_de_A%C3%A7%C3%B5es.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Etapa 1: Instalações da bibliotecas

In [None]:
#!pip install tensorflow-gpu==2.0.0-alpha0
!pip install tensorflow==2.0.0-beta1
!pip install tensorflow==2.0.0

In [None]:
!pip install pandas-datareader

In [None]:
!pip install --upgrade pandas
!pip install --upgrade pandas-datareader

# Etapa 2: importação das bibliotecas

In [4]:
import math                              # Para construção da função sigmoid
import random                            # Para números aleatórios  
import numpy as np
import pandas as pd
import tensorflow as tf                  
import matplotlib.pyplot as plt
import pandas_datareader as data_reader   
from tqdm import tqdm_notebook,tqdm       # Para barra de progresso mais intuitivas
from collections import deque             # Para memória de re-play

In [5]:
print(tf.__version__)

2.0.0


# Etapa 3: Construção da IA para anegocição das ações

In [6]:
class AI_Trader():
  def __init__(self,state_size,action_space = 3,model_name = "AITrader"):
    self.state_size = state_size       # São as informações que estão vindo do ambiente, camada de entrada da rede neural
    self.action_space = action_space   # São as ações: comprar, vender ou não fazer nada. 
    self.memory = deque(maxlen=2000)   # Experiência de re-play
    self.model_name = model_name

    self.gamma = 0.95                  #
    self.epsilon = 1.0                 # 100% indicar que todas as ações são randomicas, sem utilizar a rede neural
    self.epsilon_final = 0.01          # 
    self.epsilon_decay = 0.995         # Decrementar os valor de espilon
    self.model = self.model_builder()

  def model_builder(self):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=32,activation = "relu",input_dim = self.state_size))
    model.add(tf.keras.layers.Dense(units=64,activation = "relu"))
    model.add(tf.keras.layers.Dense(units=128,activation = "relu"))
    model.add(tf.keras.layers.Dense(units = self.action_space,activation = 'linear'))
    model.compile(loss='mse',optimizer = tf.keras.optimizers.Adam(lr=0.001))
    return model

  def trade(self,state):
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  def batch_train(self,batch_size):
    batch = []
    for i in range(len(self.memory)-batch_size + 1,len(self.memory)):
      batch.append(self.memory[i])
    for state, action, reward, next_state, done in batch:
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward

      self.model.fit(state,target, epochs=1, verbose=0)
    if self.epsilon > self.epsilon_final:
      self.epsilon*=self.epsilon_decay


# Etapa 4: Pré-processamento da base de dados

Definição de funções auxiliares

Sigmoid

In [7]:
def sigmoid(x):
  return 1 /(1 + math.exp(-x))

Formatação de preços

In [8]:
def stocks_prices_format(n): # verifica se vc esta tendo ou não lucro
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

Carregador de base de dados

In [9]:
# Empresas: SABESP,    ELETROBRÁS,  ENERGISA, B3
tickers = {'AAPL','SBSP3.SA', 'ENGI3.SA', 'ELET3.SA','^BVSP'}
stock_name = list(tickers)[0]

In [10]:
def dataset_loader(stock_name):
  dataset = data_reader.DataReader(stock_name,data_source="yahoo")
  star_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  close = dataset['Close']
  return close

Criador de estados

In [11]:
def state_creator(data,timestep,window_size):
  starting_id = timestep - window_size + 1
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep + 1]
  else:
    windowed_data = -starting_id * [data[0]] + list(data[0:timestep + 1])
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i + 1] - windowed_data[i]))
  return np.array([state])

Carregando a base de dados

In [12]:
stock_name = list(tickers)[0]
data = dataset_loader(stock_name)

# Etapa 5: Treinando a IA

Configuração dos hyper parâmentros

In [13]:
window_size = 5
episodes = 1000
batch_size = 32
data_samples = len(data)-1

Definição do modelo

In [14]:
trader = AI_Trader(window_size)

In [15]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


Loop de treinamento

In [None]:
for episode in range(1,episodes + 1):
  print("Episode: {}/{}".format(episode,episodes))
  state = state_creator(data,0,window_size+1)
  total_profit = 0
  trader.inventory= []
  for t in tqdm(range(data_samples)):
    action = trader.trade(state)
    next_state = state_creator(data,t + 1,window_size + 1)
    reward = 0

    if action ==1:# Comprando uma ação
      trader.inventory.append(data[t])
      print("AI Trader bought: ",stocks_prices_format(data[t]))
    elif action ==2 and len(trader.inventory) > 0: # Vendendo uma ação
      buy_price = trader.inventory.pop(0)
    
      reward = max(data[t]-buy_price,0)
      total_profit+= data[t]-buy_price
      print("AI Trader soldd: ",stocks_prices_format(data[t])," Profit: ",stocks_prices_format(data[t]-buy_price))
    if t == data_samples -1:
      done = True
    else:
      done = False
    trader.memory.append((state,action,reward,next_state,done))
    state = next_state

    if done:
      print("##############################")
      print("Total profit: {}",format(total_profit))
      print("##############################")

    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
  if episode % 10 == 0:
    trader.model.save("ai_trade_{}.h5".format(episode))



Episode: 1/1000


  0%|          | 0/1239 [00:00<?, ?it/s]

AI Trader bought:  $ 30.510000
AI Trader bought:  $ 30.190001
AI Trader bought:  $ 30.799999
AI Trader soldd:  $ 30.500000  Profit:  - $ 0.010000
AI Trader bought:  $ 30.900000
AI Trader bought:  $ 30.750000
AI Trader soldd:  $ 30.650000  Profit:  $ 0.459999
AI Trader bought:  $ 30.889999
AI Trader soldd:  $ 31.049999  Profit:  $ 0.250000
AI Trader bought:  $ 30.500000
AI Trader bought:  $ 31.480000
AI Trader soldd:  $ 31.639999  Profit:  $ 0.740000
AI Trader soldd:  $ 31.990000  Profit:  $ 1.240000
AI Trader soldd:  $ 32.110001  Profit:  $ 1.220001
AI Trader soldd:  $ 31.900000  Profit:  $ 1.400000
AI Trader soldd:  $ 32.290001  Profit:  $ 0.810001


  3%|▎         | 35/1239 [00:12<10:18,  1.95it/s]

AI Trader bought:  $ 32.750000


  3%|▎         | 36/1239 [00:16<15:22,  1.30it/s]

AI Trader bought:  $ 32.230000


  3%|▎         | 37/1239 [00:20<21:31,  1.07s/it]

AI Trader soldd:  $ 32.340000  Profit:  - $ 0.410000


  3%|▎         | 38/1239 [00:24<28:42,  1.43s/it]

AI Trader bought:  $ 33.240002


  3%|▎         | 39/1239 [00:28<34:57,  1.75s/it]

AI Trader soldd:  $ 33.250000  Profit:  $ 1.020000


  3%|▎         | 42/1239 [00:40<57:37,  2.89s/it]

AI Trader bought:  $ 27.110001


  3%|▎         | 43/1239 [00:44<1:00:30,  3.04s/it]

AI Trader soldd:  $ 27.500000  Profit:  - $ 5.740002


  4%|▎         | 44/1239 [00:47<1:03:18,  3.18s/it]

AI Trader bought:  $ 29.240000


  4%|▎         | 45/1239 [00:52<1:10:50,  3.56s/it]

AI Trader soldd:  $ 28.450001  Profit:  $ 1.340000


  4%|▍         | 47/1239 [00:59<1:11:00,  3.57s/it]

AI Trader bought:  $ 29.530001


  4%|▍         | 48/1239 [01:04<1:18:47,  3.97s/it]

AI Trader soldd:  $ 30.030001  Profit:  $ 0.790001


  4%|▍         | 49/1239 [01:08<1:16:28,  3.86s/it]

AI Trader soldd:  $ 29.680000  Profit:  $ 0.150000


  4%|▍         | 53/1239 [01:23<1:17:53,  3.94s/it]

AI Trader bought:  $ 29.600000


  5%|▍         | 56/1239 [01:34<1:11:47,  3.64s/it]

AI Trader soldd:  $ 28.780001  Profit:  - $ 0.820000


  5%|▍         | 57/1239 [01:39<1:22:38,  4.20s/it]

AI Trader bought:  $ 28.420000


  5%|▍         | 58/1239 [01:43<1:18:43,  4.00s/it]

AI Trader soldd:  $ 28.290001  Profit:  - $ 0.129999


  5%|▍         | 59/1239 [01:47<1:16:37,  3.90s/it]

AI Trader bought:  $ 27.870001


  5%|▍         | 60/1239 [01:50<1:14:49,  3.81s/it]

AI Trader bought:  $ 27.680000


  5%|▌         | 63/1239 [02:03<1:25:45,  4.38s/it]

AI Trader bought:  $ 28.309999


  5%|▌         | 64/1239 [02:07<1:20:29,  4.11s/it]

AI Trader soldd:  $ 27.700001  Profit:  - $ 0.170000


  5%|▌         | 67/1239 [02:17<1:12:47,  3.73s/it]

AI Trader bought:  $ 26.910000


  5%|▌         | 68/1239 [02:21<1:11:34,  3.67s/it]

AI Trader soldd:  $ 26.580000  Profit:  - $ 1.100000


  6%|▌         | 70/1239 [02:31<1:26:29,  4.44s/it]

AI Trader bought:  $ 27.350000


  6%|▌         | 71/1239 [02:34<1:21:14,  4.17s/it]

AI Trader bought:  $ 27.430000


  6%|▌         | 73/1239 [02:41<1:14:20,  3.83s/it]

AI Trader soldd:  $ 27.559999  Profit:  - $ 0.750000


  6%|▌         | 75/1239 [02:48<1:11:29,  3.69s/it]

AI Trader soldd:  $ 28.790001  Profit:  $ 1.880001


  6%|▌         | 76/1239 [02:52<1:10:40,  3.65s/it]

AI Trader bought:  $ 28.370001


  6%|▋         | 78/1239 [02:59<1:09:53,  3.61s/it]

AI Trader soldd:  $ 29.530001  Profit:  $ 2.180000


  6%|▋         | 79/1239 [03:06<1:30:27,  4.68s/it]

AI Trader bought:  $ 29.610001


  6%|▋         | 80/1239 [03:10<1:24:34,  4.38s/it]

AI Trader soldd:  $ 29.180000  Profit:  $ 1.750000


  7%|▋         | 81/1239 [03:14<1:19:59,  4.14s/it]

AI Trader soldd:  $ 29.309999  Profit:  $ 0.939999


  7%|▋         | 82/1239 [03:17<1:16:15,  3.95s/it]

AI Trader bought:  $ 29.250000


  7%|▋         | 83/1239 [03:21<1:13:37,  3.82s/it]

AI Trader soldd:  $ 29.420000  Profit:  - $ 0.190001


  7%|▋         | 84/1239 [03:24<1:12:02,  3.74s/it]

AI Trader soldd:  $ 31.139999  Profit:  $ 1.889999


  7%|▋         | 90/1239 [03:45<1:06:49,  3.49s/it]

AI Trader bought:  $ 32.009998


  7%|▋         | 91/1239 [03:53<1:31:27,  4.78s/it]

AI Trader soldd:  $ 32.000000  Profit:  - $ 0.009998


  8%|▊         | 98/1239 [04:17<1:08:24,  3.60s/it]

AI Trader bought:  $ 31.820000


  8%|▊         | 99/1239 [04:21<1:07:35,  3.56s/it]

AI Trader bought:  $ 32.349998


  8%|▊         | 101/1239 [04:28<1:07:04,  3.54s/it]

AI Trader soldd:  $ 32.709999  Profit:  $ 0.889999


  8%|▊         | 102/1239 [04:31<1:07:05,  3.54s/it]

AI Trader soldd:  $ 32.680000  Profit:  $ 0.330002


  8%|▊         | 105/1239 [04:47<1:37:54,  5.18s/it]

AI Trader bought:  $ 33.990002


  9%|▊         | 106/1239 [04:52<1:36:01,  5.08s/it]

AI Trader soldd:  $ 33.299999  Profit:  - $ 0.690002


  9%|▉         | 113/1239 [05:19<1:12:33,  3.87s/it]

AI Trader bought:  $ 33.139999


  9%|▉         | 116/1239 [05:30<1:09:02,  3.69s/it]

AI Trader soldd:  $ 33.139999  Profit:  $ 0.000000


 10%|▉         | 121/1239 [05:48<1:07:41,  3.63s/it]

AI Trader bought:  $ 31.340000


 10%|▉         | 122/1239 [05:58<1:44:00,  5.59s/it]

AI Trader soldd:  $ 31.879999  Profit:  $ 0.539999


 10%|█         | 126/1239 [06:12<1:16:11,  4.11s/it]

AI Trader bought:  $ 32.349998


 10%|█         | 127/1239 [06:16<1:13:37,  3.97s/it]

AI Trader bought:  $ 31.860001


 10%|█         | 128/1239 [06:20<1:11:45,  3.88s/it]

AI Trader soldd:  $ 30.870001  Profit:  - $ 1.479998


 10%|█         | 129/1239 [06:23<1:10:47,  3.83s/it]

AI Trader soldd:  $ 30.200001  Profit:  - $ 1.660000


 11%|█         | 133/1239 [06:38<1:07:08,  3.64s/it]

AI Trader bought:  $ 31.600000


 11%|█         | 135/1239 [06:45<1:07:25,  3.66s/it]

AI Trader bought:  $ 32.549999


 11%|█         | 136/1239 [06:49<1:07:52,  3.69s/it]

AI Trader bought:  $ 32.419998


 11%|█         | 137/1239 [06:53<1:08:02,  3.70s/it]

AI Trader soldd:  $ 32.590000  Profit:  $ 0.990000


 11%|█         | 138/1239 [06:56<1:08:22,  3.73s/it]

AI Trader bought:  $ 33.500000


 11%|█         | 139/1239 [07:00<1:08:52,  3.76s/it]

AI Trader bought:  $ 33.750000


 11%|█▏        | 140/1239 [07:04<1:08:32,  3.74s/it]

AI Trader soldd:  $ 34.139999  Profit:  $ 1.590000


 11%|█▏        | 141/1239 [07:07<1:07:45,  3.70s/it]

AI Trader soldd:  $ 33.209999  Profit:  $ 0.790001


 11%|█▏        | 142/1239 [07:11<1:07:39,  3.70s/it]

AI Trader soldd:  $ 33.700001  Profit:  $ 0.200001


 12%|█▏        | 143/1239 [07:22<1:49:08,  5.97s/it]

AI Trader soldd:  $ 32.810001  Profit:  - $ 0.939999


 12%|█▏        | 145/1239 [07:30<1:27:07,  4.78s/it]

AI Trader bought:  $ 32.549999


 12%|█▏        | 147/1239 [07:37<1:16:13,  4.19s/it]

AI Trader soldd:  $ 32.340000  Profit:  - $ 0.209999


 12%|█▏        | 149/1239 [07:44<1:11:02,  3.91s/it]

AI Trader bought:  $ 32.599998


 12%|█▏        | 150/1239 [07:48<1:09:37,  3.84s/it]

AI Trader soldd:  $ 30.780001  Profit:  - $ 1.819998


 12%|█▏        | 151/1239 [07:52<1:08:18,  3.77s/it]

AI Trader bought:  $ 30.350000


 12%|█▏        | 152/1239 [07:55<1:07:05,  3.70s/it]

AI Trader bought:  $ 29.580000


 12%|█▏        | 153/1239 [07:59<1:06:15,  3.66s/it]

AI Trader soldd:  $ 30.040001  Profit:  - $ 0.309999


 12%|█▏        | 154/1239 [08:02<1:06:10,  3.66s/it]

AI Trader soldd:  $ 29.200001  Profit:  - $ 0.379999


 13%|█▎        | 156/1239 [08:09<1:04:50,  3.59s/it]

AI Trader bought:  $ 29.850000


 13%|█▎        | 157/1239 [08:13<1:04:19,  3.57s/it]

AI Trader soldd:  $ 30.400000  Profit:  $ 0.549999


 13%|█▎        | 165/1239 [08:41<1:03:41,  3.56s/it]

AI Trader bought:  $ 30.250000


 13%|█▎        | 166/1239 [08:45<1:04:11,  3.59s/it]

AI Trader bought:  $ 32.310001


 13%|█▎        | 167/1239 [08:49<1:04:35,  3.62s/it]

AI Trader soldd:  $ 31.389999  Profit:  $ 1.139999


 14%|█▎        | 169/1239 [09:05<1:39:04,  5.56s/it]

AI Trader soldd:  $ 28.700001  Profit:  - $ 3.610001


 14%|█▎        | 170/1239 [09:09<1:28:38,  4.97s/it]

AI Trader bought:  $ 27.559999


 14%|█▍        | 171/1239 [09:13<1:23:37,  4.70s/it]

AI Trader soldd:  $ 28.340000  Profit:  $ 0.780001


 14%|█▍        | 173/1239 [09:21<1:18:55,  4.44s/it]

AI Trader bought:  $ 28.740000


 14%|█▍        | 174/1239 [09:25<1:14:02,  4.17s/it]

AI Trader soldd:  $ 29.290001  Profit:  $ 0.550001


 14%|█▍        | 175/1239 [09:28<1:11:01,  4.01s/it]

AI Trader bought:  $ 29.290001


 14%|█▍        | 176/1239 [09:32<1:09:08,  3.90s/it]

AI Trader soldd:  $ 29.090000  Profit:  - $ 0.200001


 14%|█▍        | 179/1239 [09:42<1:03:51,  3.61s/it]

AI Trader bought:  $ 29.100000


 15%|█▍        | 180/1239 [09:46<1:03:08,  3.58s/it]

AI Trader bought:  $ 29.209999


 15%|█▍        | 181/1239 [09:50<1:03:03,  3.58s/it]

AI Trader soldd:  $ 29.820000  Profit:  $ 0.719999


 15%|█▍        | 182/1239 [09:53<1:02:57,  3.57s/it]

AI Trader soldd:  $ 29.719999  Profit:  $ 0.510000


 15%|█▌        | 190/1239 [10:22<1:02:15,  3.56s/it]

AI Trader bought:  $ 31.250000


 16%|█▌        | 193/1239 [10:32<1:01:39,  3.54s/it]

AI Trader bought:  $ 31.000000


 16%|█▌        | 196/1239 [10:43<1:02:02,  3.57s/it]

AI Trader bought:  $ 30.469999


 16%|█▌        | 198/1239 [10:50<1:02:54,  3.63s/it]

AI Trader soldd:  $ 30.809999  Profit:  - $ 0.440001


 16%|█▌        | 199/1239 [10:54<1:02:55,  3.63s/it]

AI Trader soldd:  $ 31.660000  Profit:  $ 0.660000


 16%|█▌        | 200/1239 [11:09<2:04:21,  7.18s/it]

AI Trader bought:  $ 31.870001


 16%|█▋        | 202/1239 [11:17<1:33:23,  5.40s/it]

AI Trader soldd:  $ 31.900000  Profit:  $ 1.430000


 16%|█▋        | 203/1239 [11:20<1:24:09,  4.87s/it]

AI Trader soldd:  $ 31.700001  Profit:  - $ 0.170000


 18%|█▊        | 219/1239 [12:18<1:00:58,  3.59s/it]

AI Trader bought:  $ 34.090000


 18%|█▊        | 220/1239 [12:21<1:00:47,  3.58s/it]

AI Trader soldd:  $ 33.549999  Profit:  - $ 0.540001


 19%|█▊        | 231/1239 [13:01<1:01:40,  3.67s/it]

AI Trader bought:  $ 30.120001


 19%|█▊        | 232/1239 [13:05<1:01:40,  3.67s/it]

AI Trader bought:  $ 30.500000


 19%|█▉        | 233/1239 [13:08<1:01:03,  3.64s/it]

AI Trader bought:  $ 30.129999


 19%|█▉        | 234/1239 [13:12<1:00:59,  3.64s/it]

AI Trader soldd:  $ 30.639999  Profit:  $ 0.519999


 19%|█▉        | 236/1239 [13:19<59:52,  3.58s/it]  

AI Trader bought:  $ 31.049999


 19%|█▉        | 237/1239 [13:23<59:37,  3.57s/it]

AI Trader soldd:  $ 30.969999  Profit:  $ 0.469999


 19%|█▉        | 238/1239 [13:26<59:36,  3.57s/it]

AI Trader soldd:  $ 30.940001  Profit:  $ 0.810001


 19%|█▉        | 239/1239 [13:44<2:12:51,  7.97s/it]

AI Trader soldd:  $ 31.000000  Profit:  - $ 0.049999


 19%|█▉        | 240/1239 [13:48<1:51:22,  6.69s/it]

AI Trader bought:  $ 30.959999


 19%|█▉        | 241/1239 [13:52<1:36:21,  5.79s/it]

AI Trader soldd:  $ 31.600000  Profit:  $ 0.640001


 20%|█▉        | 242/1239 [13:56<1:26:30,  5.21s/it]

AI Trader bought:  $ 32.000000


 20%|█▉        | 243/1239 [13:59<1:19:01,  4.76s/it]

AI Trader soldd:  $ 32.150002  Profit:  $ 0.150002


 20%|██        | 253/1239 [14:35<59:51,  3.64s/it]  

AI Trader bought:  $ 32.959999


 21%|██        | 254/1239 [14:39<59:44,  3.64s/it]

AI Trader soldd:  $ 33.490002  Profit:  $ 0.530003


 21%|██        | 255/1239 [14:42<59:56,  3.66s/it]

AI Trader bought:  $ 33.290001


 21%|██        | 256/1239 [14:46<1:00:28,  3.69s/it]

AI Trader soldd:  $ 32.549999  Profit:  - $ 0.740002


 21%|██        | 258/1239 [14:54<1:01:20,  3.75s/it]

AI Trader bought:  $ 33.509998


 21%|██        | 259/1239 [14:58<1:01:01,  3.74s/it]

AI Trader soldd:  $ 34.230000  Profit:  $ 0.720001
