## Nessa aula vamos aprender como trabalhar em detalhes com séries temporais

1. Uma série temporal pode depender de outra série?
    1. Se pedender como trabalhamos com essa dependência?

2. Como é feita a avaliação de uma série temporal?

<img src="wfcv.png" />

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import lightgbm as lgb
import plotly.graph_objs as go

import optuna
import shap 

from sklearn.metrics import mean_squared_error

### Aqui vamos tentar prever o preço das ações da Petrobrás

In [2]:
preco_pb = pd.read_csv('PETR4.SA.csv')
del preco_pb['Volume']
preco_pb.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close
0,2015-07-02,12.35,12.53,12.16,12.35,11.457355
1,2015-07-03,12.25,12.31,11.68,11.76,10.909998
2,2015-07-06,11.51,11.73,11.28,11.51,10.67807
3,2015-07-07,11.36,11.85,10.82,11.77,10.919277
4,2015-07-08,11.62,11.99,11.33,11.46,10.631682


### Vamos assumir que o preço da ação da empresa seja influenciado pelo preço do petroleo 

In [3]:
preco_petro = pd.read_csv('CL=F.csv')
del preco_petro['Volume']
preco_petro.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close
0,2015-07-02,56.869999,57.950001,56.5,56.93,56.93
1,2015-07-05,,,,,
2,2015-07-06,54.900002,55.34,52.41,52.529999,52.529999
3,2015-07-07,52.75,53.43,50.580002,52.330002,52.330002
4,2015-07-08,52.91,52.959999,50.91,51.650002,51.650002


### E o preço do real será que tem alguma influência?

In [4]:
preco_real = pd.read_csv('BR=F.csv')
del preco_real['Volume']
preco_real.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close
0,2015-07-02,0.3189,0.3189,0.3189,0.3189,0.3189
1,2015-07-05,,,,,
2,2015-07-06,0.3146,0.3146,0.3146,0.3146,0.3146
3,2015-07-07,0.31035,0.312,0.3098,0.31035,0.31035
4,2015-07-08,0.30675,0.31075,0.30585,0.30675,0.30675


### Vamos começar com alguns merges?

In [5]:
pb_petro = preco_pb.merge(preco_petro, on = 'Date', how = 'left')
pb_petro.head()

Unnamed: 0,Date,Open_x,High_x,Low_x,Close_x,Adj Close_x,Open_y,High_y,Low_y,Close_y,Adj Close_y
0,2015-07-02,12.35,12.53,12.16,12.35,11.457355,56.869999,57.950001,56.5,56.93,56.93
1,2015-07-03,12.25,12.31,11.68,11.76,10.909998,,,,,
2,2015-07-06,11.51,11.73,11.28,11.51,10.67807,54.900002,55.34,52.41,52.529999,52.529999
3,2015-07-07,11.36,11.85,10.82,11.77,10.919277,52.75,53.43,50.580002,52.330002,52.330002
4,2015-07-08,11.62,11.99,11.33,11.46,10.631682,52.91,52.959999,50.91,51.650002,51.650002


In [6]:
pb_petro_real = pb_petro.merge(preco_real, on = 'Date', how = 'left')
pb_petro_real.head()

Unnamed: 0,Date,Open_x,High_x,Low_x,Close_x,Adj Close_x,Open_y,High_y,Low_y,Close_y,Adj Close_y,Open,High,Low,Close,Adj Close
0,2015-07-02,12.35,12.53,12.16,12.35,11.457355,56.869999,57.950001,56.5,56.93,56.93,0.3189,0.3189,0.3189,0.3189,0.3189
1,2015-07-03,12.25,12.31,11.68,11.76,10.909998,,,,,,,,,,
2,2015-07-06,11.51,11.73,11.28,11.51,10.67807,54.900002,55.34,52.41,52.529999,52.529999,0.3146,0.3146,0.3146,0.3146,0.3146
3,2015-07-07,11.36,11.85,10.82,11.77,10.919277,52.75,53.43,50.580002,52.330002,52.330002,0.31035,0.312,0.3098,0.31035,0.31035
4,2015-07-08,11.62,11.99,11.33,11.46,10.631682,52.91,52.959999,50.91,51.650002,51.650002,0.30675,0.31075,0.30585,0.30675,0.30675


### Renomear as colunas para facilitar nossa vida

In [7]:
nomes={ 'Date' : 'Data',
        "Open_x": "Preco_Abertura_Petrobras", 
        "High_x": "Preco_Alta_Petrobras",
        "Low_x": "Preco_Baixo_Petrobras",
        "Close_x": "Preco_Fechamento_Petrobras",
        "Adj Close_x" : "Preco_Corrigido_Petrobras",
        "Open_y": "Preco_Abertura_Petróleo", 
        "High_y": "Preco_Alta_Petróleo",
        "Low_y": "Preco_Baixo_Petróleo",
        "Close_y": "Preco_Fechamento_Petróleo",
        "Adj Close_y" : "Preco_Corrigido_Petróleo",
        "Open": "Preco_Abertura_Real", 
        "High": "Preco_Alta_Real",
        "Low": "Preco_Baixo_Real",
        "Close": "Preco_Fechamento_Real",
        "Adj Close" : "Preco_Corrigido_Real"}

pb_petro_real.rename(columns = nomes, inplace = True)
pb_petro_real.head()

Unnamed: 0,Data,Preco_Abertura_Petrobras,Preco_Alta_Petrobras,Preco_Baixo_Petrobras,Preco_Fechamento_Petrobras,Preco_Corrigido_Petrobras,Preco_Abertura_Petróleo,Preco_Alta_Petróleo,Preco_Baixo_Petróleo,Preco_Fechamento_Petróleo,Preco_Corrigido_Petróleo,Preco_Abertura_Real,Preco_Alta_Real,Preco_Baixo_Real,Preco_Fechamento_Real,Preco_Corrigido_Real
0,2015-07-02,12.35,12.53,12.16,12.35,11.457355,56.869999,57.950001,56.5,56.93,56.93,0.3189,0.3189,0.3189,0.3189,0.3189
1,2015-07-03,12.25,12.31,11.68,11.76,10.909998,,,,,,,,,,
2,2015-07-06,11.51,11.73,11.28,11.51,10.67807,54.900002,55.34,52.41,52.529999,52.529999,0.3146,0.3146,0.3146,0.3146,0.3146
3,2015-07-07,11.36,11.85,10.82,11.77,10.919277,52.75,53.43,50.580002,52.330002,52.330002,0.31035,0.312,0.3098,0.31035,0.31035
4,2015-07-08,11.62,11.99,11.33,11.46,10.631682,52.91,52.959999,50.91,51.650002,51.650002,0.30675,0.31075,0.30585,0.30675,0.30675


In [8]:
pb_petro_real.dtypes

Data                           object
Preco_Abertura_Petrobras      float64
Preco_Alta_Petrobras          float64
Preco_Baixo_Petrobras         float64
Preco_Fechamento_Petrobras    float64
Preco_Corrigido_Petrobras     float64
Preco_Abertura_Petróleo       float64
Preco_Alta_Petróleo           float64
Preco_Baixo_Petróleo          float64
Preco_Fechamento_Petróleo     float64
Preco_Corrigido_Petróleo      float64
Preco_Abertura_Real           float64
Preco_Alta_Real               float64
Preco_Baixo_Real              float64
Preco_Fechamento_Real         float64
Preco_Corrigido_Real          float64
dtype: object

In [9]:
#Lembre de sempre definir a coluna que contém a data como datetime
pb_petro_real['Data'] = pd.to_datetime(pb_petro_real['Data'])

pb_petro_real.dtypes

Data                          datetime64[ns]
Preco_Abertura_Petrobras             float64
Preco_Alta_Petrobras                 float64
Preco_Baixo_Petrobras                float64
Preco_Fechamento_Petrobras           float64
Preco_Corrigido_Petrobras            float64
Preco_Abertura_Petróleo              float64
Preco_Alta_Petróleo                  float64
Preco_Baixo_Petróleo                 float64
Preco_Fechamento_Petróleo            float64
Preco_Corrigido_Petróleo             float64
Preco_Abertura_Real                  float64
Preco_Alta_Real                      float64
Preco_Baixo_Real                     float64
Preco_Fechamento_Real                float64
Preco_Corrigido_Real                 float64
dtype: object

In [10]:
## separar a variável alvo

y = pb_petro_real['Preco_Corrigido_Petrobras']
del pb_petro_real['Preco_Corrigido_Petrobras']

colunas = pb_petro_real.select_dtypes(exclude=['datetime64']).columns.tolist()

In [15]:
janela=30
for i in range (0, len(colunas)):
    pb_petro_real[str(colunas[i]+'_Media')] = pb_petro_real[str(colunas[i])].\
            rolling(window=janela).mean().round(4)

for i in range (0, len(colunas)):
    pb_petro_real[str(colunas[i]+'_%')] = pb_petro_real[str(colunas[i])].pct_change(janela-1).round(4)

In [20]:
pb_petro_real[29:32].T

Unnamed: 0,29,30,31
Data,2015-08-13 00:00:00,2015-08-14 00:00:00,2015-08-17 00:00:00
Preco_Abertura_Petrobras,9.91,9.61,9.2
Preco_Alta_Petrobras,9.97,9.68,9.36
Preco_Baixo_Petrobras,9.5,9.3,9.12
Preco_Fechamento_Petrobras,9.5,9.3,9.15
Preco_Abertura_Petróleo,43.34,42.23,42.18
Preco_Alta_Petróleo,43.72,42.96,42.69
Preco_Baixo_Petróleo,41.91,41.35,41.64
Preco_Fechamento_Petróleo,42.23,42.5,41.87
Preco_Corrigido_Petróleo,42.23,42.5,41.87


In [21]:
for i in range (0, len(colunas)):
    for j in range (int(i+1), len(colunas)):

        pb_petro_real['Correlacao_'+colunas[i]+colunas[j]] = \
            pb_petro_real[colunas[i]].rolling(window=janela).corr(pb_petro_real[colunas[j]]).round(4)
                

In [28]:
pb_petro_real[29:32].T

Unnamed: 0,29,30,31
Data,2015-08-13 00:00:00,2015-08-14 00:00:00,2015-08-17 00:00:00
Preco_Abertura_Petrobras,9.91,9.61,9.2
Preco_Alta_Petrobras,9.97,9.68,9.36
Preco_Baixo_Petrobras,9.5,9.3,9.12
Preco_Fechamento_Petrobras,9.5,9.3,9.15
...,...,...,...
Correlacao_Preco_Alta_RealPreco_Fechamento_Real,,,0.9945
Correlacao_Preco_Alta_RealPreco_Corrigido_Real,,,0.9945
Correlacao_Preco_Baixo_RealPreco_Fechamento_Real,,,0.9935
Correlacao_Preco_Baixo_RealPreco_Corrigido_Real,,,0.9935


In [29]:
colunas2 = pb_petro_real.select_dtypes(exclude=['datetime64']).columns.tolist()

In [30]:
for i in range (0, len(colunas2)):
    pb_petro_real[colunas2[i]] = pb_petro_real[colunas2[i]].shift(periods=janela)

In [31]:
pd.options.display.max_rows = None
pb_petro_real[29:32].T

Unnamed: 0,29,30,31
Data,2015-08-13,2015-08-14 00:00:00,2015-08-17 00:00:00
Preco_Abertura_Petrobras,NaT,12.35,12.25
Preco_Alta_Petrobras,NaT,12.53,12.31
Preco_Baixo_Petrobras,NaT,12.16,11.68
Preco_Fechamento_Petrobras,NaT,12.35,11.76
Preco_Abertura_Petróleo,NaT,56.87,
Preco_Alta_Petróleo,NaT,57.95,
Preco_Baixo_Petróleo,NaT,56.5,
Preco_Fechamento_Petróleo,NaT,56.93,
Preco_Corrigido_Petróleo,NaT,56.93,


In [32]:
## Lembrar de remover colunas com alta correlação
# Matriz de Correlação
corr_matrix = pb_petro_real.corr().abs()
corr_matrix

Unnamed: 0,Preco_Abertura_Petrobras,Preco_Alta_Petrobras,Preco_Baixo_Petrobras,Preco_Fechamento_Petrobras,Preco_Abertura_Petróleo,Preco_Alta_Petróleo,Preco_Baixo_Petróleo,Preco_Fechamento_Petróleo,Preco_Corrigido_Petróleo,Preco_Abertura_Real,...,Correlacao_Preco_Abertura_RealPreco_Alta_Real,Correlacao_Preco_Abertura_RealPreco_Baixo_Real,Correlacao_Preco_Abertura_RealPreco_Fechamento_Real,Correlacao_Preco_Abertura_RealPreco_Corrigido_Real,Correlacao_Preco_Alta_RealPreco_Baixo_Real,Correlacao_Preco_Alta_RealPreco_Fechamento_Real,Correlacao_Preco_Alta_RealPreco_Corrigido_Real,Correlacao_Preco_Baixo_RealPreco_Fechamento_Real,Correlacao_Preco_Baixo_RealPreco_Corrigido_Real,Correlacao_Preco_Fechamento_RealPreco_Corrigido_Real
Preco_Abertura_Petrobras,1.0,0.999363,0.998994,0.998407,0.586942,0.591968,0.568908,0.582803,0.582803,0.31566,...,0.039449,0.193761,0.031173,0.031173,0.22115,0.041442,0.041442,0.187906,0.187906,
Preco_Alta_Petrobras,0.999363,1.0,0.998735,0.999107,0.584659,0.590268,0.566425,0.581009,0.581009,0.321645,...,0.042097,0.198187,0.02619,0.02619,0.223139,0.044127,0.044127,0.192739,0.192739,
Preco_Baixo_Petrobras,0.998994,0.998735,1.0,0.999276,0.589234,0.594271,0.571898,0.585596,0.585596,0.307286,...,0.034949,0.191592,0.035352,0.035352,0.219289,0.036874,0.036874,0.185288,0.185288,
Preco_Fechamento_Petrobras,0.998407,0.999107,0.999276,1.0,0.586594,0.592144,0.569105,0.583514,0.583514,0.313876,...,0.037937,0.194847,0.028542,0.028542,0.221441,0.039951,0.039951,0.189221,0.189221,
Preco_Abertura_Petróleo,0.586942,0.584659,0.589234,0.586594,1.0,0.99786,0.988309,0.994179,0.994179,0.200439,...,9.1e-05,0.141335,0.622879,0.622879,0.088429,0.013238,0.013238,0.084906,0.084906,
Preco_Alta_Petróleo,0.591968,0.590268,0.594271,0.592144,0.99786,1.0,0.988455,0.996636,0.996636,0.18682,...,0.008595,0.152696,0.614143,0.614143,0.097504,0.004363,0.004363,0.09669,0.09669,
Preco_Baixo_Petróleo,0.568908,0.566425,0.571898,0.569105,0.988309,0.988455,1.0,0.993552,0.993552,0.22009,...,0.009558,0.130381,0.648358,0.648358,0.076221,0.022817,0.022817,0.071388,0.071388,
Preco_Fechamento_Petróleo,0.582803,0.581009,0.585596,0.583514,0.994179,0.996636,0.993552,1.0,1.0,0.202238,...,0.000158,0.143695,0.635195,0.635195,0.088253,0.01335,0.01335,0.085804,0.085804,
Preco_Corrigido_Petróleo,0.582803,0.581009,0.585596,0.583514,0.994179,0.996636,0.993552,1.0,1.0,0.202238,...,0.000158,0.143695,0.635195,0.635195,0.088253,0.01335,0.01335,0.085804,0.085804,
Preco_Abertura_Real,0.31566,0.321645,0.307286,0.313876,0.200439,0.18682,0.22009,0.202238,0.202238,1.0,...,0.247858,0.296485,0.570915,0.570915,0.21915,0.259878,0.259878,0.345674,0.345674,


In [33]:
# Selecionar os elementos da parte superior da diagonal
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
upper

Unnamed: 0,Preco_Abertura_Petrobras,Preco_Alta_Petrobras,Preco_Baixo_Petrobras,Preco_Fechamento_Petrobras,Preco_Abertura_Petróleo,Preco_Alta_Petróleo,Preco_Baixo_Petróleo,Preco_Fechamento_Petróleo,Preco_Corrigido_Petróleo,Preco_Abertura_Real,...,Correlacao_Preco_Abertura_RealPreco_Alta_Real,Correlacao_Preco_Abertura_RealPreco_Baixo_Real,Correlacao_Preco_Abertura_RealPreco_Fechamento_Real,Correlacao_Preco_Abertura_RealPreco_Corrigido_Real,Correlacao_Preco_Alta_RealPreco_Baixo_Real,Correlacao_Preco_Alta_RealPreco_Fechamento_Real,Correlacao_Preco_Alta_RealPreco_Corrigido_Real,Correlacao_Preco_Baixo_RealPreco_Fechamento_Real,Correlacao_Preco_Baixo_RealPreco_Corrigido_Real,Correlacao_Preco_Fechamento_RealPreco_Corrigido_Real
Preco_Abertura_Petrobras,,0.999363,0.998994,0.998407,0.586942,0.591968,0.568908,0.582803,0.582803,0.31566,...,0.039449,0.193761,0.031173,0.031173,0.22115,0.041442,0.041442,0.187906,0.187906,
Preco_Alta_Petrobras,,,0.998735,0.999107,0.584659,0.590268,0.566425,0.581009,0.581009,0.321645,...,0.042097,0.198187,0.02619,0.02619,0.223139,0.044127,0.044127,0.192739,0.192739,
Preco_Baixo_Petrobras,,,,0.999276,0.589234,0.594271,0.571898,0.585596,0.585596,0.307286,...,0.034949,0.191592,0.035352,0.035352,0.219289,0.036874,0.036874,0.185288,0.185288,
Preco_Fechamento_Petrobras,,,,,0.586594,0.592144,0.569105,0.583514,0.583514,0.313876,...,0.037937,0.194847,0.028542,0.028542,0.221441,0.039951,0.039951,0.189221,0.189221,
Preco_Abertura_Petróleo,,,,,,0.99786,0.988309,0.994179,0.994179,0.200439,...,9.1e-05,0.141335,0.622879,0.622879,0.088429,0.013238,0.013238,0.084906,0.084906,
Preco_Alta_Petróleo,,,,,,,0.988455,0.996636,0.996636,0.18682,...,0.008595,0.152696,0.614143,0.614143,0.097504,0.004363,0.004363,0.09669,0.09669,
Preco_Baixo_Petróleo,,,,,,,,0.993552,0.993552,0.22009,...,0.009558,0.130381,0.648358,0.648358,0.076221,0.022817,0.022817,0.071388,0.071388,
Preco_Fechamento_Petróleo,,,,,,,,,1.0,0.202238,...,0.000158,0.143695,0.635195,0.635195,0.088253,0.01335,0.01335,0.085804,0.085804,
Preco_Corrigido_Petróleo,,,,,,,,,,0.202238,...,0.000158,0.143695,0.635195,0.635195,0.088253,0.01335,0.01335,0.085804,0.085804,
Preco_Abertura_Real,,,,,,,,,,,...,0.247858,0.296485,0.570915,0.570915,0.21915,0.259878,0.259878,0.345674,0.345674,


In [34]:
# Selecionar as colunas para deletar
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]

In [35]:
pb_petro_real = pb_petro_real.drop(columns=to_drop)

In [36]:
pb_petro_real['Dia_do_Ano'] = pb_petro_real.Data.apply(lambda x: x.dayofyear)
pb_petro_real['Semana_do_Ano'] = pb_petro_real.Data.apply(lambda x: x.weekofyear)

In [37]:
pb_petro_real['Preco_Acao_Fechamento'] = y
pb_petro_real.shape

(1244, 31)

In [38]:
data = pb_petro_real['Data']
del pb_petro_real['Data']

In [39]:
pb_petro_real = pb_petro_real.interpolate(method='linear', limit_direction='forward', axis=1)
pb_petro_real['Data'] = data

In [40]:
pb_petro_real.dropna(how='any', inplace = True)

In [41]:
pb_petro_real.shape

(1211, 31)

In [42]:

pb_petro_real.head()

Unnamed: 0,Preco_Abertura_Petrobras,Preco_Abertura_Petróleo,Preco_Abertura_Real,Preco_Abertura_Petrobras_%,Preco_Abertura_Petróleo_%,Preco_Baixo_Petróleo_%,Preco_Abertura_Real_%,Correlacao_Preco_Abertura_PetrobrasPreco_Alta_Petrobras,Correlacao_Preco_Abertura_PetrobrasPreco_Baixo_Petrobras,Correlacao_Preco_Abertura_PetrobrasPreco_Fechamento_Petrobras,...,Correlacao_Preco_Fechamento_PetróleoPreco_Corrigido_Petróleo,Correlacao_Preco_Abertura_RealPreco_Alta_Real,Correlacao_Preco_Abertura_RealPreco_Baixo_Real,Correlacao_Preco_Abertura_RealPreco_Fechamento_Real,Correlacao_Preco_Alta_RealPreco_Baixo_Real,Correlacao_Preco_Fechamento_RealPreco_Corrigido_Real,Dia_do_Ano,Semana_do_Ano,Preco_Acao_Fechamento,Data
30,12.35,56.869999,0.3189,9.346144,18.373388,27.400632,36.427876,45.45512,54.482364,63.509608,...,171.836536,180.86378,189.891024,198.918268,207.945512,216.972756,226.0,33.0,8.627806,2015-08-14
31,12.25,20.277778,28.305556,36.333333,44.361111,52.388889,60.416667,68.444444,76.472222,84.5,...,180.833333,188.861111,196.888889,204.916667,212.944444,220.972222,229.0,34.0,8.488647,2015-08-17
32,11.51,54.900002,0.3146,9.502016,18.689432,27.876848,37.064264,46.25168,55.439096,64.626512,...,174.875504,184.06292,193.250336,202.437752,211.625168,220.812584,230.0,34.0,8.340212,2015-08-18
33,11.36,52.75,0.31035,9.537936,18.765522,27.993108,37.220694,46.44828,55.675866,64.903452,...,175.634484,184.86207,194.089656,203.317242,212.544828,221.772414,231.0,34.0,8.108282,2015-08-19
34,11.62,52.91,0.30675,9.57448,18.84221,28.10994,37.37767,46.6454,55.91313,65.18086,...,176.39362,185.66135,194.92908,204.19681,213.46454,222.73227,232.0,34.0,8.061894,2015-08-20


In [43]:
pb_petro_real['Data'].min()

Timestamp('2015-08-14 00:00:00')

In [44]:
pb_petro_real['Data'].max()

Timestamp('2020-07-01 00:00:00')

In [45]:
pb_petro_real['Ano'] = pb_petro_real['Data'].dt.year
pb_petro_real['Mes'] = pb_petro_real['Data'].dt.month
pb_petro_real['Dia'] = pb_petro_real['Data'].dt.day

In [46]:
#Listas para salvar as métricas e parâmetros
parametros = []
metricas = []

val = []
pred = []

data = []

In [47]:
data1 = ['2015-10-01', '2015-11-01', '2015-12-01', '2016-01-01',
         '2016-02-01',  '2016-03-01', '2016-04-01', 
        '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01'
        , '2016-09-01', '2016-10-01', '2016-11-01']

data2 = ['2018-10-01', '2018-11-01', '2018-12-01','2019-01-01',
         '2019-02-01', '2019-03-01', '2019-04-01', 
        '2019-05-01',  '2019-06-01',  '2019-07-01', '2019-08-01'
         '2019-09-01', '2019-10-01', '2019-11-01']

data3 = [ '2018-11-01', '2018-12-01', '2019-01-01',
        '2019-02-01', '2019-03-01', '2019-04-01', '2019-05-01',
        '2019-06-01', '2019-07-10', '2019-08-01'
        ,'2019-09-01', '2019-10-01',  '2019-11-01', '2019-12-01']

data4 = ['2018-12-01', '2019-01-01', '2019-02-01',
        '2019-03-01', '2019-04-01', '2019-05-01',
        '2019-06-01', '2019-07-01', '2019-08-01', '2019-09-01'
        ,'2019-10-01', '2019-11-01', '2019-12-01', '2020-01-01']

In [48]:
len(data1)

14

In [49]:
for i in range (0, 10):

    #treino
    treino = pb_petro_real.loc[(pb_petro_real['Data']>=data1[i]) & (pb_petro_real['Data'] < data2[i])]
    y_treino = treino['Preco_Acao_Fechamento'].values
    data_treino = treino ['Data']
    del treino['Preco_Acao_Fechamento']
    del treino ['Data']

    #validação
    validacao = pb_petro_real.loc[(pb_petro_real['Data']>=data2[i]) & (pb_petro_real['Data'] < data3[i])]
    y_validacao = validacao['Preco_Acao_Fechamento'].values
    data_validacao = validacao['Data']
    del validacao['Preco_Acao_Fechamento']
    del validacao ['Data']
    
    #teste
    teste = pb_petro_real.loc[(pb_petro_real['Data']>=data3[i]) & (pb_petro_real['Data'] < data4[i])]
    y_teste = teste['Preco_Acao_Fechamento'].values
    data_teste = teste ['Data']
    del teste['Preco_Acao_Fechamento']
    del teste ['Data']

    
#    if teste.shape[0] >= 5:
#        if validacao.shape[0] >= 5:
    lgb_train = lgb.Dataset(treino.values, y_treino)
    lgb_eval = lgb.Dataset(validacao.values, y_validacao, reference=lgb_train)

    def objective(trial):

            # 2. Suggest values of the hyperparameters using a trial object.
            param = {
                'objective': 'regression',
                'metric': 'mse',
                'boosting_type': trial.suggest_categorical("boosting_type", ["gbdt", "dart"]),
                'num_leaves': trial.suggest_int('num_leaves', 2, 256),
                'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
                'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
                'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
                'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
                'max_depth': trial.suggest_int('max_depth', 5, 100),
            }

            gbm = lgb.train(param, lgb_train)
            y_pred = gbm.predict(validacao.values)
            rmse = mean_squared_error(y_validacao, y_pred) ** 0.5
            return rmse

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=30)

    final_model = lgb.LGBMRegressor(**study.best_params)
    y_pred2 = final_model.fit(treino.values, y_treino).predict(teste)

    rmse = mean_squared_error(y_teste ,y_pred2) ** 0.5
    print(rmse)

    parametros.append(study.best_params)
    metricas.append(rmse)
    val.append(y_validacao)
    pred.append(y_pred2)
    data.append(data_validacao)

[I 2020-07-02 22:02:13,860] Finished trial#0 with value: 6.54442792999266 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 251, 'feature_fraction': 0.41558816535668436, 'bagging_fraction': 0.8419383314379572, 'bagging_freq': 2, 'min_child_samples': 79, 'max_depth': 65}. Best is trial#0 with value: 6.54442792999266.
[I 2020-07-02 22:02:14,260] Finished trial#1 with value: 8.486006438091518 with parameters: {'boosting_type': 'dart', 'num_leaves': 187, 'feature_fraction': 0.8495279856758382, 'bagging_fraction': 0.6541765435503608, 'bagging_freq': 3, 'min_child_samples': 46, 'max_depth': 56}. Best is trial#0 with value: 6.54442792999266.
[I 2020-07-02 22:02:14,592] Finished trial#2 with value: 6.819767520537571 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 92, 'feature_fraction': 0.9619286916498669, 'bagging_fraction': 0.5655404932193057, 'bagging_freq': 7, 'min_child_samples': 98, 'max_depth': 59}. Best is trial#0 with value: 6.54442792999266.
[I 2020-07-02 22:02:14,901

6.012971854088638


[I 2020-07-02 22:02:30,193] Finished trial#0 with value: 3.5380577798787796 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 165, 'feature_fraction': 0.7237996028328795, 'bagging_fraction': 0.4859173196049187, 'bagging_freq': 5, 'min_child_samples': 64, 'max_depth': 5}. Best is trial#0 with value: 3.5380577798787796.
[I 2020-07-02 22:02:30,613] Finished trial#1 with value: 3.8168061207206483 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 203, 'feature_fraction': 0.8271542537566003, 'bagging_fraction': 0.6142121643853315, 'bagging_freq': 5, 'min_child_samples': 56, 'max_depth': 87}. Best is trial#0 with value: 3.5380577798787796.
[I 2020-07-02 22:02:31,083] Finished trial#2 with value: 6.653012252430298 with parameters: {'boosting_type': 'dart', 'num_leaves': 215, 'feature_fraction': 0.6759578412435157, 'bagging_fraction': 0.9397175389936202, 'bagging_freq': 2, 'min_child_samples': 46, 'max_depth': 9}. Best is trial#0 with value: 3.5380577798787796.
[I 2020-07-02 22:02

1.4082695913615793


[I 2020-07-02 22:02:49,243] Finished trial#0 with value: 4.059582342761472 with parameters: {'boosting_type': 'dart', 'num_leaves': 27, 'feature_fraction': 0.6300490323437284, 'bagging_fraction': 0.549884857092795, 'bagging_freq': 7, 'min_child_samples': 39, 'max_depth': 31}. Best is trial#0 with value: 4.059582342761472.
[I 2020-07-02 22:02:49,770] Finished trial#1 with value: 1.4185619206325735 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 26, 'feature_fraction': 0.9928294941102391, 'bagging_fraction': 0.8289393945514452, 'bagging_freq': 1, 'min_child_samples': 41, 'max_depth': 45}. Best is trial#1 with value: 1.4185619206325735.
[I 2020-07-02 22:02:50,154] Finished trial#2 with value: 1.577177677350907 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 193, 'feature_fraction': 0.8633376273523636, 'bagging_fraction': 0.6857974385748715, 'bagging_freq': 1, 'min_child_samples': 86, 'max_depth': 28}. Best is trial#1 with value: 1.4185619206325735.
[I 2020-07-02 22:02:50

8.020857867409791


[I 2020-07-02 22:03:08,287] Finished trial#0 with value: 9.802935485163873 with parameters: {'boosting_type': 'dart', 'num_leaves': 21, 'feature_fraction': 0.9134607136025376, 'bagging_fraction': 0.44556782945248785, 'bagging_freq': 2, 'min_child_samples': 95, 'max_depth': 89}. Best is trial#0 with value: 9.802935485163873.
[I 2020-07-02 22:03:08,733] Finished trial#1 with value: 10.480033302451895 with parameters: {'boosting_type': 'dart', 'num_leaves': 61, 'feature_fraction': 0.4238097286020854, 'bagging_fraction': 0.7022262043919718, 'bagging_freq': 3, 'min_child_samples': 62, 'max_depth': 59}. Best is trial#0 with value: 9.802935485163873.
[I 2020-07-02 22:03:09,141] Finished trial#2 with value: 8.706206149943203 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 132, 'feature_fraction': 0.6633439695777765, 'bagging_fraction': 0.46592080272876935, 'bagging_freq': 1, 'min_child_samples': 47, 'max_depth': 48}. Best is trial#2 with value: 8.706206149943203.
[I 2020-07-02 22:03:0

6.637847960384765


[I 2020-07-02 22:03:28,137] Finished trial#0 with value: 4.815679037199503 with parameters: {'boosting_type': 'dart', 'num_leaves': 36, 'feature_fraction': 0.9327003178176981, 'bagging_fraction': 0.8673519451091194, 'bagging_freq': 7, 'min_child_samples': 54, 'max_depth': 27}. Best is trial#0 with value: 4.815679037199503.
[I 2020-07-02 22:03:28,434] Finished trial#1 with value: 3.728473793780859 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 57, 'feature_fraction': 0.4173265561143219, 'bagging_fraction': 0.6420819938793659, 'bagging_freq': 1, 'min_child_samples': 64, 'max_depth': 41}. Best is trial#1 with value: 3.728473793780859.
[I 2020-07-02 22:03:28,779] Finished trial#2 with value: 6.549715909198064 with parameters: {'boosting_type': 'dart', 'num_leaves': 228, 'feature_fraction': 0.8120980963056084, 'bagging_fraction': 0.556023071310685, 'bagging_freq': 4, 'min_child_samples': 76, 'max_depth': 83}. Best is trial#1 with value: 3.728473793780859.
[I 2020-07-02 22:03:29,38

5.454539821857647


[I 2020-07-02 22:03:47,116] Finished trial#0 with value: 8.158718185381295 with parameters: {'boosting_type': 'dart', 'num_leaves': 222, 'feature_fraction': 0.6864071194851331, 'bagging_fraction': 0.7852702518198271, 'bagging_freq': 6, 'min_child_samples': 38, 'max_depth': 92}. Best is trial#0 with value: 8.158718185381295.
[I 2020-07-02 22:03:47,450] Finished trial#1 with value: 6.534723813914403 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 214, 'feature_fraction': 0.43725177346523736, 'bagging_fraction': 0.7188555881809544, 'bagging_freq': 5, 'min_child_samples': 83, 'max_depth': 78}. Best is trial#1 with value: 6.534723813914403.
[I 2020-07-02 22:03:48,087] Finished trial#2 with value: 4.9128115205289475 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 130, 'feature_fraction': 0.9462644610164457, 'bagging_fraction': 0.6933953140967812, 'bagging_freq': 6, 'min_child_samples': 22, 'max_depth': 86}. Best is trial#2 with value: 4.9128115205289475.
[I 2020-07-02 22:03

2.2255773728131403


[I 2020-07-02 22:04:04,684] Finished trial#0 with value: 6.973527126719921 with parameters: {'boosting_type': 'dart', 'num_leaves': 30, 'feature_fraction': 0.8731213804258691, 'bagging_fraction': 0.5159431108893304, 'bagging_freq': 4, 'min_child_samples': 61, 'max_depth': 34}. Best is trial#0 with value: 6.973527126719921.
[I 2020-07-02 22:04:05,007] Finished trial#1 with value: 2.5986350361734996 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 15, 'feature_fraction': 0.8851026758295715, 'bagging_fraction': 0.7187375953774477, 'bagging_freq': 1, 'min_child_samples': 49, 'max_depth': 34}. Best is trial#1 with value: 2.5986350361734996.
[I 2020-07-02 22:04:05,291] Finished trial#2 with value: 2.7005130599085816 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 122, 'feature_fraction': 0.7982457102721863, 'bagging_fraction': 0.7709403797197569, 'bagging_freq': 3, 'min_child_samples': 67, 'max_depth': 60}. Best is trial#1 with value: 2.5986350361734996.
[I 2020-07-02 22:04:

1.3380753495753088


[I 2020-07-02 22:04:16,359] Finished trial#0 with value: 4.2108328144976435 with parameters: {'boosting_type': 'dart', 'num_leaves': 5, 'feature_fraction': 0.8614394641755534, 'bagging_fraction': 0.799038105590848, 'bagging_freq': 6, 'min_child_samples': 74, 'max_depth': 93}. Best is trial#0 with value: 4.2108328144976435.
[I 2020-07-02 22:04:16,655] Finished trial#1 with value: 4.928438914942005 with parameters: {'boosting_type': 'dart', 'num_leaves': 150, 'feature_fraction': 0.4527104948871363, 'bagging_fraction': 0.7005582721083838, 'bagging_freq': 1, 'min_child_samples': 84, 'max_depth': 66}. Best is trial#0 with value: 4.2108328144976435.
[I 2020-07-02 22:04:17,052] Finished trial#2 with value: 1.6946680638774598 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 175, 'feature_fraction': 0.7960519780001168, 'bagging_fraction': 0.6105762510162256, 'bagging_freq': 7, 'min_child_samples': 45, 'max_depth': 98}. Best is trial#2 with value: 1.6946680638774598.
[I 2020-07-02 22:04:

2.6781429763922238


[I 2020-07-02 22:04:30,326] Finished trial#0 with value: 3.734818764715996 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 50, 'feature_fraction': 0.4374496576640474, 'bagging_fraction': 0.8329208650551128, 'bagging_freq': 2, 'min_child_samples': 48, 'max_depth': 21}. Best is trial#0 with value: 3.734818764715996.
[I 2020-07-02 22:04:30,620] Finished trial#1 with value: 5.414110299541302 with parameters: {'boosting_type': 'dart', 'num_leaves': 231, 'feature_fraction': 0.4434532363661977, 'bagging_fraction': 0.6469234198132965, 'bagging_freq': 1, 'min_child_samples': 60, 'max_depth': 35}. Best is trial#0 with value: 3.734818764715996.
[I 2020-07-02 22:04:30,870] Finished trial#2 with value: 6.550563048943886 with parameters: {'boosting_type': 'dart', 'num_leaves': 247, 'feature_fraction': 0.5769876670429583, 'bagging_fraction': 0.5391039848380919, 'bagging_freq': 2, 'min_child_samples': 99, 'max_depth': 58}. Best is trial#0 with value: 3.734818764715996.
[I 2020-07-02 22:04:31,

2.4445208036355655


[I 2020-07-02 22:04:39,017] Finished trial#0 with value: 4.071019050294456 with parameters: {'boosting_type': 'dart', 'num_leaves': 112, 'feature_fraction': 0.954602635547015, 'bagging_fraction': 0.7408043383384235, 'bagging_freq': 6, 'min_child_samples': 99, 'max_depth': 87}. Best is trial#0 with value: 4.071019050294456.
[I 2020-07-02 22:04:39,483] Finished trial#1 with value: 1.3619419408727151 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 68, 'feature_fraction': 0.9939114890219161, 'bagging_fraction': 0.9506203616587963, 'bagging_freq': 4, 'min_child_samples': 28, 'max_depth': 98}. Best is trial#1 with value: 1.3619419408727151.
[I 2020-07-02 22:04:40,030] Finished trial#2 with value: 4.912821485270778 with parameters: {'boosting_type': 'dart', 'num_leaves': 41, 'feature_fraction': 0.7862531403638682, 'bagging_fraction': 0.7947572954394668, 'bagging_freq': 4, 'min_child_samples': 11, 'max_depth': 42}. Best is trial#1 with value: 1.3619419408727151.
[I 2020-07-02 22:04:40

1.0794897857965882


In [50]:
metricas

In [51]:
np.argmin(metricas)

9

In [61]:
print(len(parametros))
print(parametros[np.argmin(metricas)])

10
{'boosting_type': 'gbdt', 'num_leaves': 68, 'feature_fraction': 0.9939114890219161, 'bagging_fraction': 0.9506203616587963, 'bagging_freq': 4, 'min_child_samples': 28, 'max_depth': 98}


In [96]:
final_model = lgb.LGBMRegressor(**parametros[np.argmin(metricas)])
final_model.fit(treino.values, y_treino)
y_val_pred = final_model.predict(validacao)
y_test_pred = final_model.predict(teste)


In [99]:
y_real = np.concatenate([y_treino, y_validacao, y_teste])
y_prd = np.concatenate([y_treino, y_val_pred, y_test_pred])
data = np.concatenate([data_treino, data_validacao, data_teste])

In [101]:
plt.figure(figsize=(20,10))
plt.plot(data,y_real)
plt.plot(y_prd)

ValueError: view limit minimum -36865.05 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units