In [68]:
#Importando as bibliotecas
import pandas as pd
import pandas_datareader as pdr
import plotly
import plotly.offline as py
import plotly.io as pio
import plotly.graph_objs as go
from plotly.offline import plot, iplot
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, KFold, cross_val_score, cross_val_predict
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
pio.renderers
plotly.offline.init_notebook_mode(connected=True)

Lendo Dataframe contendo dados das ações das empresas (Código feito com a Empresa Petrobrás)


In [69]:
df = pd.read_csv('PETR4.SA.csv')

In [70]:
#Removendo as linhas com valores nulos
df.dropna(axis = 0, inplace = True)

In [82]:
df.isnull().sum()

Date          0
Open          0
High          0
Low           0
Close         0
Adj Close     0
Volume        0
Med7          6
Med30        29
dtype: int64

In [72]:
df.head() #Exibindo cabeçalho

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2000-01-03,5.875,5.875,5.875,5.875,4.050103,35389440000.0
1,2000-01-04,5.55,5.55,5.55,5.55,3.826055,28861440000.0
2,2000-01-05,5.494,5.494,5.494,5.494,3.78745,43033600000.0
3,2000-01-06,5.475,5.475,5.475,5.475,3.774351,34055680000.0
4,2000-01-07,5.5,5.5,5.5,5.5,3.791586,20912640000.0


In [73]:
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,5060.0,5060.0,5060.0,5060.0,5060.0,5060.0
mean,18.05888,18.328252,17.764204,18.027636,15.143805,629491400.0
std,9.547948,9.697718,9.373938,9.509612,8.363701,4198454000.0
min,4.2,4.27,4.12,4.2,3.016034,0.0
25%,9.435625,9.639375,9.27,9.417813,7.39374,20777350.0
50%,17.765,18.045,17.415,17.700001,15.294609,31411300.0
75%,24.24,24.59,23.799999,24.122501,20.746825,48834050.0
max,67.5,67.5,51.950001,52.509998,42.204231,73564160000.0


In [74]:
pio.renderers.default = "colab"

Plotando valores de fechamento das ações

In [75]:
import plotly.graph_objs as go
data = [go.Scatter(
          x=df.Date,
          y=df['Close'])]

py.iplot(data)

Gráfico de Fechamento das Ações

In [76]:
#Coletando Dados e Montando Gráfico em um período determinado
Close = go.Scatter(
    x = df.Date,
    y = df.Close,
    name = "Fechamento Ações Petrobrás",
    line = dict(color = '#17BECF'),
    opacity = 0.5)
data = [Close]

#Gerando Faixa Customizada
layout = dict(
    title = "Gráfico Fechamento Ações Petrobrás",
    title_x = 0.5,
    xaxis = dict(
        range = ['2017-01-01','2017-12-31'])
    )
grafico = dict(data=data, layout=layout)
py.iplot(grafico, filename = "Range Manual")

Rangeslider e Rangeselector

In [78]:
Close = go.Scatter(
    x = df.Date,
    y = df.Close,
    name = "Alta das Ações",
    line = dict(color = '#17BECF'),
    opacity = 0.5)

data = [Close]

layout = dict(
    title = "Gráfico com Botões e RangeSlider",
    title_x = 0.5,
    xaxis = dict(
        rangeselector=dict(
            buttons = list([
                            dict(
                                count = 1,
                                label = 'Mensal',
                                step = 'month',
                                stepmode = 'backward'),
                            dict(
                                count = 6,
                                label = 'Semestral',
                                step = 'month',
                                stepmode = 'backward'),
                            dict(step = 'all',
                                 label = 'Total')
            ])
        ),
        rangeslider = dict(
            visible = True
        ),
        type = 'date'
    )
)

grafico = dict(data=data, layout=layout)
py.iplot(grafico)

Gráficos com Candlesticks

In [79]:
trace = go.Candlestick(
    x = df['Date'],
    open = df['Open'],
    high = df['High'],
    low = df['Low'],
    close = df['Close'])

data = [trace]
py.iplot(data, filename = 'Grafico_Candlestick')


Candlestick Customizado

In [80]:
trace = go.Candlestick(
                x=df['Date'],
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'],
                increasing = dict(line = dict(color = '#17BECF')),
                decreasing = dict(line = dict(color = '#7F7F7F'))
)
layout = go.Layout(
                title = 'Valores em Formato Candlestick',
                title_x = 0.5,
                xaxis = dict(
                    rangeslider = dict(
                        visible = False
                    )
                )
)
data = [trace]

grafico = go.Figure(data=data, layout=layout)
py.iplot(grafico)

In [81]:
#Gerando Médias dos últimos 7 dias
df['Med7'] = df.Close.rolling(window = 7).mean()
#Gerando Médias dos últimos 30 dias
df['Med30'] = df.Close.rolling(window = 30).mean()
#Visualizando novos dados dentro do Dataframe
df.head(30)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Med7,Med30
0,2000-01-03,5.875,5.875,5.875,5.875,4.050103,35389440000.0,,
1,2000-01-04,5.55,5.55,5.55,5.55,3.826055,28861440000.0,,
2,2000-01-05,5.494,5.494,5.494,5.494,3.78745,43033600000.0,,
3,2000-01-06,5.475,5.475,5.475,5.475,3.774351,34055680000.0,,
4,2000-01-07,5.5,5.5,5.5,5.5,3.791586,20912640000.0,,
5,2000-01-10,5.613,5.613,5.613,5.613,3.869485,19563520000.0,,
6,2000-01-11,5.475,5.475,5.475,5.475,3.774351,23987200000.0,5.568857,
7,2000-01-12,5.483,5.483,5.483,5.483,3.779866,23301120000.0,5.512857,
8,2000-01-13,5.325,5.325,5.325,5.325,3.670943,26383360000.0,5.480714,
9,2000-01-14,5.4,5.4,5.4,5.4,3.722648,16657920000.0,5.467286,


Plotando valores médios e valores de abertura e Fechamento

In [None]:
Close = go.Scatter(
    x = df.Date,
    y = df.Close,
    name = 'Fechamento Ações Petrobrás',
    line = dict(color = '#330000'),
    opacity = 0.5)

Med7 = go.Scatter(
    x = df.Date,
    y = df['Med7'],
    name = 'Média dos ùltimos 7 dias',
    line = dict(color = '#006400'),
    opacity = 0.5)

Med30 = go.Scatter(
    x = df.Date,
    y = df['Med30'],
    name = 'Média das Ações dos últimos 30 dias',
    line = dict(color = '#FF00FF'),
    opacity = 0.5)

grafico = [Close, Med7, Med30]
py.iplot(grafico)

Gráfico Abertura X Fechamento

In [83]:
Close = go.Scatter(
    x = df.Date,
    y = df.Close,
    name = 'Valor das Ações no Fechamento',
    line = dict(color = '#071914'),
    opacity = 0.5)

Open = go.Scatter(
    x = df.Date,
    y = df['Open'],
    name = 'Valor das Ações na Abertura',
    line = dict(color = '#FE2E64'),
    opacity = 0.8)

grafico = [Close, Open]
py.iplot(grafico)

Aplicando Regressao Linear

In [84]:
entrada_regressao = df[['Open', 'High']]
saida_regressao   = df[['Close']]

entrada_regressao.shape


(5060, 2)

In [85]:
from sklearn.linear_model import LinearRegression

In [86]:
mod_regressao = LinearRegression()
regressao = mod_regressao.fit(entrada_regressao, saida_regressao)
previsao = mod_regressao.predict(entrada_regressao)
previsao

array([[ 5.938377  ],
       [ 5.61670601],
       [ 5.56127962],
       ...,
       [22.49093658],
       [22.80187101],
       [22.95892578]])

Gerando Árvore de Decisão

In [87]:
from sklearn.tree import DecisionTreeRegressor
arvore_regressora = DecisionTreeRegressor() #Definindo objeto para árvore de regressao
arvore_regressora.fit(entrada_regressao, saida_regressao)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [88]:
#Realizando a Previsão
previsao_arvore = arvore_regressora.predict(entrada_regressao)

In [89]:
R_2 = r2_score(saida_regressao, previsao_arvore) #Calculo do R2
print("Coeficiente de Determinação R2 para árvore: ", R_2)

Coeficiente de Determinação R2 para árvore:  0.9999925092844358


Montando Decisões tomadas pela Árvore

In [66]:
from sklearn import tree
import pydotplus
dot_data = tree.export_graphviz(arvore_regressora, out_file = None)
grafico = pydotplus.graph_from_dot_data(dot_data)
grafico.write_pdf('Petrobras.pdf')

True