# Bibliotecas

In [1]:
import pandas as pd
import numpy as np
import investpy as inv
from alpha_vantage.timeseries import TimeSeries
import time
from alpha_vantage.techindicators import TechIndicators
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

# Extrair dados de ações

In [365]:
ts = TimeSeries(key = key_alphavantage, output_format = 'pandas')
dados, meta_dados = ts.get_daily(symbol='AAPL', outputsize='full')

In [366]:
dados.sort_values(by = 'date', inplace = True)

In [367]:
dados.tail()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-25,148.68,149.37,147.6211,148.64,50720556.0
2021-10-26,149.33,150.84,149.0101,149.32,60893395.0
2021-10-27,149.36,149.73,148.49,148.85,56094929.0
2021-10-28,149.82,153.165,149.72,152.57,100077888.0
2021-10-29,147.215,149.94,146.4128,149.8,124953168.0


# Criar coluna de retorno e com nome do ticker

In [368]:
dados['Return'] = dados['4. close'].pct_change()

In [369]:
dados['Symbol'] = 'AAPL'

In [370]:
dados.head()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume,Return,Symbol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1999-11-01,80.0,80.69,77.37,77.62,2487300.0,,AAPL
1999-11-02,78.0,81.69,77.31,80.25,3564600.0,0.033883,AAPL
1999-11-03,81.62,83.25,81.0,81.5,2932700.0,0.015576,AAPL
1999-11-04,82.06,85.37,80.62,83.62,3384700.0,0.026012,AAPL
1999-11-05,84.62,88.37,84.0,88.31,3721500.0,0.056087,AAPL


# Extrair dados de indicadores (MACD, RSI, OBV, STOCH, WILLR, AROON)

In [7]:
ti = TechIndicators(key = key_alphavantage, output_format = 'pandas')
data_ti, meta_dados_ti = ti.get_obv(symbol='AAPl', interval='daily')

In [156]:
data_ti.head()

Unnamed: 0_level_0,OBV
date,Unnamed: 1_level_1
1999-11-01,278577900.0
1999-11-02,677813500.0
1999-11-03,1006276000.0
1999-11-04,1385363000.0
1999-11-05,1802171000.0


In [176]:
data_ti, meta_dados_ti = ti.get_aroonosc(symbol='AAPl', interval='daily', time_period = 14)
data_ti.head()

Unnamed: 0_level_0,AROONOSC
date,Unnamed: 1_level_1
1999-11-19,28.5714
1999-11-22,28.5714
1999-11-23,14.2857
1999-11-24,14.2857
1999-11-26,7.1429


In [343]:
data_ti, meta_dados_ti = ti.get_macd(symbol='AAPl', interval='daily', series_type='close')
data_ti.sort_values(by = 'date', inplace = True)
data_ti.head()

Unnamed: 0_level_0,MACD_Hist,MACD,MACD_Signal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1999-12-17,-0.0177,0.0246,0.0423
1999-12-20,-0.0167,0.0214,0.0381
1999-12-21,-0.0134,0.0214,0.0348
1999-12-22,-0.0122,0.0195,0.0317
1999-12-23,-0.0093,0.0201,0.0294


In [175]:
data_ti, meta_dados_ti = ti.get_rsi(symbol='AAPl', interval='daily', series_type = 'close', time_period = 14)
data_ti.head()

Unnamed: 0_level_0,RSI
date,Unnamed: 1_level_1
1999-11-19,69.9838
1999-11-22,66.4703
1999-11-23,68.5184
1999-11-24,70.2012
1999-11-26,70.535


In [177]:
data_ti, meta_dados_ti = ti.get_stoch(symbol='AAPl', interval='daily', slowkmatype=1, slowdmatype=1, fastkperiod=14, slowkperiod=14, slowdperiod = 14)
data_ti.head()

Unnamed: 0_level_0,SlowK,SlowD
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-10-29,78.2113,63.4771
2021-10-28,78.5663,61.2103
2021-10-27,75.9242,58.5401
2021-10-26,74.8505,55.8656
2021-10-25,72.9903,52.9449


In [178]:
data_ti, meta_dados_ti = ti.get_willr(symbol='AAPl', interval='daily', time_period = 14)
data_ti.head()

Unnamed: 0_level_0,WILLR
date,Unnamed: 1_level_1
1999-11-18,-39.716
1999-11-19,-25.906
1999-11-22,-41.5546
1999-11-23,-28.7551
1999-11-24,-22.1413


# Construir coluna de previsão

In [325]:
close_groups = dados.groupby('Symbol')['4. close']

In [326]:
close_groups = close_groups.transform(lambda x : np.sign(x.diff(-30)))

In [327]:
dados['Prediction'] = close_groups

In [328]:
dados.sort_values(by = 'date', inplace = True)

In [329]:
dados['Prediction'].loc[dados['Prediction'] == 1.0] = -2.0
dados['Prediction'].loc[dados['Prediction'] == -1.0] = 1.0
dados['Prediction'].loc[dados['Prediction'] == -2.0] = -1.0
dados['Prediction'].loc[dados['Prediction'] == 0.0] = 1.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_

In [330]:
dados

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume,Return,Symbol,Prediction
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1999-11-01,80.000,80.690,77.3700,77.62,2487300.0,,AAPL,1.0
1999-11-02,78.000,81.690,77.3100,80.25,3564600.0,0.033883,AAPL,1.0
1999-11-03,81.620,83.250,81.0000,81.50,2932700.0,0.015576,AAPL,1.0
1999-11-04,82.060,85.370,80.6200,83.62,3384700.0,0.026012,AAPL,1.0
1999-11-05,84.620,88.370,84.0000,88.31,3721500.0,0.056087,AAPL,1.0
...,...,...,...,...,...,...,...,...
2021-10-25,148.680,149.370,147.6211,148.64,50720556.0,-0.000336,AAPL,
2021-10-26,149.330,150.840,149.0101,149.32,60893395.0,0.004575,AAPL,
2021-10-27,149.360,149.730,148.4900,148.85,56094929.0,-0.003148,AAPL,
2021-10-28,149.820,153.165,149.7200,152.57,100077888.0,0.024992,AAPL,


# Organizar dados

In [331]:
dados = dados.dropna()

In [364]:
dados.head()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-10-29,147.215,149.94,146.4128,149.8,124953168.0
2021-10-28,149.82,153.165,149.72,152.57,100077888.0
2021-10-27,149.36,149.73,148.49,148.85,56094929.0
2021-10-26,149.33,150.84,149.0101,149.32,60893395.0
2021-10-25,148.68,149.37,147.6211,148.64,50720556.0


In [333]:
columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'Symbol', 'Prediction']

In [334]:
dados.columns = columns

In [336]:
dados = dados[['Symbol', 'Open', 'High', 'Low', 'Close', 'Return', 'Prediction']]

# Compilar dados de 6 ações

In [5]:
six = ['AAPL']
#, 'MSFT', 'AMZN', 'MRNA', 'TSLA', 'GOOG'

In [6]:
ts = TimeSeries(key = ':OAUWJTXO8R6PWJER.Please', output_format = 'pandas')
ti = TechIndicators(key = ':OAUWJTXO8R6PWJER.Please', output_format = 'pandas')
acoes = pd.DataFrame()
columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'Symbol']

for i in six:
    #getting and formating data
    dados, meta_dados = ts.get_daily(symbol=i, outputsize='full')
    acao = dados['2021':'2010']
    acao.sort_values(by = 'date', inplace = True)
    acao['Return'] = acao['4. close'].pct_change()
    acao['Symbol'] = i
    acao.columns = columns
    
    #prediction column
    close_groups = acao.groupby('Symbol')['Close']
    close_groups = close_groups.transform(lambda x : np.sign(x.diff(-30)))
    acao['Prediction'] = close_groups
    acao.sort_values(by = 'date', inplace = True)
    acao['Prediction'].loc[acao['Prediction'] == 1.0] = -2.0
    acao['Prediction'].loc[acao['Prediction'] == -1.0] = 1.0
    acao['Prediction'].loc[acao['Prediction'] == -2.0] = -1.0
    acao['Prediction'].loc[acao['Prediction'] == 0.0] = 1.0
    
    #indicators
    data_obv, meta_obv = ti.get_obv(symbol=i, interval='daily')
    acao['OBV'] = data_obv
    time.sleep(15)
    
    data_aroon, meta_aroon = ti.get_aroonosc(symbol=i, interval='daily', time_period = 14)
    acao['AROON'] = data_aroon
    time.sleep(15)
    
    data_macd, meta_macd = ti.get_macd(symbol=i, interval='daily', series_type='close')
    data_macd.sort_values(by = 'date', inplace=True)
    acao['MACD'] = data_macd['MACD']
    time.sleep(15)
    
    data_stoch, meta_dados_ti = ti.get_stoch(symbol=i, interval='daily', slowkmatype=1, slowdmatype=1, fastkperiod=14, slowkperiod=14, slowdperiod = 14)
    data_stoch.sort_values(by = 'date', inplace=True)
    acao['SlowK'] = data_stoch['SlowK']
    acao['SlowD'] = data_stoch['SlowD']
    time.sleep(15)
    
    data_rsi, meta_dados_ti = ti.get_rsi(symbol=i, interval='daily', series_type = 'close', time_period = 14)
    acao['RSI'] = data_rsi
    time.sleep(15)
    
    data_willr, meta_dados_ti = ti.get_willr(symbol=i, interval='daily', time_period = 14)
    acao['WILLR'] = data_willr
    
    acao = acao.dropna()
    
    acoes = acoes.append(acao)
    time.sleep(15)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acao.sort_values(by = 'date', inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acao['Return'] = acao['4. close'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acao['Symbol'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the doc

In [7]:
acoes.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Return,Symbol,Prediction,OBV,AROON,MACD,SlowK,SlowD,RSI,WILLR
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-01-05,214.6,215.59,213.25,214.38,21496600.0,0.001729,AAPL,-1.0,78760140000.0,78.5714,0.1315,78.3192,61.4446,67.6518,-4.9207
2010-01-06,214.38,215.23,210.75,210.97,19720000.0,-0.015906,AAPL,-1.0,78207980000.0,78.5714,0.1303,78.7049,63.746,61.1913,-18.7881
2010-01-07,211.75,212.0,209.05,210.58,17040400.0,-0.001849,AAPL,-1.0,77730850000.0,78.5714,0.1269,78.8277,65.7569,60.48,-20.3741
2010-01-08,210.3,212.0,209.06,211.98,15986100.0,0.006648,AAPL,-1.0,78178460000.0,78.5714,0.1263,79.557,67.5969,62.1797,-15.7025
2010-01-11,212.8,213.0,208.45,210.11,16508200.0,-0.008822,AAPL,-1.0,77716230000.0,71.4286,0.1198,78.6147,69.0659,58.557,-27.51


# Modelo de Treino

In [24]:
X_cols = acoes[['OBV', 'AROON', 'MACD', 'SlowK', 'SlowD', 'RSI', 'WILLR']]
Y_cols = acoes['Prediction']

X_train, X_test, Y_train, Y_test = train_test_split(X_cols, Y_cols, test_size = 0.25, random_state=42)


In [25]:
rand_frst_clf = RandomForestClassifier(n_estimators = 1000, oob_score = True, criterion='gini', random_state=42)

In [26]:
rand_frst_clf.fit(X_train, Y_train)

RandomForestClassifier(n_estimators=1000, oob_score=True, random_state=42)

# Modelo de Prática

In [27]:
Y_pred = rand_frst_clf.predict(X_test)

In [28]:
print('Correct Prediction (%):', accuracy_score(Y_test, Y_pred, normalize=True )*100.0)

Correct Prediction (%): 81.27544097693351
