<a href="https://colab.research.google.com/github/maiaufrrj/BRtrading/blob/main/Long_Target.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#! pip install pycaret --upgrade pandas --upgrade
#! pip install sweetviz

In [1]:
import numpy as np
import pickle
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from pycaret.classification import *
from google.colab import drive
import warnings
warnings.filterwarnings("ignore")

In [None]:
drive.mount('/content/drive')

In [18]:
filename = '/content/drive/MyDrive/ColabNotebooks/datasets/trading/EURUSD_h1.pkl'
df = pickle.load(open(filename, 'rb'))

In [19]:
print('início do dataset: '+ str(df.index[1]))
print('final do dataset: '+ str(df.index[-1]))

início do dataset: 2005-09-07 00:00:00
final do dataset: 2021-10-29 23:00:00


<h2>Separando datasets<br>
<h5> df_train: utilizado no treino e validações cruzadas<br>
<h5> df_valid: utilizado após os testes para validação final<br>

In [20]:
separar=6500 #separando 6500hs para validação (01-01-2021 --> 29-10-2021 23:00)

#dataset utilizado para treino e validações cruzadas
df_train = df.iloc[:-(separar-1),:]

#dataset utilizado após os testes para validação final
df_valid = df.iloc[-separar:,:]

In [21]:
df_train.shape

(93500, 7)

<h2>Análise Exploratória<br>
<h5>Histograma para cada passo à frente (definindo alvos)

In [22]:
#função para verificar variação para n passos à frente
def Delta(DF,n):
    '''Busca os n valores à frente'''
    df = DF.copy()
    for i in n:
      name='delta'+str(i)
      df[name] = 100*((df['close'].shift(-i) - df['close'])/df['close'])
      df.dropna(inplace=True)
    
    del df['open'],df['high'],df['low'],df['tick_volume'],df['spread'],df['real_volume']
    return df

lista_forecast=list(range(1,13))
df_analise = Delta(df_train,lista_forecast)

In [None]:
import sweetviz as sv
#Gerar relatório do Sweetviz
report = sv.analyze(df_analise)
report.show_html("/content/drive/MyDrive/ColabNotebooks/datasets/trading/Analise_EURUSD.html")

In [23]:
alvo=0.38
delta=6

In [24]:
df_analise[df_analise > alvo].count()
coluna='delta'+str(delta)
print('Percentual maior que o alvo('+str(alvo) + '%) em delta('+str(delta)+') = {:.2f}'.format(100*(df_analise[coluna][df_analise[coluna] > alvo].count()/93500))+'%')

Percentual maior que o alvo(0.38%) em delta(6) = 7.02%


In [None]:
import plotly.express as px
import plotly.graph_objects as go

fig = px.histogram(df, x=df_analise[coluna], nbins=100, histnorm='percent')
fig.show()

#histograma de variação de 12 passos à frente
coluna2='delta12'
fig = px.histogram(df, x=df_analise[coluna2], nbins=100, histnorm='percent')
fig.show()

<h2>Criando Features<br>

In [25]:
#criar feature ADF

def MediaMovelSimples(DF,n):
    ''' função para calcular Média Móvel ---indicador de tendência'''
    df = DF.copy()
    name='MM'+ str(n)  
    df[name] = df['close'].rolling(window=n, min_periods=1).mean()
    df.dropna(inplace=True) 
    return df
    
def DesvioPadrao(DF,n):
    ''' função para calcular Desvio Padrão ---indicador de volatilidade'''
    df = DF.copy()
    name='STD'+ str(n)
    df[name] = df['close'].rolling(window=n, min_periods=1).std()
    df.dropna(inplace=True)  
    return df

def MediaMovelExponcial(DF,n):
    ''' função para calcular Média Móvel Exponencial ------indicador de tendência'''
    df = DF.copy()
    name='EWM'+ str(n)
    df[name] = df['close'].ewm(span=n).mean()
    df.dropna(inplace=True) 
    return df
    
def ATR(DF,n):
    '''função para calcular TR e ATR ---indicadores de volatilidade'''    
    df = DF.copy()
    name1='ATR'+ str(n)
    name2='TR' + str(n)
    df['H-L'] = abs(df['high']-df['low'])
    df['H-PC']= abs(df['high']-df['close'].shift(1))
    df['L-PC']=abs(df['low']-df['close'].shift(1))
    df[name2]=  df[['H-L','H-PC', 'L-PC']].max(axis=1,skipna=False)
    df[name1]= df[name2].rolling(n).mean()
    df = df.drop(['H-L','H-PC', 'L-PC'],axis=1)
    df.dropna(inplace=True) 
    return df
  
def BollBnd(DF,n):
    '''função para calcular Bandas de Bollinger ---indicador de tendência''' 
    df = DF.copy()
    name1='BBsup'+ str(n)
    name2='BBinf'+ str(n)
    name3='BBlarg'+ str(n)
    df['MA'] = df['close'].rolling(n).mean()
    df[name1] = df['MA'] + 2*df['MA'].rolling(n).std()
    df[name2] = df['MA'] - 2*df['MA'].rolling(n).std()
    df[name3] = df[name1]-df[name2]
    df = df.drop(['MA'],axis=1)
    df.dropna(inplace=True)
    return df

def RSI(DF,n):
    '''função para calcular RSI (Índice de Força Relativa) ---indicador de tendência''' 
    df = DF.copy()
    name='RSI'+ str(n)
    df['delta'] = df['close'] - df['close'].shift(1)
    df['gain'] = np.where(df['delta']>=0,df['delta'],0)
    df['loss'] = np.where(df['delta']<0,abs(df['delta']),0)
    avg_gain = [] 
    avg_loss = []
    gain = df['gain'].tolist()
    loss= df['loss'].tolist()
    for i in range(len(df)):
        if i < n:
            avg_gain.append(np.NaN)
            avg_loss.append(np.NaN)
        elif i==n:
            avg_gain.append(df['gain'].rolling(n).mean().tolist()[n])
            avg_loss.append(df['loss'].rolling(n).mean().tolist()[n])
        elif i>n:
            avg_gain.append(((n-1)*avg_gain[i-1]+gain[i])/n)
            avg_loss.append(((n-1)*avg_loss[i-1]+loss[i])/n)   
    
    df['avg_gain']=np.array(avg_gain)
    df['avg_loss']=np.array(avg_loss)
    df['RS']=df['avg_gain']/df['avg_loss']
    df[name]=100-(100/(1+df['RS']))
    df = df.drop(['delta','avg_gain','avg_loss','loss','gain','RS'],axis=1)
    df.dropna(inplace=True)  
    return df

   
def nIndicadores(DF,n):
    df=DF.copy()
    df = MediaMovelSimples(df,n)
    df = DesvioPadrao(df,n)
    df = MediaMovelExponcial(df,n)
    df = ATR(df,n)
    df = BollBnd(df,n)
    df = RSI(df,n)
    return df

#lista=[3,5,7,9,24,50,120] #otimizar essa sequência
#usando sequência de Fibonacci
lista=[3, 5, 8, 13, 21, 34, 55, 89, 144]

for n in lista:
    df_train=nIndicadores(df_train,n)

Anotações

In [None]:
#def Forecast(DF,n):
#    '''Busca os n preços de fechamento a frente'''
#    df = DF.copy()
#    df['y']=df['close'].shift(-n)
#    df.dropna(inplace=True)
#    return df

#def movimento_alta(DF,n,percentual_alta):
#    '''Verifica se o preço de fechamento n períodos a frente é maior que um certo limite percentual (alta)'''
#    df = DF.copy()
#    df['delta'] = (df['close'].shift(-n) - df['close'])/df['close']
#    df['alta'] = np.where(df['delta']>=percentual_alta,1,0)
#    df = df.drop(['delta'],axis=1)
#    df.dropna(inplace=True)  
#    return df

#def low_target(DF,n,percentual_baixa):
    '''Verifica se o preço de fechamento n períodos a frente é menor que um certo limite percentual (alta)'''
#    df = DF.copy()
#    df['delta'] = (df['close'].shift(-n) - df['close'])/df['close']
#    df['baixa'] = np.where(df['delta']<=-percentual_baixa,1,0)
#    df = df.drop(['delta'],axis=1)
#    df.dropna(inplace=True)  
#    return df

#def low_target(DF, baixa_esperada, periodos_frente):
#    '''avalia n períodos à frente, se o valor máximo atinge a alta_esperada'''
#    df = DF.copy()

#    '''cria n colunas delta, uma para cada período a frente e calcula a diferença entre fechamento futuro e atual'''
#    for i in range(periodos_frente):
#      name='delta'+str(i)
#      df[name] = (df['close'].shift(-i) - df['close'])/df['close']
#
#    for i in range(periodos_frente):
#      df['baixa'] = np.where(df['delta'+str(i)]<=-baixa_esperada,1,0)
    
#    for i in range(periodos_frente):
#      name='delta'+str(i)
#      df = df.drop([name],axis=1)
  
#    df.dropna(inplace=True)
#    return df

#def low_target(DF, baixa_esperada, periodos_frente):
#    '''avalia n períodos à frente, se o valor máximo atinge a alta_esperada'''
#    df = DF.copy()

#    '''cria n colunas delta, uma para cada período a frente e calcula a diferença entre fechamento futuro e atual'''
#    for i in range(periodos_frente):
#      name='delta_close'+str(i)
#      df[name] = (df['close'].shift(-i) - df['close'])/df['close']
#      name='delta_low'+str(i)
#      df[name] = (df['low'].shift(-i) - df['close'])/df['close']
#      name='delta_high'+str(i)
#      df[name] = (df['high'].shift(-i) - df['close'])/df['close']

#    for i in range(periodos_frente):
#      df['baixa'] = np.where((df['delta_close'+str(i)]<=-baixa_esperada) | (df['delta_low'+str(i)]<=-baixa_esperada) | (df['delta_high'+str(i)]<=-baixa_esperada), 1, 0)
    
#    for i in range(periodos_frente):
#      name1='delta_close'+str(i)
#      name2='delta_low'+str(i)
#      name3='delta_high'+str(i)
#      df = df.drop([name1,name2,name3],axis=1)

#    df.dropna(inplace=True)
#    return df

<h2>Criando Targets<br>
<h5> LongTarget: <br>
verificar se há chance de aumento de preço n períodos à frente (sinal de compra)<br>

In [26]:
def low_target(DF, baixa_esperada, periodos_frente):
    '''avalia n períodos à frente, se o valor máximo atinge a alta_esperada'''
    df = DF.copy()
    df['baixa'] = 0
    '''cria n colunas delta, uma para cada período a frente e calcula a diferença entre fechamento futuro e atual'''
    for i in range(1,periodos_frente+1):
      name='delta_close'+str(i)
      df[name] = (df['close'].shift(-i) - df['close'])/df['close']

    for i in range(1,periodos_frente+1):
      df['baixa'+str(i)] = np.where(df['delta_close'+str(i)]<=-baixa_esperada,1,0)
    
    for i in range(1,periodos_frente+1):
      df['baixa'] = df['baixa'] + df['baixa'+str(i)]

    #trocando sinais aqui!!!
    df['baixa'] = np.where(df['baixa']>=1,1,0)

    for i in range(1,periodos_frente+1):
      name1='delta_close'+str(i)
      name2='baixa'+str(i)
      df = df.drop([name1,name2],axis=1)
      
    df.dropna(inplace=True)
    return df

def high_target(DF, alta_esperada, periodos_frente):
    '''avalia n períodos à frente, se o valor máximo atinge a alta_esperada'''
    df = DF.copy()
    df['high_target'] = 0

    '''cria n colunas delta, uma para cada período a frente e calcula a diferença entre fechamento futuro e atual'''
    for i in range(1,periodos_frente+1):
      name='delta_close'+str(i)
      df[name] = (df['close'].shift(-i) - df['close'])/df['close']

    for i in range(1,periodos_frente+1):
      #df['high_target'] = np.where((df['delta_close'+str(i)]>=alta_esperada) | (df['delta_low'+str(i)]>=alta_esperada) | (df['delta_high'+str(i)]>=alta_esperada), 1, 0)
      df['high_target'+str(i)] = np.where(df['delta_close'+str(i)]>=alta_esperada,1,0)

    for i in range(1,periodos_frente+1):
      df['high_target'] = df['high_target'] + df['high_target'+str(i)]
    
    df['high_target'] = np.where(df['high_target']>=1,1,0)

    for i in range(1,periodos_frente+1):
      name1='delta_close'+str(i)
      name2='high_target'+str(i)
      df = df.drop([name1,name2],axis=1)

    df.dropna(inplace=True)
    return df

In [27]:
#Usar em caso de low_target
#baixa_esperada = 0.004
#df = low_target(df,baixa_esperada,6)

percentual_alta = alvo/100
alta_esperada = alvo/100
df_train = high_target(df_train,alta_esperada,6) #target

#definindo o target = df['high_target']
df_train['y']=df_train['high_target']
del df_train['high_target'], df_train['tick_volume'], df_train['spread'], df_train['real_volume'], df_train['open'], df_train['high'], df_train['low']

In [28]:
df_train.head()

Unnamed: 0_level_0,close,MM3,STD3,EWM3,TR3,ATR3,BBsup3,BBinf3,BBlarg3,RSI3,MM5,STD5,EWM5,TR5,ATR5,BBsup5,BBinf5,BBlarg5,RSI5,MM8,STD8,EWM8,TR8,ATR8,BBsup8,BBinf8,BBlarg8,RSI8,MM13,STD13,EWM13,TR13,ATR13,BBsup13,BBinf13,BBlarg13,RSI13,MM21,STD21,EWM21,TR21,ATR21,BBsup21,BBinf21,BBlarg21,RSI21,MM34,STD34,EWM34,TR34,ATR34,BBsup34,BBinf34,BBlarg34,RSI34,MM55,STD55,EWM55,TR55,ATR55,BBsup55,BBinf55,BBlarg55,RSI55,MM89,STD89,EWM89,TR89,ATR89,BBsup89,BBinf89,BBlarg89,RSI89,MM144,STD144,EWM144,TR144,ATR144,BBsup144,BBinf144,BBlarg144,RSI144,y
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1
2005-12-01 14:00:00,1.1733,1.175333,0.001818,1.175013,0.0048,0.0038,1.177916,1.172751,0.005165,2.576077,1.17656,0.002115,1.175871,0.0048,0.00284,1.177899,1.175221,0.002677,6.10398,1.177113,0.001772,1.176579,0.0048,0.002275,1.178075,1.17615,0.001925,12.646811,1.177623,0.001523,1.177181,0.0048,0.001877,1.178354,1.176892,0.001463,22.824117,1.178157,0.001387,1.177622,0.0048,0.001771,1.178543,1.177771,0.000772,32.930716,1.1781,0.00123,1.177929,0.0048,0.001979,1.179571,1.176629,0.002942,40.28048,1.178442,0.00171,1.178046,0.0048,0.002089,1.180992,1.175892,0.0051,44.742947,1.177656,0.004358,1.177923,0.0048,0.002165,1.178111,1.177201,0.00091,47.247658,1.177857,0.003871,1.177543,0.0048,0.002037,1.180393,1.175321,0.005072,47.166521,0
2005-12-01 15:00:00,1.1706,1.173267,0.00265,1.172807,0.0034,0.003933,1.177039,1.169494,0.007545,1.29066,1.175,0.003077,1.174114,0.0034,0.00316,1.17758,1.17242,0.005161,3.613504,1.176175,0.002838,1.175251,0.0034,0.002537,1.177723,1.174627,0.003097,8.427199,1.177008,0.002437,1.176241,0.0034,0.002038,1.178135,1.17588,0.002255,17.163763,1.177771,0.002146,1.176984,0.0034,0.001852,1.178183,1.177359,0.000824,27.532672,1.177888,0.00178,1.17751,0.0034,0.002029,1.179226,1.176551,0.002675,36.253893,1.178209,0.001884,1.17778,0.0034,0.002102,1.180698,1.175721,0.004977,42.083746,1.177636,0.004387,1.177761,0.0034,0.002188,1.178091,1.177181,0.000909,45.587018,1.17779,0.003912,1.177447,0.0034,0.002042,1.180281,1.175298,0.004982,46.074362,0
2005-12-01 16:00:00,1.1711,1.171667,0.001436,1.171953,0.003,0.003733,1.175343,1.16799,0.007353,13.306865,1.17354,0.002777,1.173109,0.003,0.00356,1.177172,1.169908,0.007264,11.931279,1.1753,0.003214,1.174328,0.003,0.002725,1.17752,1.17308,0.00444,14.46701,1.176415,0.002864,1.175507,0.003,0.002177,1.177977,1.174853,0.003124,21.089772,1.177414,0.002582,1.176449,0.003,0.001886,1.177962,1.176866,0.001096,29.771118,1.177653,0.002112,1.177144,0.003,0.002056,1.178886,1.17642,0.002466,37.446953,1.178004,0.002029,1.177541,0.003,0.002124,1.180432,1.175575,0.004858,42.725784,1.177627,0.0044,1.177613,0.003,0.00221,1.178081,1.177173,0.000908,45.942864,1.177735,0.00395,1.17736,0.003,0.002039,1.180181,1.175289,0.004892,46.306213,0
2005-12-01 17:00:00,1.1702,1.170633,0.000451,1.171077,0.0031,0.003167,1.173287,1.16798,0.005307,10.015104,1.17222,0.002381,1.17214,0.0031,0.00358,1.176514,1.167926,0.008587,9.991306,1.174338,0.003467,1.173411,0.0031,0.002962,1.177277,1.171398,0.005878,12.738613,1.175769,0.003251,1.174749,0.0031,0.002292,1.177786,1.173753,0.004033,19.305554,1.176976,0.002978,1.175881,0.0031,0.001948,1.177768,1.176185,0.001583,28.128943,1.177403,0.002459,1.176747,0.0031,0.002118,1.178561,1.176245,0.002316,36.190795,1.177796,0.002228,1.177279,0.0031,0.002149,1.180169,1.175424,0.004744,41.874736,1.177613,0.004421,1.177448,0.0031,0.002229,1.178067,1.17716,0.000907,45.402337,1.177669,0.003996,1.177261,0.0031,0.002043,1.180069,1.175268,0.004801,45.948132,0
2005-12-01 18:00:00,1.1711,1.1708,0.00052,1.171088,0.0026,0.0029,1.17191,1.16969,0.002219,34.368361,1.17126,0.001201,1.171793,0.0026,0.00338,1.175507,1.167013,0.008494,25.195008,1.173425,0.003195,1.172897,0.0026,0.003062,1.176994,1.169856,0.007138,23.221826,1.175231,0.003409,1.174227,0.0026,0.0024,1.177688,1.172774,0.004914,26.080374,1.1766,0.0032,1.175446,0.0026,0.002005,1.177669,1.175531,0.002138,32.063679,1.177171,0.002668,1.176425,0.0026,0.002147,1.178284,1.176058,0.002226,38.322463,1.177598,0.002329,1.177058,0.0026,0.002169,1.179919,1.175278,0.004641,43.030516,1.177636,0.004382,1.177307,0.0026,0.002227,1.178091,1.17718,0.000911,46.04435,1.177608,0.004029,1.177176,0.0026,0.002045,1.179962,1.175253,0.004709,46.36578,0


In [None]:
#workbook='/content/drive/MyDrive/ColabNotebooks/datasets/trading/dataframe_eurusd.xlsx'
#df.to_excel(workbook)

In [29]:
X= df_train.iloc[:,:-1]  #da primeira à penultima (variáveis dependentes)
y= df_train.iloc[:,-1]    #ultima coluna (dados de previsão)

In [38]:
#balanceamento SMOTE
exp_clf = setup(df_train,
                target = 'y', 
                fold_strategy = 'timeseries',
                fold = 15,
                train_size=0.7,
                feature_selection = True,
                fix_imbalance = True,
                #feature_selection_threshold = 0.70,  '''para long_target '''
                feature_selection_threshold = 0.70,
                remove_multicollinearity = True,
                multicollinearity_threshold = 0.80,
                remove_perfect_collinearity = True,
                normalize = True,
                n_jobs=-1,
                session_id = 123)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,y
2,Target Type,Binary
3,Label Encoded,
4,Original Data,"(92021, 83)"
5,Missing Values,False
6,Numeric Features,82
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


In [34]:
best = compare_models(sort = 'MCC', include = ['et','lightgbm','rf'])

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.891,0.7734,0.2827,0.4034,0.3322,0.2748,0.2801,6.583
rf,Random Forest Classifier,0.8656,0.7552,0.3386,0.314,0.3257,0.2512,0.2514,18.86
lightgbm,Light Gradient Boosting Machine,0.7003,0.7156,0.587,0.1775,0.2717,0.1467,0.1894,0.856


In [None]:
#criando modelos
model = create_model('et')
model_tuned = tune_model(estimator = model,  fold = 15,  round = 4,  n_iter = 100,  optimize = 'MCC', verbose = True)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8693,0.6792,0.1443,0.2327,0.1781,0.1114,0.1151
1,0.876,0.7241,0.2044,0.266,0.2311,0.165,0.1667
2,0.8763,0.7325,0.2294,0.309,0.2633,0.1974,0.2
3,0.8817,0.7397,0.2487,0.3587,0.2938,0.2315,0.2362
4,0.888,0.742,0.2732,0.3679,0.3135,0.254,0.2573
5,0.8837,0.7503,0.2711,0.3668,0.3118,0.2498,0.2532
6,0.8872,0.7714,0.2655,0.4038,0.3204,0.2617,0.2686
7,0.8917,0.7839,0.3027,0.3862,0.3394,0.2813,0.2838
8,0.8892,0.7936,0.2741,0.4221,0.3323,0.2749,0.2825
9,0.8947,0.8036,0.2884,0.4129,0.3396,0.2843,0.2897


IntProgress(value=0, description='Processing: ', max=7)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC


Fitting 15 folds for each of 100 candidates, totalling 1500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed: 10.6min
[Parallel(n_jobs=-1)]: Done 446 tasks      | elapsed: 18.3min


In [None]:
model_tuned

ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0, class_weight={},
                     criterion='gini', max_depth=11, max_features=1.0,
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0, min_impurity_split=None,
                     min_samples_leaf=4, min_samples_split=10,
                     min_weight_fraction_leaf=0.0, n_estimators=140, n_jobs=-1,
                     oob_score=False, random_state=123, verbose=0,
                     warm_start=False)

In [None]:
evaluate_model(model_tuned)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [None]:
#avaliar modelo

# generate predictions on the original dataset
predictions_df = predict_model(model_tuned, data=df_train)
predictions_df = predictions_df.reset_index()


#'matriz de confusão avaliando n itens à frente'

#def evaluate_pred(DF, alta_esperada, periodos_frente):
        
#    df = DF.copy()
#    df['evaluate_pred'] = 0

#    '''cria n colunas delta, uma para cada período a frente e calcula a diferença entre fechamento futuro e atual'''
#    for i in range(1,periodos_frente+1):
#      name='delta_close'+str(i)
#      if df[df['Label']]==1:
#        df[name] = (df['close'].shift(-i) - df['close'])/df['close']
    
#    for i in range(1,periodos_frente+1):
#      df['evaluate_pred'+str(i)] = np.where(df['delta_close'+str(i)]>=alta_esperada,1,0)

#    for i in range(1,periodos_frente+1):
#      df['evaluate_pred'] = df['evaluate_pred'] + df['evaluate_pred'+str(i)]
    
    #df['evaluate_pred'] = np.where(df['evaluate_pred']>=1,1,0)

#    for i in range(1,periodos_frente+1):
#      name1='delta_close'+str(i)
#      name2='evaluate_pred'+str(i)
#      df = df.drop([name1,name2],axis=1)

#    df.dropna(inplace=True)
#    return df


#percentual_alta = 0.004
#alta_esperada = 0.004
#eval = evaluate_pred(df_train,alta_esperada,6) #target


#y_pred=predictions_df['Label']
#y_pred = pd.DataFrame(y_pred)
#y_pred.set_index(df_train.index, inplace=True)
#y_true = df_train['y']

#from sklearn.metrics import plot_confusion_matrix
#confusion_matrix(y_true, y_pred)

'matriz de confusão avaliando n itens à frente'

In [None]:
eval.evaluate_pred.value_counts()


In [None]:
predictions_df.head(10)

In [None]:
#finalize a model
final_model = finalize_model(model_tuned)
save_model(final_model, '/content/drive/MyDrive/ColabNotebooks/datasets/trading/modelos/long_target_et')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=[],
                                       ml_usecase='classification',
                                       numerical_features=[], target='y',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 numeric_strategy='...
                  ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0,
                                       class_weight={}, criterion='gini',
                                       max_depth=11, max_features=1.0,
                                       max_leaf_nodes=None,

In [None]:
#salvando modelo
#import pickle
#filename = '/content/drive/MyDrive/ColabNotebooks/datasets/trading/modelos/et_tuned01.pkl'
#pickle.dump(final_model, open(filename, 'wb'))

In [None]:
#importando modelo short_target_et
short_target = load_model('/content/drive/MyDrive/ColabNotebooks/datasets/trading/modelos/short_target_et')

Transformation Pipeline and Model Successfully Loaded


In [None]:
#importando df_train usado pra ajustar o outro indicador
file = open("/content/drive/MyDrive/ColabNotebooks/datasets/trading/modelos/df_train_short_target.pkl",'rb')
df_train_short_target = pickle.load(file)
file.close()

#importando df_valid do outro indicador
file = open('/content/drive/MyDrive/ColabNotebooks/datasets/trading/modelos/df_valid_short_target.pkl','rb')
df_valid_short_target = pickle.load(file)
file.close()

In [None]:
# generate predictions on the original dataset
predictions_df = predict_model(final_model, data=df_train)
predictions_df = predictions_df.reset_index()

#previsão com modelo short_target
#predictions_short_target = predict_model(short_target, data=df_train_short_target)
#predictions_short_target = predictions_short_target.reset_index()

def alinhar_pontos(DF):
    '''cria uma coluna, em que o valor da Label será igual ao close*(1+percentual_alta) caso Label=1 '''
    df = DF.copy()
    df['Label'] = np.where(df['Label']==1,df['close'],0)
    #df['high_target'] = np.where(df['baixa']==1,df['close'],0)
    return df

predictions_df = alinhar_pontos(predictions_df)
#predictions_short_target = alinhar_pontos(predictions_short_target)

In [None]:
# line plot
import plotly.express as px
import plotly.graph_objects as go
fig = px.line(predictions_df, x='time', y='close', template = 'plotly_dark')

#fig = go.Figure(data=[go.Candlestick(x=predictions_df['time'],
#                open=predictions_df['open'],
#                high=predictions_df['high'],
#                low=predictions_df['low'],
#                close=predictions_df['close'])])

fig.add_scatter(x=predictions_df['time'], y=predictions_df['Label'], mode='markers', marker_color='rgba(0, 255, 0, .8)')
#fig.add_scatter(x=predictions_short_target['time'], y=predictions_short_target['Label'], mode='markers', marker_color='rgba(255, 0, 0, .8)')
#fig.add_scatter(x=predictions_df_teste['time'], y=predictions_df_teste['baixa'], mode='markers', marker_color='rgba(255, 0, 0, .8)')

ymin=predictions_df.close.min()
ymax=predictions_df.close.max()
fig.update_yaxes(range=[ymin, ymax])
fig.show()
fig.write_html('/content/drive/MyDrive/ColabNotebooks/datasets/trading/modelos/long_target_et.html')

Em que horários os indicadores performam bem?

In [None]:
#! pip install backtrader

In [None]:
df_valid.head()

In [None]:
#criando featues para df_valid
lista=[3, 5, 8, 13, 21, 34, 55, 89, 144]

df_valid_eval = df_valid.copy()
for n in lista:
    df_valid_eval=nIndicadores(df_valid_eval,n)

#Usar em caso de low_target
#baixa_esperada = 0.004
#df = low_target(df,baixa_esperada,6)

percentual_alta = alvo/100
alta_esperada = alvo/100
#df_valid_eval = high_target(df_valid_eval,alta_esperada,6) #target

#definindo o target = df['high_target']
#df_valid_eval['y']=df_valid_eval['high_target']
#del df_valid_eval['high_target'] 
del df_valid_eval['tick_volume'], df_valid_eval['spread'], df_valid_eval['real_volume']	

In [None]:
df_valid_eval.shape

(5021, 85)

In [None]:
# testando dados no dataset de validação
predictions_df_teste = predict_model(model_tuned, data=df_valid_eval)
predictions_df_teste = predictions_df_teste.reset_index()

#predictions_short_target_teste = predict_model(short_target, data=df_valid_short_target)
#predictions_short_target_teste = predictions_short_target_teste.reset_index()

In [None]:
def alinhar_pontos(DF):
    '''cria uma coluna, em que o valor da Label será igual ao close*(1+percentual_alta) caso Label=1 '''
    df = DF.copy()
    df['Label'] = np.where(df['Label']==1,df['close'],0)
    #df = df.drop(['delta'],axis=1)
    #df.dropna(inplace=True)  
    return df

predictions_df_teste = alinhar_pontos(predictions_df_teste)
#predictions_short_target_teste = alinhar_pontos(predictions_short_target_teste)

In [None]:
import plotly.express as px
import plotly.graph_objects as go
fig = px.line(predictions_df_teste, x='time', y='close', template = 'plotly_dark')

fig.add_scatter(x=predictions_df_teste['time'], y=predictions_df_teste['Label'], mode='markers', marker_color='rgba(0, 255, 0, .8)')
#fig.add_scatter(x=predictions_short_target_teste['time'], y=predictions_short_target_teste['Label'], mode='markers', marker_color='rgba(255, 0, 0, .8)')

#fig.add_scatter(x=predictions_df_teste['time'], y=predictions_df_teste['baixa'], mode='markers', marker_color='rgba(255, 0, 0, .8)')

ymin=predictions_df_teste.close.min()
ymax=predictions_df_teste.close.max()
fig.update_yaxes(range=[ymin, ymax])

fig.show()
fig.write_html('/content/drive/MyDrive/ColabNotebooks/datasets/trading/modelos/long_target_valid_et.html')

In [None]:
!pip install bta-lib
!pip install bt
import btalib as bta

In [None]:
predictions_df_teste.head()

In [None]:
close = pd.DataFrame()
close['sinal'] = np.where(predictions_df_teste['Label']>=1,1,0)
close['sinal'] = pd.to_numeric(close['sinal'])
close.set_index(predictions_df_teste.time, inplace=True)
close.fillna(0)
#close.value_counts()

venda = pd.DataFrame()
venda['sinal'] =np.where(predictions_short_target_teste['Label']>=1,-1,0)
venda['sinal'] = pd.to_numeric(venda['sinal'])
venda.fillna(0)
#venda.value_counts()

posicao = venda.copy()
posicao['sinal'] = 0
posicao[close.values == 1] = 1.0
posicao[venda.values == -1] = -1.0
posicao.set_index(predictions_df_teste.time, inplace=True)
posicao['sinal'].fillna(0)
posicao.value_counts()

sinal
 0       670
-1        44
 1         6
dtype: int64

In [None]:
close = pd.DataFrame()
close['sinal'] = predictions_df_teste['close']
close.set_index(predictions_df_teste.time, inplace=True)

#3. Criar estrutura de backtesting

In [None]:
#https://pmorissette.github.io/bt/
#https://colab.research.google.com/drive/1K56P-z6RK01tLYSvfRVadxQGKUp7h7VO?usp=sharing#scrollTo=EepGT0mwt6ou

In [None]:
# download data
# calculate moving average DataFrame using pandas' rolling_mean
# a rolling mean is a moving average, right?
#sma = df_train.close.rolling(5).mean()

In [None]:
import bt
et_strategy = bt.Strategy('et_strategy', [bt.algos.WeighTarget(posicao),
                                bt.algos.Rebalance()])

et_teste = bt.Backtest(et_strategy, close)
resultado = bt.run(et_teste)
#resultado.display()
#resultado.plot()
#resultado.plot_weights('et_strategy')

In [None]:
s1 = bt.Strategy('s1', [bt.algos.SelectAll(),
                        bt.algos.WeighTarget(posicao),
                        bt.algos.Rebalance()])

s1_teste = bt.Backtest(s1, close, initial_capital=1000)
#, commissions=0.1)
                 #integer_positions=integer_positions)

res = bt.run(s1_teste)
#res.display()
#res.plot()
#res.plot_weights('s1')

In [None]:
resultado.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7fd1054daf50>

In [None]:
resultado.plot_histogram()

In [None]:
resultado.plot_security_weights()

In [None]:
# create our new strategy
s2 = bt.Strategy('s2', [bt.algos.RunDaily(),
                        bt.algos.SelectAll(),
                        bt.algos.WeighInvVol(),
                        bt.algos.Rebalance()])

# now let's test it with the same data set. We will also compare it with our first backtest.
s2_teste = bt.Backtest(s2, close)
# we include test here to see the results side-by-side
resultado2 = bt.run(s1_teste, s2_teste)
#resultado2.display()

In [None]:
resultado2.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7fd105263450>

In [None]:

btc_close = pd.DataFrame()
btc_close['sinal'] = predictions_df_teste['close']

btc_close.set_index(predictions_df_teste['time'], inplace=True)
btc_bands = bta.bbands(btc_close, period=20, devs=2.0).df
btc_up = pd.DataFrame()
btc_down = pd.DataFrame()

btc_down['sinal'] = btc_bands['bot']
btc_up['sinal'] =btc_bands['top']
btc_down['sinal'] = pd.to_numeric(btc_down['sinal'])
btc_up['sinal'] = pd.to_numeric(btc_up['sinal'])

btc_down.fillna(0)
btc_up.fillna(0)

posicao2 = btc_up.copy()
posicao2['sinal'] = 0

posicao2[btc_close > btc_up] = -1.0
posicao2[btc_close <= btc_down] = 1.0
posicao2['sinal'].fillna(0)

bbands = bt.Strategy('bbands', [bt.algos.WeighTarget(posicao2),
                                bt.algos.Rebalance()])

bteste = bt.Backtest(bbands, btc_close)

resultado2 = bt.run(bteste)
#resultado2.display()
#resultado2.plot()
#resultado2.plot_weights('bbands')

mm50 = pd.DataFrame()
mmm200 = pd.DataFrame()
mm50 = bta.sma(btc_close.sinal, period=50).df
mm200 = bta.sma(btc_close.sinal, period=200).df
mm50['sinal'] = mm50
mm200['sinal'] =mm200
posicao_2 = btc_up.copy()
posicao_2['sinal'] =0
posicao_2[mm50 > mm200] = 1.0
posicao_2[mm50 <=mm200] = -1.0
posicao_2.fillna(0)
mmovel = bt.Strategy('mmovel', [bt.algos.WeighTarget(posicao_2),
                                bt.algos.Rebalance()])

teste_sma = bt.Backtest(mmovel, btc_close)

benchmark = bt.Strategy('benchmark', [bt.algos.RunYearly(),
                                      bt.algos.SelectAll(),
                                      bt.algos.WeighEqually(),
                                      bt.algos.Rebalance()])

benchmark_teste = bt.Backtest(benchmark, btc_close)
b_resultado = bt.run(et_teste, bteste, teste_sma, benchmark_teste)
b_resultado.display()

Stat                 et_strategy    bbands      mmovel      benchmark
-------------------  -------------  ----------  ----------  -----------
Start                2021-09-13     2021-09-13  2021-09-13  2021-09-13
End                  2021-10-26     2021-10-26  2021-10-26  2021-10-26
Risk-free rate       0.00%          0.00%       0.00%       0.00%

Total Return         0.29%          -0.14%      1.36%       -1.73%
Daily Sharpe         1.58           -0.47       3.10        -3.39
Daily Sortino        2.76           -0.58       5.52        -5.12
CAGR                 2.49%          -1.15%      12.17%      -13.77%
Max Drawdown         -0.49%         -0.84%      -0.57%      -2.42%
Calmar Ratio         5.06           -1.37       21.37       -5.70

MTD                  0.09%          -0.01%      -0.01%      0.23%
3m                   -              -           -           -
6m                   -              -           -           -
YTD                  0.29%          -0.14%      1.36%     

In [None]:
b_resultado.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7fd1051bb750>

In [None]:
#finalize a model
final_model = finalize_model(model_tuned)

In [None]:
df_result = pd.DataFrame(b_resultado)

ValueError: ignored

In [None]:
model_tuned

ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0,
                     class_weight='balanced_subsample', criterion='gini',
                     max_depth=6, max_features=1.0, max_leaf_nodes=None,
                     max_samples=None, min_impurity_decrease=0,
                     min_impurity_split=None, min_samples_leaf=4,
                     min_samples_split=7, min_weight_fraction_leaf=0.0,
                     n_estimators=200, n_jobs=-1, oob_score=False,
                     random_state=123, verbose=0, warm_start=False)