In [183]:
import pandas as pd                               # panda
import pandas_datareader as pdr                   # Baixar dados yahoo

import matplotlib.pyplot as plt                   # grafico
import matplotlib.dates as mdates                 # trabalhar datas

import statsmodels.api as sm                      # regressao
from statsmodels.api import add_constant          # para a regressao considerar o intercepto

import math as math
import numpy as np
import scipy.stats


from statsmodels.tsa.stattools import coint       # Funcao Coint ()
from statsmodels.tsa.stattools import adfuller

# Declaracoes
pd.options.display.float_format = '{:.2f}'.format

#!pip install MetaTrader5
#!pip install --upgrade MetaTrader5
import MetaTrader5 as mt5
from datetime import datetime
from datetime import date
from datetime import timedelta
import time
import pytz

import ipywidgets

%matplotlib inline

In [184]:
def Get_data(tickers,inicio,fim,janela):
    """Download closing prices for one or two tickers from MetaTrader 5.

    Parameters
    ----------
    tickers : sequence of str
        One or two symbols. With exactly one symbol a single-column frame is
        returned; otherwise the first two symbols are inner-joined on bar time.
    inicio, fim : str
        Dash-separated dates: 'YYYY-MM-DD' for daily data (extra fields are
        ignored) or 'YYYY-MM-DD-HH-MM' for intraday data.
    janela : str
        '1d' selects daily bars. Any other value selects 15-minute bars
        (NOTE(review): callers in this notebook pass 'M5', 'M15' and '15m';
        all of them currently map to TIMEFRAME_M15 -- confirm this is intended).

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date'. Daily frames hold one close column per ticker;
        intraday frames additionally keep the raw 'time' column.

    Raises
    ------
    RuntimeError
        If the terminal cannot be initialised or a symbol returns no data.
    """
    # Connect only when there is no live terminal session.
    if mt5.terminal_info() is None:
        if not mt5.initialize():
            erro = mt5.last_error()
            print("initialize() failed, error code =", erro)
            mt5.shutdown()
            # Fail here instead of letting the download below blow up with an
            # unrelated KeyError on an empty frame.
            raise RuntimeError("MetaTrader5 initialize() failed: {0}".format(erro))

    # Break the dash-separated date strings into integer fields.
    partes_inicio = [int(campo) for campo in inicio.split('-')]
    partes_fim = [int(campo) for campo in fim.split('-')]

    timezone = pytz.timezone("Etc/UTC")
    diario = (janela == '1d')

    if diario:
        dt_inicio = datetime(partes_inicio[0], partes_inicio[1], partes_inicio[2], tzinfo=timezone)
        dt_fim = datetime(partes_fim[0], partes_fim[1], partes_fim[2], tzinfo=timezone)
        timeframe = mt5.TIMEFRAME_D1
    else:
        # Intraday requests carry hour and minute as the 4th and 5th fields.
        dt_inicio = datetime(partes_inicio[0], partes_inicio[1], partes_inicio[2],
                             partes_inicio[3], partes_inicio[4], tzinfo=timezone)
        dt_fim = datetime(partes_fim[0], partes_fim[1], partes_fim[2],
                          partes_fim[3], partes_fim[4], tzinfo=timezone)
        timeframe = mt5.TIMEFRAME_M15

    def _fechamentos(ticker):
        # Close prices plus bar time for a single symbol, close column named after it.
        cotacoes = mt5.copy_rates_range(ticker, timeframe, dt_inicio, dt_fim)
        if cotacoes is None:
            raise RuntimeError("copy_rates_range returned no data for {0}: {1}".format(
                ticker, mt5.last_error()))
        return pd.DataFrame(cotacoes)[['close','time']].rename(columns={'close': ticker})

    if (len (tickers) == 1):
        stock_df = _fechamentos(tickers[0])
    else:
        esquerda = _fechamentos(tickers[0])
        direita = _fechamentos(tickers[1])
        # Inner join: keep only bars that exist for both symbols.
        stock_df = pd.merge(left=esquerda, right=direita, left_on='time', right_on='time')

    if diario:
        # 21600 s (6 h) is added before the local-time conversion to correct the
        # time-zone shift of the daily bar timestamp.
        # datetime.fromtimestamp(...).date() is equivalent to date.fromtimestamp(...)
        # and does not depend on the module-level name `date`, which notebook
        # cells can easily rebind.
        stock_df['Date'] = stock_df['time'].apply(lambda x: datetime.fromtimestamp(x+21600).date())
        stock_df = stock_df.drop('time', axis=1)
    else:
        # 'YYYY-MM-DD HH:MM' string in UTC (seconds and offset trimmed);
        # the raw 'time' column is kept for intraday frames.
        stock_df['Date'] = stock_df['time'].apply(lambda x: str (datetime.fromtimestamp(x, tz=timezone))[:-9])

    return stock_df.set_index('Date')

In [185]:
# Gera a REGRESSAO LINEAR MULTIVARIADA (QuantGo "Simples") -> y = b*x + c*t + d + e
def regressao_multivariada(x, y, periodo):
    """Fit y = const + b*x + c*t by OLS, where t = 1..periodo is a linear time trend.

    Parameters
    ----------
    x, y : array-like of length `periodo`
        Independent and dependent price series.
    periodo : int
        Number of observations; also the length of the time-trend column.

    Returns
    -------
    coef_ang : float
        Coefficient of `x` (second parameter, after the constant).
    residuo : same type as `y`
        Regression residuals, y minus the fitted values.
    zscore : same type as `residuo`
        Residuals standardised by their mean and standard deviation.
    res : statsmodels regression results
        The fitted model, returned so callers can inspect params / bse.
    """
    tempo = np.arange(1, periodo + 1)
    # Design matrix columns after add_constant: [const, x, t].
    X = sm.add_constant(np.column_stack((x, tempo)))
    res = sm.OLS(y, X).fit()

    # params may be a label-indexed Series (when y is a Series); convert to an
    # array so that [1] is unambiguously positional.
    coef_ang = np.asarray(res.params)[1]

    residuo = y - res.predict()
    zscore = (residuo - np.mean(residuo)) / np.std(residuo)
    return coef_ang, residuo, zscore, res


def regressao_residuos(residuos):
    """Regress the residual series on its own first difference and return the slope.

    Parameters
    ----------
    residuos : pandas.Series
        Residual series.

    Returns
    -------
    float
        Coefficient of the regressor (second parameter, after the constant).

    NOTE(review): despite the variable name, the regressor is the first
    difference (`diff(1)`), not the lagged series (`shift(1)`) -- confirm
    which one is intended.
    """
    # First difference; the leading NaN is back-filled with the next value.
    residuos_shifted = residuos.diff(1).bfill()
    X = sm.add_constant(residuos_shifted, prepend=True)
    res = sm.OLS(residuos, X).fit()
    # Positional access through an array avoids integer indexing on a
    # label-indexed Series.
    return np.asarray(res.params)[1]

# ADF da Statsmodel e teste de significancia com os critical values do Ferro
def adftest(df,reg):
    """Run an ADF test on `df` and grade it against fixed critical-value tables.

    Two tables are used: one when the t-test on the regression coefficient is
    significant at the 1% level, another when it is not.

    Parameters
    ----------
    df : array-like
        Series to test for stationarity (the regression residuals).
    reg : statsmodels regression results
        Fitted regression; its `params` and `bse` feed the t-test.

    Returns
    -------
    adf_stat : float
        ADF test statistic.
    adf_sign : str
        '99%', '95%', '90%' or '-' (not significant).
    adf_pvalue : float
        p-value reported by `adfuller`.
    """
    adf = adfuller(df, maxlag=1, autolag="BIC")
    adf_stat = adf[0]
    adf_pvalue = adf[1]

    # t-statistic of the regression coefficient at position 1.
    # NOTE(review): with the design matrix built by regressao_multivariada
    # ([const, x, t]) position 1 is the coefficient of x, while the tables
    # below are described in terms of the *time* term (position 2) -- confirm
    # which coefficient should be tested.
    coeficientes = np.asarray(reg.params)
    erros_padrao = np.asarray(reg.bse)
    ttest = coeficientes[1] / erros_padrao[1]

    # Two-tailed 1% critical value of Student's t. The upper quantile is
    # required: ppf(0.01) is negative and would make the comparison with
    # abs(ttest) always false, leaving the second table unreachable.
    critical_value_tempo = scipy.stats.t.ppf(1 - 0.01 / 2, df=(len(df) - 3))
    coef_significante = abs(ttest) >= critical_value_tempo

    # Critical values for 99% / 95% / 90%, in that order.
    if coef_significante:
        limites = (-4.32, -3.67, -3.28)
    else:
        limites = (-3.58, -3.22, -2.60)

    adf_sign = '-'
    for limite, rotulo in zip(limites, ('99%', '95%', '90%')):
        if adf_stat < limite:
            adf_sign = rotulo
            break

    return adf_stat, adf_sign, adf_pvalue


def pearsonr_ci(x,y,alpha=0.05):
    ''' Calculate Pearson correlation along with its confidence interval.

    The interval is obtained with the Fisher z-transformation.

    Parameters
    ----------
    x, y : iterable object such as a list, np.array or pandas.Series
      Input for correlation calculation
    alpha : float
      Significance level. 0.05 by default

    Returns
    -------
    r : float
      Pearson's correlation coefficient
    pval : float
      The corresponding p value
    lo, hi : float
      The lower and upper bound of the confidence interval
    '''
    # Convert up front so plain lists/tuples work (they have no `.size`).
    x = np.asarray(x)
    y = np.asarray(y)

    r, p = scipy.stats.pearsonr(x, y)
    r_z = np.arctanh(r)                       # Fisher z-transform of r
    se = 1/np.sqrt(x.size-3)                  # standard error in z-space
    z = scipy.stats.norm.ppf(1-alpha/2)       # two-sided normal quantile
    lo_z, hi_z = r_z-z*se, r_z+z*se
    lo, hi = np.tanh((lo_z, hi_z))            # back to correlation scale

    return r, p, lo, hi
    
def pct_financeiro(x,y,coef_ang,residuo):
    """Return the buy/sell financial ratio of the pair as a percentage string.

    Parameters
    ----------
    x, y : pandas.Series
        Price series; only the last value of each is used.
    coef_ang : float
        Regression slope applied to the last price of `x`.
    residuo : float
        Sign selects the direction: positive buys the x leg and sells the y
        leg, otherwise the opposite.

    Returns
    -------
    str
        buy amount / sell amount formatted with no decimals, e.g. '108%'.
    """
    # .iloc[-1] extracts the scalar directly (float() on a one-element Series
    # is deprecated in recent pandas).
    fin_x = float(x.iloc[-1]) * coef_ang
    fin_y = float(y.iloc[-1])

    if (residuo > 0):
        compra, venda = fin_x, fin_y
    else:
        compra, venda = fin_y, fin_x

    return "{:.0%}".format(compra/venda)


def calculo_meia_vida1(residuo):
    """Estimate the mean-reversion half-life of a residual series.

    The change in the series is regressed on its lagged value (degree-1
    polynomial fit) and the slope `beta` is converted with
    2 / (-ln(1 + beta)), the spreadsheet variant of the usual
    -ln(2) / beta formula.

    Parameters
    ----------
    residuo : array-like
        Residual series.

    Returns
    -------
    int
        Half-life rounded to the nearest integer.

    Raises
    ------
    ValueError, OverflowError
        If the computed half-life is NaN or infinite (e.g. beta <= -1 or
        beta == 0), since it cannot be converted to an integer.
    """
    price = pd.Series(residuo)
    # Lag by one step; the leading NaN is back-filled with the first value.
    lagged_price = price.shift(1).bfill()
    delta = price - lagged_price
    beta = np.polyfit(lagged_price, delta, 1)[0]
    #half_life = ((-2*np.log(2))/beta)  # several papers, but without the 2*
    half_life = 2/(-1*np.log(1+beta))   # spreadsheet formula

    return (int(round(half_life)))


In [186]:
# Calendar frames already loaded, keyed by workbook path, so the spreadsheet
# is parsed once per session instead of once per call.
_CALENDARIO_B3_CACHE = {}


def ajusta_B3Date (data_start, qtde_dias):
    """Shift a date by a number of rows in the 'Calendar.xlsx' calendar.

    Parameters
    ----------
    data_start : str
        Date to look up in the 'Date' column (compared as a string).
    qtde_dias : int
        Number of calendar rows to move; negative values move backwards.

    Returns
    -------
    str
        Value of the workbook's second column at the target row, with
        '-17-00' appended (presumably the date string -- verify against the
        workbook layout).

    Raises
    ------
    IndexError
        If `data_start` is not in the calendar or the shifted position falls
        outside it. A negative target used to wrap around silently to the end
        of the calendar.

    Notes
    -----
    The workbook is cached after the first read; restart the kernel (or clear
    `_CALENDARIO_B3_CACHE`) if the file changes.
    """
    caminho = 'Calendar.xlsx'
    df = _CALENDARIO_B3_CACHE.get(caminho)
    if df is None:
        df = pd.read_excel(caminho)
        df['Date'] = df['Date'].apply(str)
        _CALENDARIO_B3_CACHE[caminho] = df

    # Row positions whose date matches the starting date.
    posicoes = (df['Date'] == data_start).values.nonzero()[0]
    if len(posicoes) == 0:
        raise IndexError("date {0!r} not found in {1}".format(data_start, caminho))

    alvo = int(posicoes[0]) + qtde_dias
    if not 0 <= alvo < len(df):
        raise IndexError("date {0!r} shifted by {1} rows falls outside {2}".format(
            data_start, qtde_dias, caminho))

    # Second column of the target row, addressed by position.
    data_fim = df.iloc[alvo, 1]

    return (data_fim + '-17-00')

    
def simula_trade (ticker, beta_inicio, dp_entrada, dp_target, start_time , hlife):
    """Fetch the intraday prices covering the lifetime of a simulated trade.

    The trade window starts at `start_time` ('YYYY-MM-DD-HH-MM') and ends
    `hlife` calendar rows later, as resolved by `ajusta_B3Date`.

    `beta_inicio`, `dp_entrada` and `dp_target` are accepted but not used.

    Returns the frame produced by `Get_data` for that window.
    """
    # Strip the trailing '-HH-MM' so only the date is looked up in the calendar.
    dia_inicio = start_time[:-6]
    limite = ajusta_B3Date(dia_inicio, hlife)

    # NOTE(review): 'M5' is requested here; check which timeframe Get_data
    # actually uses for values other than '1d'.
    return Get_data(ticker, start_time, limite, 'M5')
    
def merge_intraday (ticker, dt_inicio, dt_intraday):
    """Daily history up to the previous day plus one intraday bar as the last row.

    Parameters
    ----------
    ticker : sequence of str
        Symbols forwarded to `Get_data`.
    dt_inicio : str
        Start of the daily history.
    dt_intraday : str
        Intraday timestamp, 'YYYY-MM-DD-HH-MM'.

    Returns
    -------
    pandas.DataFrame
        Daily rows followed by the intraday row(s), indexed by date, without
        the 'time' column.
    """
    formato = '%Y-%m-%d-%H-%M'

    # Daily history stops one day before the intraday timestamp.
    vespera = datetime.strptime(dt_intraday, formato) - timedelta(days=1)
    historico = Get_data(ticker, dt_inicio, vespera.strftime(formato), '1d')

    # Start and end are the same instant for the intraday request.
    barra = Get_data(ticker, dt_intraday, dt_intraday, 'M15')

    # Re-index the intraday bar by calendar date so it lines up with the daily rows.
    barra = barra.assign(Date=barra['time'].apply(date.fromtimestamp)).set_index('Date')

    return pd.concat([historico, barra]).drop('time', axis=1)

In [188]:
def _faixa_vol_beta(valor):
    # Band label for a coefficient-of-variation value. NaN fails every
    # comparison and therefore falls into '> 0.20', as in the original chain.
    if   (valor <= 0.05): return '<= 0.05'
    elif (valor <= 0.10): return '<= 0.10'
    elif (valor <= 0.20): return '<= 0.20'
    return '> 0.20'


def Get_Cubo (tickers, dt_cubo, intra_bt):
    """Build the "cube": one row of pair statistics per regression window.

    Parameters
    ----------
    tickers : sequence of str
        tickers[0] is the dependent series (y), tickers[1] the independent (x).
    dt_cubo : str
        Reference date: 'YYYY-MM-DD' for a daily cube, 'YYYY-MM-DD-HH-MM'
        when `intra_bt` is true.
    intra_bt : bool
        If true, the last observation is the intraday bar at `dt_cubo`
        appended to the daily history; otherwise daily closes only.

    Returns
    -------
    pandas.DataFrame
        One row per window (100, 120, ..., 240, 250) with prices, regression,
        ADF, correlation and half-life statistics, plus cube-wide summary
        columns repeated on every row.
    """
    colunas = ['preco_y','preco_x','ratio','periodo', 'adf_stat','adf_sign','coef_ang',
               'desvio','pct_fin','meia_vida', 'corr', 'fx_corr','per_coint','vol_beta_vertical','fx_vol_beta_vertical',
               'vol_beta_vertical1','fx_vol_beta_vertical1','adf99_periodos','dp_residuo']
    periodos = [100,120,140,160,180,200,220,240,250]

    # History starts 250 calendar rows before the reference date.
    if intra_bt:
        dt_inicio = ajusta_B3Date(dt_cubo[:-6],-250)
        # Daily history with the intraday bar at dt_cubo as the last row.
        par = merge_intraday (tickers, dt_inicio, dt_cubo)
    else:
        dt_inicio = ajusta_B3Date(dt_cubo,-250)
        par = Get_data(tickers, dt_inicio, dt_cubo,'1d')

    # One record per window; the frame is built once at the end instead of
    # being grown row by row (DataFrame.append no longer exists in pandas 2).
    linhas = []
    for i in periodos:

        # Last `i` observations; x is the independent and y the dependent series.
        par_subset = par.tail(i)
        x = par_subset[tickers[1]]
        y = par_subset[tickers[0]]

        # Regression: slope, residuals, z-score and the fitted model (used by the ADF grading).
        coef_ang, residuo, zscore, reg = regressao_multivariada(x, y, i)

        # Stationarity test of the residuals.
        adf_stat, adf_sign, _adf_pvalue = adftest(residuo, reg)

        # Pearson correlation of first differences with its Fisher confidence interval.
        fisher_r, fisher_pvalue, fisher_lo, fisher_hi = pearsonr_ci(x.diff().bfill(), y.diff().bfill())

        ultimo_x = float(x.iloc[-1])
        ultimo_y = float(y.iloc[-1])
        ultimo_desvio = float(zscore.iloc[-1])

        # Buy/sell financial ratio.
        pct_fin = pct_financeiro(x, y, coef_ang, ultimo_desvio)

        # Mean-reversion half-life of the residuals.
        meia_vida = calculo_meia_vida1(residuo)

        # Correlation of cumulative percentage changes.
        corr = np.corrcoef(x.pct_change().cumsum().dropna(), y.pct_change().cumsum().dropna())[1,0]

        # Weak lower Fisher bound, non-positive correlation or negative slope
        # overrides the ADF grade with "0%".
        # NOTE(review): "0%" differs from '-', so such rows still count as
        # cointegrated in `per_coint` below -- confirm this is intended.
        if ((fisher_lo <= 0.1) or (corr<=0) or (coef_ang < 0)):
            adf_sign = "0%"

        linhas.append({
            'preco_y':    ultimo_y,
            'preco_x':    ultimo_x,
            'ratio':      ultimo_y/ultimo_x,
            'periodo':    int(i),
            'adf_stat':   adf_stat,
            'adf_sign':   adf_sign,
            'coef_ang':   coef_ang,
            'desvio':     ultimo_desvio,
            'pct_fin':    pct_fin,
            'meia_vida':  meia_vida,
            'corr':       corr,
            'fx_corr':    math.ceil(corr*5), # bands: 0-0.2 / 0.2-0.4 / 0.4-0.6 / 0.6-0.8 / 0.8-1.0
            'dp_residuo': np.std(residuo),
        })

    cubo = pd.DataFrame(linhas, columns=colunas)

    ######### Cube-wide variables for the backtest

    # Number of windows whose ADF grade is anything other than '-'.
    cubo_cointegrado = cubo.loc[cubo['adf_sign'] != '-']
    periodos_coint = len(cubo_cointegrado)

    # Coefficient of variation of the slope across all windows...
    desvio_beta = cubo['coef_ang'].std()/cubo['coef_ang'].mean()
    # ...and across the cointegrated windows only (NaN with fewer than two of them).
    desvio_beta1 = cubo_cointegrado['coef_ang'].std()/cubo_cointegrado['coef_ang'].mean()

    # Number of windows graded '99%'.
    adf99_periodos = len(cubo.loc[cubo['adf_sign'] == '99%'])

    cubo['per_coint']             = periodos_coint
    cubo['adf99_periodos']        = adf99_periodos
    cubo['vol_beta_vertical']     = desvio_beta
    cubo['fx_vol_beta_vertical']  = _faixa_vol_beta(desvio_beta)
    cubo['vol_beta_vertical1']    = desvio_beta1
    cubo['fx_vol_beta_vertical1'] = _faixa_vol_beta(desvio_beta1)

    return cubo

In [191]:

def geraCubodecubos(tickers, dtinicio, dtfim):
    """Build the cube for every daily close and every intraday bar in a date range.

    Parameters
    ----------
    tickers : sequence of str
        Pair of symbols forwarded to `Get_data` / `Get_Cubo`.
    dtinicio, dtfim : str
        Range limits, 'YYYY-MM-DD'.

    Returns
    -------
    pandas.DataFrame
        All cubes stacked, with a 'data' column ('YYYY-MM-DD-HH-MM'; daily
        closes are stamped '19-00'), sorted by 'data' and 'periodo' and with a
        fresh 0..n-1 index.

    Prints the total elapsed time.
    """
    start = time.time()

    colunas = ['data','preco_y','preco_x','ratio','periodo', 'adf_stat','adf_sign','coef_ang',
               'desvio','pct_fin','meia_vida', 'corr', 'fx_corr','per_coint','vol_beta_vertical','fx_vol_beta_vertical',
               'vol_beta_vertical1','fx_vol_beta_vertical1','adf99_periodos','dp_residuo']

    # Cubes are collected in a list and concatenated once at the end
    # (DataFrame.append no longer exists in pandas 2 and grows quadratically).
    cubos = []

    #### Cube of daily closes
    datas = Get_data(tickers, dtinicio, dtfim,'1d').reset_index()['Date']

    for dat in datas:
        tmp = Get_Cubo(tickers,str(dat),False)
        tmp['data']=str(dat)+'-19-00'   # daily closes are stamped 19h00
        cubos.append(tmp)

    #### Intraday cube
    datahora = Get_data(tickers,str(dtinicio+'-10-00') , str(dtfim+'-17-00'),'15m').reset_index()['Date']

    for dthora in datahora:

        # Convert '2018-01-02 10:30' into '2018-01-02-10-30'.
        partes = dthora.split(' ')
        com_data = partes[0]+"-"+partes[1]
        partes = com_data.split(':')
        carimbo = partes[0]+"-"+partes[1]

        tmp = Get_Cubo(tickers,str(carimbo),True)
        tmp['data']=str(carimbo)
        cubos.append(tmp)

    ### Stack and order both cubes
    if cubos:
        cubo_total = pd.concat(cubos)
        # Declared columns first ('data' leading), any extra column afterwards.
        extras = [c for c in cubo_total.columns if c not in colunas]
        cubo_total = cubo_total.reindex(columns=colunas + extras)
    else:
        cubo_total = pd.DataFrame(columns=colunas)

    cubo_total = cubo_total.sort_values(['data','periodo'])
    cubo_total = cubo_total.reset_index(drop=True)

    end = time.time()
    print('Tempo total = {total}'.format(total = (end - start)))

    return cubo_total


In [195]:
# Select the pair and persist the cube.


tickers = ['ENAT3','CCRO3'] # very different - all variables - check whether I read it correctly)

# The assignment below overrides the pair chosen above.
tickers = ['ITUB4','BBSE3']

# NOTE(review): both statements that would create `cubototal` (the line below
# and the read_excel line at the end of this cell) are commented out, so on a
# fresh kernel the to_excel call fails with NameError. Re-enable one of them.
#cubototal = geraCubodecubos(tickers, '2018-01-01','2018-12-31')
cubototal.to_excel('cubo.xlsx')

#display( cubototal[ cubototal['data'].apply(lambda x: x[-5:]) == '19-00' ])

#cubototal = pd.read_excel('cubo_ITUB4_BBSE3.xlsx').reset_index()

In [196]:
# Build time-based variables.

# For each regression window, count the consecutive daily closes ('19-00' rows)
# whose ADF grade is '99%'. Intraday rows carry the count reached at the most
# recent daily close before them; any daily close that is not '99%' resets it.
for periodo in [100,120,140,160,180,200,220,240,250]:

    mascara_periodo = cubototal['periodo'] == periodo
    cubo_periodo = cubototal.loc[mascara_periodo]
    if cubo_periodo.empty:
        continue

    # First row of every timestamp, in order of appearance.
    primeiras = cubo_periodo.drop_duplicates(subset='data')

    adfcount = 0
    contagem_por_data = {}
    # The loop variable is deliberately not called `date`: at notebook top level
    # that would rebind the imported datetime.date used by the data-loading functions.
    for data_ref, adf in zip(primeiras['data'], primeiras['adf_sign']):

        if (data_ref[-5:] == '19-00'):
            if (adf == '99%'): adfcount += 1
            else: adfcount = 0

        contagem_por_data[data_ref] = adfcount

    # One assignment per window instead of one full-frame mask per timestamp.
    cubototal.loc[mascara_periodo, 'adfcount'] = cubo_periodo['data'].map(contagem_por_data).values

display(cubototal)



Unnamed: 0,data,preco_y,preco_x,ratio,periodo,adf_stat,adf_sign,coef_ang,desvio,pct_fin,...,corr,fx_corr,per_coint,vol_beta_vertical,fx_vol_beta_vertical,vol_beta_vertical1,fx_vol_beta_vertical1,adf99_periodos,dp_residuo,adfcount
0,2018-01-02-10-00,24.29,21.59,1.13,100.00,-3.29,90%,1.04,-0.47,108%,...,0.81,5.00,1,0.26,> 0.20,,> 0.20,0,0.49,0.00
1,2018-01-02-10-00,24.29,21.59,1.13,120.00,-2.24,-,1.08,-0.99,104%,...,0.69,4.00,1,0.26,> 0.20,,> 0.20,0,0.76,0.00
2,2018-01-02-10-00,24.29,21.59,1.13,140.00,-2.16,-,0.94,-1.27,120%,...,0.62,4.00,1,0.26,> 0.20,,> 0.20,0,0.82,0.00
3,2018-01-02-10-00,24.29,21.59,1.13,160.00,-2.17,-,0.83,-1.34,136%,...,0.38,2.00,1,0.26,> 0.20,,> 0.20,0,0.85,0.00
4,2018-01-02-10-00,24.29,21.59,1.13,180.00,-2.31,-,0.86,-0.92,131%,...,0.38,2.00,1,0.26,> 0.20,,> 0.20,0,0.92,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66793,2018-12-28-19-00,31.92,23.30,1.37,180.00,-2.91,-,1.68,-0.87,81%,...,0.88,5.00,1,0.21,> 0.20,,> 0.20,0,1.14,0.00
66794,2018-12-28-19-00,31.92,23.30,1.37,200.00,-3.25,-,1.72,-0.89,79%,...,0.88,5.00,1,0.21,> 0.20,,> 0.20,0,1.11,0.00
66795,2018-12-28-19-00,31.92,23.30,1.37,220.00,-3.81,95%,1.67,-0.85,82%,...,0.87,5.00,1,0.21,> 0.20,,> 0.20,0,1.11,0.00
66796,2018-12-28-19-00,31.92,23.30,1.37,240.00,-3.18,-,1.54,-0.78,89%,...,0.79,4.00,1,0.21,> 0.20,,> 0.20,0,1.26,0.00


In [198]:
def Busca_trade(cubo_of, coint=2, dp=2., adf_sign =0.95, meia_vida = 10, voltando = False):
    """Scan the cube for trade entries, enforcing a quarantine between trades.

    For every intraday timestamp, the candidate rows are those that are not
    graded '-', have |desvio| >= `dp` and at least `coint` cointegrated
    windows; the row with the lowest ADF statistic is the identified trade.
    An identified trade is selected only if its timestamp is not earlier than
    the quarantine limit, which is then moved forward by ceil(meia_vida / 4)
    calendar rows of the selected trade (via `ajusta_B3Date`).

    Parameters
    ----------
    cubo_of : pandas.DataFrame
        Cube with at least 'data', 'desvio', 'adf_sign', 'adf_stat',
        'per_coint' and 'meia_vida'. It is not modified.
    coint : int
        Minimum value of 'per_coint'.
    dp : float
        Minimum absolute 'desvio' (rounded to 2 decimals).
    adf_sign, meia_vida, voltando
        Accepted but not used.

    Returns
    -------
    pandas.DataFrame
        The selected trades.

    Side effects: prints both totals, displays both tables and writes every
    identified trade to 'Total_trades.xlsx'.
    """
    formato = '%Y-%m-%d-%H-%M'
    colunas_base = ['data','preco_y','preco_x','ratio','periodo', 'adf_stat','adf_sign','coef_ang',
                    'desvio','pct_fin','meia_vida', 'corr', 'fx_corr','per_coint','vol_beta_vertical','fx_vol_beta_vertical',
                    'vol_beta_vertical1','fx_vol_beta_vertical1','adf99_periodos']

    # Timestamps of the unfiltered cube; the first one opens the quarantine window.
    date_unique = cubo_of['data'].unique()

    # Work on a copy: the filtering below must not alter the caller's frame.
    cubo = cubo_of.copy()
    # Round the deviation and keep its absolute value.
    cubo['desvio'] = cubo['desvio'].apply(lambda x: round(x,2))
    cubo['desvio_abs'] = cubo['desvio'].apply(lambda x: abs(x))
    # Discard non-cointegrated rows (adf_sign == '-') and daily-close rows ('19-00').
    eh_fechamento = cubo['data'].apply(lambda x: x[-5:]) == '19-00'
    cubo = cubo[(cubo['adf_sign'] != '-') & ~eh_fechamento]
    # Rows matching the deviation and cointegrated-windows thresholds.
    candidatos = cubo[(cubo['desvio_abs'] >= dp) & (cubo['per_coint'] >= coint)]

    identificados = []   # best candidate of every timestamp (debug output)
    selecionados = []    # candidates that also respect the quarantine

    # With an empty cube there is nothing to iterate, so no limit is needed.
    dt_quarentena = datetime.strptime(date_unique[0], formato) if len(date_unique) else None

    # sort=False keeps the timestamps in order of appearance.
    for data_trade, grupo in candidatos.groupby('data', sort=False):

        # Lowest (most negative) ADF statistic wins.
        melhor = grupo.sort_values(by='adf_stat').iloc[0]
        identificados.append(melhor)

        d_obj_trade = datetime.strptime(data_trade, formato)

        # Accept only trades at or after the quarantine limit left by the previous one.
        if (d_obj_trade >= dt_quarentena):
            selecionados.append(melhor)
            # Quarantine lasts a quarter of the half-life, rounded up...
            dias_quarentena = int(math.ceil(melhor['meia_vida'] / 4))
            # ...counted in rows of the calendar workbook.
            dt_quarentena = datetime.strptime(ajusta_B3Date(data_trade[:-6], dias_quarentena), formato)

    def _monta_tabela(linhas):
        # Frame with the base columns first and any extra column afterwards.
        if not linhas:
            return pd.DataFrame(columns=colunas_base)
        tabela = pd.DataFrame(linhas)
        extras = [c for c in tabela.columns if c not in colunas_base]
        return tabela.reindex(columns=colunas_base + extras)

    full_trade_select = _monta_tabela(identificados)
    trade_select = _monta_tabela(selecionados)

    print ('TOTAL DE TRADES IDENTIFICADOS NO PERIODO  => {data}'.format(data = len(full_trade_select)))
    print ('TOTAL DE TRADES SELECIONADOS NO PERIODO   => {data}'.format(data = len(trade_select)))

    display(full_trade_select)
    display(trade_select)
    full_trade_select.to_excel('Total_trades.xlsx')
    return trade_select



In [199]:
# Search the full cube for trades with the default thresholds, then save the
# selected trades to a spreadsheet.
trades = Busca_trade (cubototal)

trades.to_excel('Trades_adicionados.xlsx')

TOTAL DE TRADES IDENTIFICADOS NO PERIODO  => 49
TOTAL DE TRADES SELECIONADOS NO PERIODO   => 4


Unnamed: 0,data,preco_y,preco_x,ratio,periodo,adf_stat,adf_sign,coef_ang,desvio,pct_fin,...,fx_corr,per_coint,vol_beta_vertical,fx_vol_beta_vertical,vol_beta_vertical1,fx_vol_beta_vertical1,adf99_periodos,adfcount,desvio_abs,dp_residuo
5077,2018-01-26-10-30,29.0,24.12,1.2,120.0,-3.58,90%,1.2,2.0,100%,...,5.0,2,0.2,> 0.20,0.05,<= 0.05,0,0.0,2.0,0.5
5086,2018-01-26-10-45,29.04,24.1,1.2,120.0,-3.55,90%,1.2,2.11,100%,...,5.0,2,0.2,> 0.20,0.05,<= 0.05,0,0.0,2.11,0.5
5095,2018-01-26-11-00,29.16,24.21,1.2,120.0,-3.56,90%,1.21,2.08,100%,...,5.0,2,0.2,> 0.20,0.05,<= 0.05,0,0.0,2.08,0.5
5104,2018-01-26-11-15,29.13,24.19,1.2,120.0,-3.56,90%,1.21,2.07,100%,...,5.0,2,0.2,> 0.20,0.05,<= 0.05,0,0.0,2.07,0.5
5122,2018-01-26-11-45,29.07,24.14,1.2,120.0,-3.56,90%,1.21,2.07,100%,...,5.0,2,0.2,> 0.20,0.05,<= 0.05,0,0.0,2.07,0.5
5158,2018-01-26-12-45,29.44,24.36,1.21,120.0,-3.52,90%,1.21,2.23,100%,...,5.0,2,0.2,<= 0.20,0.05,<= 0.05,0,0.0,2.23,0.5
5167,2018-01-26-13-00,29.58,24.48,1.21,120.0,-3.53,90%,1.21,2.22,100%,...,5.0,2,0.2,<= 0.20,0.05,<= 0.05,0,0.0,2.22,0.5
5221,2018-01-26-14-30,29.6,24.48,1.21,120.0,-3.52,90%,1.22,2.25,100%,...,5.0,2,0.2,<= 0.20,0.05,<= 0.05,0,0.0,2.25,0.5
5230,2018-01-26-14-45,29.47,24.4,1.21,120.0,-3.53,90%,1.21,2.2,100%,...,5.0,2,0.2,<= 0.20,0.05,<= 0.05,0,0.0,2.2,0.5
5878,2018-01-30-16-15,28.81,23.64,1.22,120.0,-3.5,90%,1.29,2.12,106%,...,5.0,2,0.18,<= 0.20,0.06,<= 0.10,0,0.0,2.12,0.54


Unnamed: 0,data,preco_y,preco_x,ratio,periodo,adf_stat,adf_sign,coef_ang,desvio,pct_fin,...,fx_corr,per_coint,vol_beta_vertical,fx_vol_beta_vertical,vol_beta_vertical1,fx_vol_beta_vertical1,adf99_periodos,adfcount,desvio_abs,dp_residuo
5077,2018-01-26-10-30,29.0,24.12,1.2,120.0,-3.58,90%,1.2,2.0,100%,...,5.0,2,0.2,> 0.20,0.05,<= 0.05,0,0.0,2.0,0.5
7957,2018-02-08-16-15,28.45,22.98,1.24,120.0,-3.92,95%,1.49,2.0,120%,...,5.0,2,0.18,<= 0.20,0.03,<= 0.05,0,0.0,2.0,0.56
8470,2018-02-14-17-00,29.63,23.57,1.26,120.0,-3.66,90%,1.53,2.2,122%,...,5.0,2,0.18,<= 0.20,0.08,<= 0.10,0,0.0,2.2,0.57
9119,2018-02-19-10-15,29.86,23.31,1.28,140.0,-3.48,90%,1.47,2.61,115%,...,5.0,2,0.18,<= 0.20,0.06,<= 0.10,0,0.0,2.61,0.67


In [None]:
# Show the trades selected by the Busca_trade call above.
display(trades)

In [None]:
# def verifica(x):
#     if (x == "99%"): return 1;
#     else: return 0;
    
# cubototal['eh_adf99'] = cubototal['adf_sign'].apply(lambda x: verifica(x))
# cubototal['adf99_5dias'] = 
# display(cubototal)
# print (tmp['adfs99'].rolling(10).sum())


#df['MA{}'.format(i)] = df.rolling(window=i).mean()

In [None]:
# Building a cube

# Keep only the "19-00" rows with periodo=250
# tmp = cubototal[ (cubototal['data'].apply(lambda x: x[-5:]) == '19-00') & (cubototal['periodo']==250) ]



# NOTE(review): the only top-level assignment to `tmp` is the commented-out
# line above, so this call raises NameError on a fresh kernel.
display(tmp)
#tmp.loc[cubototal['periodo']==250]


In [197]:
# Save the cube to 'cubo.xlsx' (same call as in the cell that selects the tickers).
cubototal.to_excel('cubo.xlsx')