In [4]:
from copy import deepcopy
from scipy.spatial import distance
from future.utils import iteritems
from datetime import date, datetime
import pandas as pd
import numpy as np
import bt

class ChecaSeNaoFechouPar(bt.Algo):
    """Gate algo that reports whether the pair has not been wound down yet.

    Returns the negation of ``target.perm["fechou_par"]``.
    """

    def __init__(self):
        super(ChecaSeNaoFechouPar, self).__init__()

    def __call__(self, target):
        pair_closed = target.perm["fechou_par"]
        return not pair_closed

class ConfiguracaoInicial(bt.Algo):
    """Seed the per-pair persistent state stored in ``target.perm``.

    Keys written:
      * ``data_inicial`` - the current date (``target.now``)
      * ``estado``       - 0 (no open position)
      * ``fechou_par``   - False (pair not wound down)
      * ``std_spread``   - value of the ``std_spread`` setup argument
    """

    def __init__(self):
        super(ConfiguracaoInicial, self).__init__()

    def __call__(self, target):
        state = target.perm
        state["data_inicial"] = target.now
        state["estado"] = 0
        state["fechou_par"] = False
        state["std_spread"] = target.get_data("std_spread")
        return True

class Normaliza(bt.Algo):
    """Min-max normalise today's prices of both legs and store their spread.

    Writes ``norm_1``, ``norm_2`` and ``spread`` (``norm_1 - norm_2``) into
    ``target.temp``. The min/max bounds are the ``min_*`` / ``max_*`` setup
    arguments of the pair, not values recomputed here.
    """

    def __init__(self):
        super(Normaliza, self).__init__()

    def _scaled_price(self, target, name_key, max_key, min_key):
        # Current price of one leg, rescaled with that leg's stored bounds.
        ticker = target.get_data(name_key)
        price = target.universe[ticker][target.now]
        upper = target.get_data(max_key)
        lower = target.get_data(min_key)
        return (price - lower) / (upper - lower)

    def __call__(self, target):
        first = self._scaled_price(target, "pair_1", "max_1", "min_1")
        second = self._scaled_price(target, "pair_2", "max_2", "min_2")

        target.temp["norm_1"] = first
        target.temp["norm_2"] = second
        target.temp["spread"] = first - second
        return True
        
class ChecaSeAbre(bt.Algo):
    """Decide whether a flat pair should open a position.

    A position is opened only while ``target.perm["estado"]`` is 0:
      * spread above  +2 * std_spread -> ``estado`` becomes  1, returns True
      * spread below  -2 * std_spread -> ``estado`` becomes -1, returns True
    In every other case the algo returns False and leaves the state alone.

    Args:
        limite: stored on the instance as ``self.limite``. NOTE(review): it is
            not consulted by ``__call__``; the entry band is fixed at two
            standard deviations. Confirm whether it was meant to replace
            the constant before wiring it in.
    """

    def __init__(self, limite):
        super(ChecaSeAbre, self).__init__()
        self.limite = limite

    def __call__(self, target):
        if target.perm["estado"] != 0:
            return False

        spread = target.temp["spread"]
        band = 2 * target.perm["std_spread"]

        if spread > band:
            target.perm["estado"] = 1
            return True

        if spread < -band:
            target.perm["estado"] = -1
            return True

        # Always hand back a real boolean: falling off the end would return
        # None, which cannot be combined with ``|`` by callers that OR results.
        return False

class ChecaSeFecha(bt.Algo):
    """Decide whether an open pair position should be closed.

    Returns True (and resets ``target.perm["estado"]`` to 0) when the spread
    has crossed zero against the open side: negative while ``estado`` is 1,
    or positive while ``estado`` is -1. Otherwise returns False.

    Args:
        limite: stored as ``self.limite``; not read by ``__call__``.
    """

    def __init__(self, limite):
        super(ChecaSeFecha, self).__init__()
        self.limite = limite

    def __call__(self, target):
        side = target.perm["estado"]
        crossed = (side == 1 and target.temp["spread"] < 0) or \
                  (side == -1 and target.temp["spread"] > 0)
        if not crossed:
            return False

        target.perm["estado"] = 0
        return True


class ChecaSeAberto(bt.Algo):
    """Return True when the pair currently holds a position (``estado`` != 0)."""

    def __init__(self):
        super(ChecaSeAberto, self).__init__()

    def __call__(self, target):
        return bool(target.perm["estado"] != 0)

class Abre(bt.Algo):
    """Set the target weights for opening a pair position.

    With ``estado == 1`` the first leg gets ``-weight`` and the second
    ``+weight``; with ``estado == -1`` the signs are swapped. For any other
    state ``target.temp["weights"]`` is left untouched. Always returns True.

    Args:
        weight: absolute weight given to each leg.
    """

    def __init__(self, weight):
        super(Abre, self).__init__()
        self.weight = weight

    def __call__(self, target):
        side = target.perm["estado"]

        if side in (1, -1):
            # The first leg is sold when the spread is high (side == 1).
            first_leg = -self.weight if side == 1 else self.weight
            target.temp["weights"] = {target.get_data("pair_1"): first_leg,
                                      target.get_data("pair_2"): -first_leg}

        return True


class Fecha(bt.Algo):
    """Set a zero target weight on both legs of the pair. Always returns True."""

    def __init__(self):
        super(Fecha, self).__init__()

    def __call__(self, target):
        legs = [target.get_data("pair_1"), target.get_data("pair_2")]
        target.temp["weights"] = dict.fromkeys(legs, 0.)
        return True


class ChecaSeAcabouPeriodoDeTrocas(bt.Algo):
    """Return True once ``meses`` months have elapsed since ``data_inicial``.

    Args:
        meses: length of the trading period, in calendar months.
    """

    def __init__(self, meses):
        super(ChecaSeAcabouPeriodoDeTrocas, self).__init__()
        self.meses = meses

    def __call__(self, target):
        deadline = target.perm["data_inicial"] + pd.DateOffset(months=self.meses)
        return bool(deadline <= target.now)

class ChecaSeFechado(bt.Algo):
    """Return True when the pair holds no position (``estado`` == 0)."""

    def __init__(self):
        super(ChecaSeFechado, self).__init__()

    def __call__(self, target):
        return bool(target.perm["estado"] == 0)

class EncerraPar(bt.Algo):
    """Wind down a pair strategy and hand its capital back to the parent.

    When the target has children and is not bankrupt, it is flattened and
    updated. If it has a distinct parent, its capital is then moved to that
    parent and ``target.perm["fechou_par"]`` is set to True.
    Always returns False.
    """

    def __init__(self):
        super(EncerraPar, self).__init__()

    def __call__(self, target):
        if not target.children or target.bankrupt:
            return False

        target.flatten()
        target.update(target.now)

        if target.parent != target:
            freed = target.capital
            # Take the cash out of the child as a flow, then credit the parent.
            target.adjust(-freed, update=False, flow=True)
            target.parent.adjust(freed, update=True, flow=False)
            target.perm["fechou_par"] = True

        return False

class ConfiguracaoInicialEstrategia(bt.Algo):
    """Initialise the top-level strategy state in ``target.perm``.

    Stores ``ultima_data_inicio`` as the current date plus six months and
    ``ativos`` as the current universe columns. Always returns False.
    """

    def __init__(self):
        super(ConfiguracaoInicialEstrategia, self).__init__()

    def __call__(self, target):
        state = target.perm
        state["ultima_data_inicio"] = target.now + pd.DateOffset(months=6)
        state["ativos"] = target.universe.columns
        return False

class ChecaSeEscolhePares(bt.Algo):
    """Signal when a new pair selection is due.

    Returns True, and records the current date in ``ultima_data_inicio``,
    when more than six months have passed since the stored date.
    Otherwise returns False.
    """

    def __init__(self):
        super(ChecaSeEscolhePares, self).__init__()

    def __call__(self, target):
        next_selection = target.perm["ultima_data_inicio"] + pd.DateOffset(months=6)
        if not target.now > next_selection:
            return False

        target.perm["ultima_data_inicio"] = target.now
        return True

class SelecionaPares(bt.Algo):
    """Select the closest asset pairs over a 12-month formation window.

    Prices of the assets listed in ``target.perm["ativos"]`` are min-max
    normalised per column, the Euclidean distance between every two
    normalised series is computed, and the ``n_pares`` smallest distances
    define the selected pairs. The result is stored in
    ``target.temp["pares"]`` as a list of tuples::

        (name_1, name_2, max_1, min_1, max_2, min_2, std_spread)

    Args:
        n_pares: maximum number of pairs to select.
    """

    def __init__(self, n_pares):
        super(SelecionaPares, self).__init__()
        self.n_pares = n_pares

    def __call__(self, target):
        ativos = target.perm["ativos"]
        matriz_precos_pfp = target.universe[target.now-pd.DateOffset(months=12):target.now][ativos]
        matriz_norm = self._normaliza_precos(matriz_precos_pfp)
        matriz_distancia = self._encontra_distancia(matriz_norm)
        indices = self._encontra_menores_distancias(matriz_distancia)
        self._adiciona_pares(target, matriz_norm, matriz_precos_pfp, indices)
        return True

    def _normaliza_precos(self, matriz):
        """Min-max scale every column; a constant column becomes NaN (0/0)."""
        maximos = matriz.max(axis=0)
        minimos = matriz.min(axis=0)
        return (matriz - minimos) / (maximos - minimos)

    def _encontra_distancia(self, matriz_norm):
        """Return the upper-triangular distance matrix, NaN everywhere else.

        Zero entries (the diagonal, the lower triangle, and any genuinely
        zero distance) are replaced by NaN so they cannot be selected.
        """
        matriz_distancia = distance.cdist(matriz_norm.T, matriz_norm.T, 'euclidean')
        sem_repeticao = np.triu(matriz_distancia)
        sem_repeticao[sem_repeticao == 0.] = np.nan
        return sem_repeticao

    def _encontra_menores_distancias(self, matriz):
        """Return a ``(2, k)`` array of (row, col) indices, ``k <= n_pares``.

        Columns are ordered by increasing distance. NaN entries are removed
        before truncating, so masked cells are never returned even when
        fewer than ``n_pares`` valid pairs exist.
        """
        ordem = np.argsort(matriz, axis=None)
        # argsort places NaN last; drop them explicitly instead of relying on
        # there always being at least n_pares finite distances.
        ordem = ordem[~np.isnan(matriz.ravel()[ordem])]
        indices_tupla = np.unravel_index(ordem[:self.n_pares], matriz.shape)
        return np.array(indices_tupla)

    def _adiciona_pares(self, target, matriz_norm, matriz_precos_pfp, indices):
        """Build the pair descriptors and store them in ``target.temp["pares"]``."""
        # The indices are positions in the distance matrix, i.e. in the
        # columns of the normalised matrix - resolve names against those
        # columns rather than against the full universe.
        nomes = matriz_norm.columns

        lista_pares = []
        for idx_par_1, idx_par_2 in indices.T:
            name_par_1 = nomes[idx_par_1]
            name_par_2 = nomes[idx_par_2]

            preco_par_1 = matriz_precos_pfp[name_par_1]
            preco_par_2 = matriz_precos_pfp[name_par_2]

            maximo_par_1 = preco_par_1.max(axis=0)
            minimo_par_1 = preco_par_1.min(axis=0)
            maximo_par_2 = preco_par_2.max(axis=0)
            minimo_par_2 = preco_par_2.min(axis=0)

            # Volatility of the normalised spread over the formation window.
            spread = matriz_norm[name_par_1] - matriz_norm[name_par_2]
            std_spread = spread.std()

            lista_pares.append((name_par_1, name_par_2,
                                maximo_par_1, minimo_par_1,
                                maximo_par_2, minimo_par_2,
                                std_spread))

        target.temp["pares"] = lista_pares
        return

class CriaPares(bt.Algo):
    """Create one child strategy per selected pair.

    For every tuple in ``target.temp["pares"]`` a ``bt.Strategy`` is built
    from a deep copy of the per-pair algos, attached to ``target`` and set up
    with the pair's names, bounds and spread deviation. Each new child is
    registered in ``target.temp["weights"]`` with weight 0.

    Args:
        periodo_de_trocas_par_algos: algos run by every pair strategy.
    """

    def __init__(self, periodo_de_trocas_par_algos):
        super(CriaPares, self).__init__()
        self.pt_algos = periodo_de_trocas_par_algos

    def __call__(self, target):
        child_weights = {}
        target.temp["weights"] = child_weights
        stamp = None

        for par in target.temp["pares"]:
            n1, n2, max1, min1, max2, min2, std_spread = par
            stamp = target.now.strftime("%m/%Y")
            pair_name = "%s_%s_%s" % (n1, n2, stamp)

            # Each pair gets its own copy so algo state is never shared.
            trade = bt.Strategy(pair_name, deepcopy(self.pt_algos),
                                children=[n1, n2], parent=target)
            trade.setup_from_parent(pair_1=n1, pair_2=n2,
                                    max_1=max1, min_1=min1,
                                    max_2=max2, min_2=min2,
                                    std_spread=std_spread)
            child_weights[pair_name] = 0

        return True

class AlocaPesosPares(bt.Algo):
    """Allocate a fixed share of the current capital to every pair strategy.

    The amount ``target.capital * pct_capital`` is computed once and then
    allocated to each child named in ``target.temp["weights"]``; the target
    is updated a single time after all allocations. Always returns True.

    Args:
        pct_capital: fraction of the capital given to each pair.
    """

    def __init__(self, pct_capital):
        super(AlocaPesosPares, self).__init__()
        self.pct_capital = pct_capital

    def __call__(self, target):
        pair_names = target.temp.get("weights")
        amount = target.capital * self.pct_capital

        for child_name in pair_names:
            # Defer the update until every child has been funded.
            target.allocate(amount, child=child_name, update=False)

        target.update(target.now)
        return True

def make_data( n_assets=100, n_periods=2000, start_date=date(2021,1,1), phi=0.5, corr=1.0, seed=1234 ):
    ''' Randomly generate a data set consisting of non-stationary prices,
        but where (with the default corr=1.0) the difference between the
        prices of any two securities is stationary.

        Each series is 100 + sqrt(0.5) * (random walk) + sqrt(0.5) * (AR(1)).

        Args:
            n_assets: number of columns, named 's0', 's1', ...
            n_periods: number of daily rows.
            start_date: first date of the index.
            phi: AR(1) coefficient of the stationary component.
            corr: pairwise correlation of the random-walk increments.
            seed: seed passed to np.random.seed (global NumPy RNG state).

        Returns:
            DataFrame of shape (n_periods, n_assets) indexed by date.
    '''
    np.random.seed(seed)
    dts = pd.date_range( start_date, periods=n_periods)
    N = n_assets
    columns = ['s%i' %i for i in range(N)]
    cov = corr * np.ones( (N,N) ) + (1-corr) * np.eye(N)
    # Draw order matters for reproducibility: random-walk increments first, AR(1) shocks second.
    noise = pd.DataFrame( np.random.multivariate_normal( np.zeros(N), cov, len(dts)), index = dts, columns = columns )
    # Generate an AR(1) process with parameter phi
    shocks = np.random.multivariate_normal( np.zeros(N), np.eye(N), len(dts))
    alpha = 1 - phi
    # To cancel out the weighting that ewm puts on the noise term after x0.
    # Done on the raw array before building the frame, so it does not depend
    # on DataFrame.values being a writable view.
    shocks[1:] = shocks[1:] / alpha
    eps = pd.DataFrame( shocks, index = dts, columns=columns)
    ar1 = eps.ewm(alpha=alpha, adjust=False).mean()
    ar1 *= np.sqrt(1.-phi**2) # Re-scale to unit variance, since the standard AR(1) process has variance sigma_eps/(1-phi^2)
    data = 100. + noise.cumsum()*np.sqrt(0.5) + ar1*np.sqrt(0.5)
    # With the current setup, the difference between any two series should follow a mean reverting process with std=1
    return data

def run():
    """Assemble the pairs-trading strategy, back-test it on synthetic data,
    print the resulting statistics and return the ``bt.run`` result."""
    data = make_data()

    # --- building blocks executed by every pair (child) strategy ---
    configura_par = bt.AlgoStack(bt.algos.RunOnce(), ConfiguracaoInicial())
    normaliza_par = bt.AlgoStack(ChecaSeNaoFechouPar(), Normaliza())
    abre_posicao = bt.AlgoStack(ChecaSeAbre(5.), Abre(1.), bt.algos.Rebalance())
    fecha_posicao = bt.AlgoStack(ChecaSeFecha(5.), Fecha(), bt.algos.Rebalance())
    fecha_posicao_fim = bt.AlgoStack(ChecaSeAberto(), Fecha(), bt.algos.Rebalance())
    fim_periodo_de_trocas = bt.AlgoStack(
        ChecaSeAcabouPeriodoDeTrocas(6),
        bt.algos.Or([ChecaSeFechado(), fecha_posicao_fim]),
        EncerraPar(),
    )

    periodo_de_trocas_par = [
        bt.algos.Or([configura_par, normaliza_par]),
        bt.algos.Or([abre_posicao, fecha_posicao, fim_periodo_de_trocas]),
    ]

    # --- top-level strategy: periodic pair selection and funding ---
    configura_estrategia = bt.AlgoStack(bt.algos.RunOnce(), ConfiguracaoInicialEstrategia())
    checa_seleciona_pares = bt.algos.Or([configura_estrategia, ChecaSeEscolhePares()])
    periodo_selecao_pares = [
        checa_seleciona_pares,
        SelecionaPares(10),
        CriaPares(periodo_de_trocas_par),
        AlocaPesosPares(0.1),
    ]

    strategy = bt.Strategy("PairsTradingDistanceApproach", periodo_selecao_pares)
    backtest = bt.Backtest(strategy, data)
    out = bt.run(backtest)
    print(out.stats)
    return out

# Run the back-test (and print its statistics) when executed as a script.
if __name__ == "__main__":
    run()

                      PairsTradingDistanceApproach
start                          2020-12-31 00:00:00
end                            2026-06-23 00:00:00
rf                                             0.0
total_return                              7.063621
cagr                                      0.464047
max_drawdown                             -0.005615
calmar                                   82.647229
mtd                                       0.032516
three_month                               0.128171
six_month                                 0.257028
ytd                                       0.244339
one_year                                  0.634191
three_year                                0.600591
five_year                                 0.518207
ten_year                                       NaN
incep                                     0.464047
daily_sharpe                             10.119363
daily_sortino                            42.876023
daily_mean                     

  res = np.divide(er.mean(), std)
  res = np.divide(er.mean(), std)


Unnamed: 0,p1,p2
2021-01-01,1.0,0.9
2021-01-02,0.9,0.75
2021-01-03,0.8,0.5
2021-01-04,0.7,0.6
2021-01-05,0.6,0.6


In [2]:
run().prices

                           PeriodoDeTrocas
start                  2020-12-31 00:00:00
end                    2021-01-05 00:00:00
rf                                     0.0
total_return                          0.45
cagr                   613666708322.283813
max_drawdown                           0.0
calmar                                 inf
mtd                                   0.45
three_month                            NaN
six_month                              NaN
ytd                                   0.45
one_year                               NaN
three_year                             NaN
five_year                              NaN
ten_year                               NaN
incep                  613666708322.283813
daily_sharpe                      9.452284
daily_sortino                          inf
daily_mean                       21.134717
daily_vol                         2.235938
daily_skew                        1.826905
daily_kurt                        3.208657
best_day   

  res = np.divide(er.mean(), std)
  self.calmar = np.divide(self.cagr, np.abs(self.max_drawdown))
  res = np.divide(er.mean(), std)
  self.calmar = np.divide(self.cagr, np.abs(self.max_drawdown))


Unnamed: 0,PeriodoDeTrocas
2020-12-31,100.0
2021-01-01,100.0
2021-01-02,100.0
2021-01-03,100.0
2021-01-04,132.5
2021-01-05,145.0
