In [None]:
from Copula import *
from scipy.optimize import brentq
from scipy.optimize import minimize_scalar
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from hurst import compute_Hc
import statsmodels.tsa.stattools as ts
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import scipy.optimize as spop
from sklearn.linear_model import LinearRegression
import json
from statistics import mean

In [None]:
# Load the pair labels to back-test. Each entry is later handed to
# parse_pair(), so entries are expected to look like 'S1-S2'.
with open('stat_pairs.json') as pairs_file:
    selected_pairs_3 = json.load(pairs_file)

In [None]:
# Plot styling. Matplotlib 3.6 renamed its bundled seaborn style sheets to
# 'seaborn-v0_8*' and later releases dropped the old names, so use whichever
# name this installation actually provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_style('darkgrid')

# Load the price table and index it by date. The index is sorted because the
# string date slicing and the diff() below both rely on chronological order.
prices = pd.read_csv('etfs.csv')
prices['Date'] = pd.to_datetime(prices['Date'])
prices = prices.set_index('Date').sort_index()
prices = prices.dropna(axis=1)  # drop every column that has any missing value

# Period-over-period log-returns; the first row is NaN after diff() and is dropped.
returns = np.log(prices).diff().dropna()

In [None]:
# Configuration constants. None of them is referenced in the cells visible
# here. NOTE(review): presumably used by a pair-selection step — confirm.
P_VALUE_THRESHOLD = 0.05  # presumably a significance cut-off for a statistical test — TODO confirm
HURST_THRESHOLD = 0.5  # presumably a cut-off on the Hurst exponent — TODO confirm
TRADING_PERIOD = 253  # NOTE(review): calculate_metrics uses 252 periods per year, not this value — confirm which is intended

In [None]:
# Formation window (used for fitting) and trading window (used for the
# back-test). Both bounds of each window are inclusive label slices on the
# date index.
form_start, form_end = '2011-01-01', '2016-12-31'
trade_start, trade_end = '2017-01-01', '2019-12-31'

formation_window = slice(form_start, form_end)
trading_window = slice(trade_start, trade_end)

prices_form = prices.loc[formation_window]
prices_trade = prices.loc[trading_window]
returns_form = returns.loc[formation_window]
returns_trade = returns.loc[trading_window]

In [None]:
#helper funcs
def parse_pair(pair):
    """Split a pair label of the form ``'S1-S2'`` into its two symbols.

    The split happens at the first ``'-'``; everything after it (including
    any further dashes) belongs to the second symbol.

    Parameters
    ----------
    pair : str
        Pair label, e.g. ``'AAA-BBB'``.

    Returns
    -------
    tuple of (str, str)
        ``(s1, s2)`` — the text before and after the first dash.

    Raises
    ------
    ValueError
        If ``pair`` contains no ``'-'``.
    """
    s1, separator, s2 = pair.partition('-')
    if not separator:
        # With str.find() a missing dash yields index -1, which silently
        # produced (pair[:-1], pair) instead of signalling the bad label.
        raise ValueError(f"pair label {pair!r} has no '-' separator")
    return s1, s2

def calculate_metrics(cumret, periods_per_year=252):
    """Summarise a cumulative-return (equity) curve.

    Parameters
    ----------
    cumret : array-like of float
        One-dimensional equity curve, one value per period
        (e.g. ``np.cumprod(1 + period_returns)``).
    periods_per_year : int, optional
        Number of periods used to annualise the Sharpe ratio (default 252).

    Returns
    -------
    total_return : float
        ``(cumret[-1] - cumret[0]) / cumret[0]``.
    sharpe : float
        ``sqrt(periods_per_year) * mean / std`` of the period-over-period
        percentage changes of ``cumret`` (population std, NaNs ignored).
    maxDD : float
        Largest peak-to-trough decline, ``min(cumret / running_max - 1)``;
        zero when the curve never falls below an earlier value.

    Raises
    ------
    ValueError
        If ``cumret`` is empty.
    """
    cumret = np.asarray(cumret, dtype=float)
    if cumret.size == 0:
        raise ValueError('cumret must contain at least one value')

    total_return = (cumret[-1] - cumret[0]) / cumret[0]

    # Period-over-period percentage changes of the equity curve.
    rets = cumret[1:] / cumret[:-1] - 1
    sharpe = np.sqrt(periods_per_year) * np.nanmean(rets) / np.nanstd(rets)

    # The running maximum must include the very first value; starting the
    # high-water mark at zero hid any drawdown measured from cumret[0].
    highwatermark = np.maximum.accumulate(cumret)
    drawdown = cumret / highwatermark - 1
    maxDD = np.min(drawdown)

    return total_return, sharpe, maxDD

In [None]:


def _select_copula_by_aic(u, v):
    """Fit each candidate copula to (u, v) and return the one with the lowest AIC."""
    # The copula classes come from the notebook's `from Copula import *`.
    candidates = [GaussianCopula(), ClaytonCopula(), GumbelCopula(), FrankCopula(), JoeCopula()]
    best_aic = np.inf
    best_copula = None
    for candidate in candidates:
        candidate.fit(u, v)
        log_lik = candidate.log_likelihood(u, v)
        aic = 2 * candidate.num_params - 2 * log_lik
        if aic < best_aic:
            best_aic, best_copula = aic, candidate
    if best_copula is None:
        # Happens only when no candidate produced a finite-comparable AIC (e.g. all NaN).
        raise RuntimeError('no copula could be fitted: every candidate AIC was NaN or infinite')
    return best_copula


def _copula_positions(prob_s1, prob_s2, cl, exit_low, exit_high):
    """Run the entry/exit state machine and return an (n, 2) array of leg positions."""
    LONG, SHORT = (1.0, -1.0), (-1.0, 1.0)  # (position in s1, position in s2)
    positions = np.zeros((len(prob_s1), 2))
    state = None  # None means flat; otherwise LONG or SHORT

    for i, (p1, p2) in enumerate(zip(prob_s1, prob_s2)):
        if state is LONG:
            # Close the long spread once either probability leaves its band.
            if p1 > exit_low or p2 < exit_high:
                state = None
        elif state is SHORT:
            if p1 < exit_high or p2 > exit_low:
                state = None
        # Entries are only considered on days that start flat, so a position
        # closed today is never re-opened on the same day.
        elif p1 < (1 - cl) and p2 > cl:
            state = LONG
        elif p1 > cl and p2 < (1 - cl):
            state = SHORT

        if state is not None:
            positions[i] = state
    return positions


def copula_strat(selected_pairs_3, returns_form, trade_returns=None,
                 cl=0.99, exit_low=0.3, exit_high=0.7):
    """Back-test the copula strategy on one pair and return its metrics.

    Student-t marginals and a copula (chosen by lowest AIC among Gaussian,
    Clayton, Gumbel, Frank and Joe) are fitted on ``returns_form``. The fitted
    model gives conditional probabilities for every row of the trading sample,
    which drive a long/short/flat state machine. Positions are lagged one row
    before being applied to returns.

    Parameters
    ----------
    selected_pairs_3 : str
        A single pair label of the form ``'S1-S2'`` (despite the plural name).
    returns_form : pandas.DataFrame
        Log-returns used for fitting; must contain columns ``S1`` and ``S2``.
    trade_returns : pandas.DataFrame, optional
        Log-returns to trade on. When omitted, the notebook-level
        ``returns_trade`` frame is used, as the function always did.
    cl : float, optional
        Entry confidence level: go long ``S1`` / short ``S2`` when
        ``P(S1|S2) < 1 - cl`` and ``P(S2|S1) > cl``; the mirror image opens
        the opposite trade.
    exit_low, exit_high : float, optional
        Exit band. A long trade closes when ``P(S1|S2) > exit_low`` or
        ``P(S2|S1) < exit_high``; a short trade closes when
        ``P(S1|S2) < exit_high`` or ``P(S2|S1) > exit_low``.

    Returns
    -------
    list
        ``[total_return, sharpe, max_drawdown]`` from ``calculate_metrics``.
    """
    s1, s2 = parse_pair(selected_pairs_3)
    if trade_returns is None:
        trade_returns = returns_trade  # notebook-level trading sample

    # Fit Student-t marginals on the fitting sample.
    dist_s1 = stats.t(*stats.t.fit(returns_form[s1]))
    dist_s2 = stats.t(*stats.t.fit(returns_form[s2]))

    # Map the fitting sample through the marginal CDFs and choose the copula.
    u_form = dist_s1.cdf(returns_form[s1])
    v_form = dist_s2.cdf(returns_form[s2])
    best_copula = _select_copula_by_aic(u_form, v_form)

    # Conditional probabilities for every row of the trading sample.
    prob_s1 = []
    prob_s2 = []
    for u_t, v_t in zip(dist_s1.cdf(trade_returns[s1]), dist_s2.cdf(trade_returns[s2])):
        prob_s1.append(best_copula.cdf_u_given_v(u_t, v_t))
        prob_s2.append(best_copula.cdf_v_given_u(u_t, v_t))

    positions = pd.DataFrame(
        _copula_positions(np.asarray(prob_s1, dtype=float),
                          np.asarray(prob_s2, dtype=float),
                          cl, exit_low, exit_high),
        index=trade_returns.index,
        columns=[s1, s2],
    )

    # A position decided on row t earns the return of row t+1, hence shift().
    algo_ret = (trade_returns[[s1, s2]] * positions.shift()).sum(axis=1)

    simple_ret = np.exp(algo_ret) - 1  # convert log-returns to simple returns
    # NOTE(review): the factor 2 keeps the original "double capital (from
    # short positions)" scaling for a single pair — confirm it is intended.
    total_ret = simple_ret * 2

    total_return, sharpe, max_drawdown = calculate_metrics(
        np.nancumprod(total_ret.to_numpy() + 1))
    return [total_return, sharpe, max_drawdown]

In [None]:
# Back-test every selected pair and collect one value per metric per pair.
rets = []
sharpes = []
maxdd = []

for pair in selected_pairs_3:
    # Fit on the formation window. Passing the trading window here would fit
    # the marginals and copula on the very data being traded (look-ahead bias).
    total_return, sharpe, max_drawdown = copula_strat(pair, returns_form)
    rets.append(total_return)
    sharpes.append(sharpe)
    maxdd.append(max_drawdown)



In [None]:
# Cross-pair averages, printed in the order: total return, Sharpe, max drawdown.
for metric_values in (rets, sharpes, maxdd):
    print(mean(metric_values))