In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
from hurst import compute_Hc
import statsmodels.tsa.stattools as ts
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import scipy.optimize as spop
from sklearn.linear_model import LinearRegression
from statistics import mean
import json

In [None]:

# Read the pair labels from disk; they are later iterated and split on '-'.
with open('stat_pairs.json') as pairs_file:
    selected_pairs_3 = json.load(pairs_file)

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and newer releases no longer accept the bare 'seaborn' name, so pick
# whichever name this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_style('darkgrid')

# Load the CSV, index it by the parsed 'Date' column and drop every column
# that contains at least one missing value.
# NOTE(review): assumes etfs.csv holds one price column per ticker — confirm.
prices = pd.read_csv('etfs.csv')
prices['Date'] = pd.to_datetime(prices['Date'])
prices = prices.set_index('Date').dropna(axis=1)

# Log returns; dropna() removes the leading all-NaN row produced by diff().
returns = np.log(prices).diff().dropna()

In [None]:
# Threshold constants. None of them is referenced by the code visible in this
# part of the notebook.
# presumably a significance cutoff for a statistical test — confirm against usage
P_VALUE_THRESHOLD = 0.05
# presumably an upper bound on a Hurst exponent — confirm against usage
HURST_THRESHOLD = 0.5
# NOTE(review): 253 here, while the backtest window and the annualisation
# factor used elsewhere in this notebook are 252 — confirm which is intended.
TRADING_PERIOD = 253

In [None]:
# Date boundaries (inclusive label slices) for the form_* and trade_* samples.
form_start, form_end = '2011-01-01', '2016-12-31'
trade_start, trade_end = '2017-01-01', '2019-12-31'

# Slice prices and returns to each sample by date label.
prices_form = prices.loc[form_start:form_end]
prices_trade = prices.loc[trade_start:trade_end]
returns_form = returns.loc[form_start:form_end]
returns_trade = returns.loc[trade_start:trade_end]

In [None]:
# Notebook-level backtest settings.
start, end = trade_start, trade_end  # backtest date range
fee = 0                              # cost charged per unit change in signal
window = 252                         # rolling estimation window, in rows
t_threshold = -2.5                   # t-statistic cutoff below which a trade is taken

In [None]:
#helper funcs
def parse_pair(pair):
    """Split a ``'<s1>-<s2>'`` pair label at its first hyphen.

    Parameters
    ----------
    pair : str
        Label containing at least one ``'-'``.

    Returns
    -------
    tuple of str
        ``(s1, s2)`` where ``s1`` is the text before the first hyphen and
        ``s2`` is everything after it (further hyphens stay in ``s2``).

    Raises
    ------
    ValueError
        If ``pair`` contains no hyphen. Previously ``str.find`` returned -1
        in that case and the function silently produced
        ``(pair[:-1], pair)``.
    """
    s1, separator, s2 = pair.partition('-')
    if not separator:
        raise ValueError(f"pair label {pair!r} does not contain '-'")
    return s1, s2

def calculate_metrics(cumret):
    """Summarise a cumulative-return (equity) curve.

    Parameters
    ----------
    cumret : array-like of float
        Equity curve, e.g. ``np.nancumprod(1 + period_returns)``. Converted
        to a float NumPy array, so lists and pandas Series are accepted.

    Returns
    -------
    tuple
        ``(total_return, sharpe, maxDD)`` where

        * ``total_return`` is the change from the first to the last value,
          relative to the first value;
        * ``sharpe`` is ``sqrt(252) * mean / std`` of the period-over-period
          percentage changes (population std, NaNs ignored); it is NaN or
          infinite when those changes have zero dispersion;
        * ``maxDD`` is the most negative ``value / running_peak - 1``
          (``0`` when the curve never falls below an earlier value).

    Raises
    ------
    IndexError
        If ``cumret`` is empty.
    """
    cumret = np.asarray(cumret, dtype=float)

    total_return = (cumret[-1] - cumret[0]) / cumret[0]

    # Period-over-period percentage change of the equity curve.
    rets = cumret[1:] / cumret[:-1] - 1
    sharpe = np.sqrt(252) * np.nanmean(rets) / np.nanstd(rets)

    # The running peak must include the first observation. The previous loop
    # seeded the high-water mark with 0 and started at t=1, so a peak at
    # cumret[0] was ignored and the drawdown was understated.
    highwatermark = np.maximum.accumulate(cumret)
    drawdown = cumret / highwatermark - 1
    maxDD = np.min(drawdown)

    return total_return, sharpe, maxDD

In [None]:
def _adf_t_stat(b, x_window, y_window):
    """Return the t-statistic of the spread's lagged-level regression for hedge ratio ``b``.

    The spread is ``a + b*x - y`` with ``a`` chosen so that it has zero mean
    over the window; its first difference is regressed (no constant) on its
    lagged level and the coefficient's t-statistic is returned.
    """
    b = float(np.ravel(b)[0])  # the optimiser passes a 1-element array
    a = np.mean(y_window - b * x_window)
    spread = a + b * x_window - y_window
    fit = sm.OLS(np.diff(spread), spread[:-1]).fit()
    return fit.params[0] / fit.bse[0]


def baseline(selected_pairs_3, returns_form, trade_start, trade_end,
             fee=0, window=252, t_threshold=-2.5):
    """Backtest a rolling-window cointegration strategy on one pair.

    For every row ``t`` at or after ``window`` the hedge ratio ``b`` of
    ``stock2 = a + b*stock1`` is chosen by Nelder-Mead to minimise the
    t-statistic from ``_adf_t_stat`` over the preceding ``window`` rows.
    If that statistic is not above ``t_threshold`` a position of
    ``sign(fair_value - stock2)`` is taken in stock2 against stock1.

    Parameters
    ----------
    selected_pairs_3 : str
        A single pair label ``'<stock1>-<stock2>'`` (split by ``parse_pair``).
    returns_form : pandas.DataFrame
        Date-indexed frame with one column per symbol. Despite the name the
        values are used as price levels: the regression runs on the levels
        and per-row log returns are derived from them.
    trade_start, trade_end : str
        Inclusive label bounds used to slice ``returns_form``.
    fee : float, optional
        Cost charged per unit of change in the signal. Default 0.
    window : int, optional
        Number of rows in the rolling estimation window. Default 252.
    t_threshold : float, optional
        Trade only when the optimised t-statistic is at or below this value.
        Default -2.5.

    Returns
    -------
    tuple
        ``(total_return, sharpe, maxDD)`` from ``calculate_metrics`` applied
        to the compounded net returns.

    Notes
    -----
    With ``window`` or fewer rows in the slice no return is produced and
    ``calculate_metrics`` receives an empty array.
    """
    print(selected_pairs_3)
    stock1, stock2 = parse_pair(selected_pairs_3)

    data = returns_form.loc[trade_start:trade_end]
    # Plain arrays make every lookup positional; integer `[]` indexing on a
    # date-indexed Series is deprecated in recent pandas.
    x = data[stock1].to_numpy(dtype=float)
    y = data[stock2].to_numpy(dtype=float)

    # Log returns aligned row-for-row with `data`. The previous version read
    # the notebook-global `returns` at position t, but t is a position in the
    # date-sliced `data`, so the P&L came from unrelated dates.
    ret_x = np.diff(np.log(x), prepend=np.nan)
    ret_y = np.diff(np.log(y), prepend=np.nan)

    net_returns = []
    old_signal = 0
    for t in range(window, len(data)):
        x_win = x[t - window:t]
        y_win = y[t - window:t]

        # Optimise the hedge ratio, starting from the current price ratio.
        res1 = spop.minimize(_adf_t_stat, y[t] / x[t], args=(x_win, y_win),
                             method='Nelder-Mead')
        t_opt = res1.fun
        b_opt = float(res1.x[0])
        a_opt = np.mean(y_win - b_opt * x_win)

        fair_value = a_opt + b_opt * x[t]
        if t_opt > t_threshold:
            signal = 0
            gross_return = 0.0
        else:
            # NOTE(review): the signal uses the price at row t and is credited
            # with the return ending at row t, i.e. same-bar execution. This
            # timing is kept from the original; confirm it is intended.
            signal = np.sign(fair_value - y[t])
            gross_return = signal * (ret_y[t] - ret_x[t])

        net_returns.append(gross_return - fee * abs(signal - old_signal))
        # Remember the position so the next fee reflects the actual change;
        # previously old_signal stayed at 0 for the whole run.
        old_signal = signal

    return calculate_metrics(np.nancumprod(np.asarray(net_returns, dtype=float) + 1))

In [None]:
# Per-pair backtest results, one entry per pair in iteration order.
rets, sharpes, maxdd = [], [], []

for pair in selected_pairs_3:
    pair_return, pair_sharpe, pair_maxdd = baseline(pair, prices_trade, trade_start, trade_end)
    rets.append(pair_return)
    sharpes.append(pair_sharpe)
    maxdd.append(pair_maxdd)
    


In [None]:
# Average total return, Sharpe ratio and max drawdown across all pairs.
for metric_values in (rets, sharpes, maxdd):
    print(mean(metric_values))
