In [5]:
import numpy as np
import pandas as pd
import itertools
from datetime import datetime
import statsmodels.tsa.stattools as smts
import statsmodels.api as sm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [6]:
# NOTE(review): absolute, machine-specific location — this cell only runs where that directory exists.
path = "/Users/natalieng/Documents/AlgoTrading/Crypto_AlgoTrading"
# The 'index' column is renamed to 'open_time', the column name process_df pivots on.
yr2021_df = pd.read_feather(path+'/data/202122okx_close.feather').rename(columns={'index':'open_time'})
yr22_df = pd.read_feather(path+'/data/okx_20221h.feather')

In [7]:
def process_df(df):
    """Pivot long-format close prices into a wide table with one column per symbol.

    Rows are indexed by ``open_time`` and columns by ``symbol`` (values taken
    from ``close``). Symbols whose name ends with one of the excluded suffixes
    are dropped, and remaining gaps are forward-filled down each column.
    """
    excluded_suffixes = ('-BTC.OK','-ETH.OK','-USDC.OK','-USDK.OK','-UST.OK','-DAI.OK','-OKB.OK')
    wide = pd.pivot_table(df, values='close', index='open_time', columns='symbol')
    keep_mask = ~wide.columns.str.endswith(excluded_suffixes)
    return wide.loc[:, keep_mask].ffill()

In [8]:
# Wide close-price tables (one column per symbol) for each loaded dataset.
pivot_2021 = process_df(df=yr2021_df)
pivot_22 = process_df(df=yr22_df)

In [9]:
# Date windows of the pivoted prices; within each window, any coin with a missing
# value is dropped so that every remaining column is complete.
# NOTE(review): the last line rebinds yr22_df (until here the raw frame read from
# feather) to the pivoted table, so re-running the process_df cell after this one
# would feed it the wrong input.
yr20_df = pivot_2021.loc['20200101':'20201231'].dropna(axis='columns', how='any')
yr21_df = pivot_2021.loc['20210101':'20211231'].dropna(axis='columns', how='any')
yr21_1h_df = pivot_2021.loc['20210101':'20210531'].dropna(axis='columns', how='any')
yr21_2h_df = pivot_2021.loc['20210601':'20211231'].dropna(axis='columns', how='any')
yr22_df = pivot_22.dropna(axis='columns', how='any')

In [23]:
def _distance_score(p1, p2):
    """ for measuring distance btn pairs """
    diff = p1 - p2
    # sum of squared spread diff
    return (diff**2).sum()

def _distance_transform(pair):
    """ normalize price of two coins """
    c0, c1 = pair
    p0 = (c0 - np.mean(c0)) / np.std(c0)
    p1 = (c1 - np.mean(c1)) / np.std(c1)
    trans_training = (p0, p1)
    return trans_training

def _cointegration_test(df, c0, c1):
    """Regress ``df[c0]`` on ``df[c1]`` (plus a constant) and ADF-test the residuals.

    Returns ``(p_value, slope)``: element 1 of the ``adfuller`` result for the
    OLS residuals, and the second fitted parameter of the regression.
    """
    dependent = df[c0]
    design = sm.add_constant(df[c1])
    fit = sm.OLS(dependent, design).fit()  # ordinary least squares
    # Unpacking requires exactly two fitted parameters; the second is taken as the slope.
    _, slope = fit.params
    return smts.adfuller(fit.resid)[1], slope
        
def pair_select(df, n = 5):
    """Return the ``n`` coin pairs with the lowest distance score.

    Every unordered pair of columns in ``df`` is standardised and scored with
    the sum of squared differences. Pairs whose score is not strictly positive
    (which also excludes NaN scores) are discarded.

    Parameters
    ----------
    df : DataFrame
        One price column per coin.
    n : int
        Number of best-scoring pairs to return.

    Returns
    -------
    DataFrame
        Columns ``coin`` (2-tuple of column names) and ``score``, sorted by
        ascending score, at most ``n`` rows. When no pair qualifies the frame
        is empty but still carries both columns.
    """
    all_pairs = list(itertools.combinations(df.columns, 2))
    print(f"Processing {len(all_pairs)} Combinations")
    res_list = []
    for c0, c1 in all_pairs:
        score = _distance_score(*_distance_transform((df[c0], df[c1])))
        if score > 0.0:
            res_list.append({'coin': (c0, c1), 'score': score})
    # Naming the columns keeps the sort below (and callers reading 'coin')
    # working when res_list is empty instead of raising KeyError('score').
    res_df = pd.DataFrame(res_list, columns=['coin', 'score'])
    return res_df.sort_values('score').head(n)

def find_score(df, c0, c1):
    """Return ``(distance score, cointegration p-value)`` for the pair ``c0`` / ``c1``."""
    normalised = _distance_transform((df[c0], df[c1]))
    distance = _distance_score(*normalised)
    p_value, _slope = _cointegration_test(df, c0, c1)
    return distance, p_value

def c1_select(df, c0, n = 5):
    """Rank the columns of ``df`` as partners for ``c0`` by cointegration p-value.

    Each column is paired with ``c0``. Pairs whose distance score is not
    strictly positive are discarded (this removes ``c0`` paired with itself,
    whose distance is zero). For the rest the cointegration p-value is computed.

    Parameters
    ----------
    df : DataFrame
        One price column per coin.
    c0 : column label
        The reference coin; must be a column of ``df``.
    n : int
        Number of rows to return.

    Returns
    -------
    DataFrame
        Columns ``coin`` (``(c0, other)``), ``score`` and ``pvalue``, sorted by
        ascending p-value, at most ``n`` rows. Empty (with all three columns)
        when no pair qualifies.

    Raises
    ------
    KeyError
        If ``c0`` is not a column of ``df``.
    """
    if c0 not in df.columns:
        # Previously this surfaced only as a misleading KeyError('pvalue') after
        # every pair had failed silently.
        raise KeyError(f"{c0!r} is not a column of df")
    coin_list = df.columns
    print(len(coin_list))
    res_list = []
    skipped = []
    for coin in coin_list:
        try:
            score = _distance_score(*_distance_transform((df[c0], df[coin])))
            # Check the cheap distance score first: a pair that is dropped
            # anyway does not need the regression / ADF test.
            if not score > 0.0:
                continue
            p_value = _cointegration_test(df, c0, coin)[0]
        except Exception:
            # Best-effort scan: one failing pair must not abort the ranking,
            # but it is recorded instead of being swallowed silently.
            skipped.append(coin)
            continue
        res_list.append({'coin': (c0, coin),
                         'score': score,
                         'pvalue': p_value})
    if skipped:
        print(f"Skipped {len(skipped)} pair(s) that raised during scoring: {skipped}")
    # Explicit columns keep the sort working when res_list is empty.
    res_df = pd.DataFrame(res_list, columns=['coin', 'score', 'pvalue'])
    return res_df.sort_values('pvalue').head(n)

def coint(df, dist_pairs, sig_level = 0.01):
    """Keep the candidate pairs that pass the cointegration test.

    Parameters
    ----------
    df : DataFrame
        One price column per coin. It is not modified.
    dist_pairs : iterable of pairs
        Each element supplies two column names of ``df`` (``pair[0]``, ``pair[1]``).
    sig_level : float
        A pair is kept when its p-value is strictly below this level.

    Returns
    -------
    list of tuple
        ``(coin0, coin1, p_value)`` for every pair with a positive regression
        slope and ``p_value < sig_level``, in the order of ``dist_pairs``.
    """
    # Fill gaps on a new frame so the caller's DataFrame is never mutated.
    filled = df.ffill().bfill()
    cointegrated_pairs = []
    for pair in dist_pairs:
        p_value, slope = _cointegration_test(filled, pair[0], pair[1])
        if slope > 0 and p_value < sig_level:
            cointegrated_pairs.append((pair[0], pair[1], p_value))
    return cointegrated_pairs

# result summary
def summarize_btc(df, n=20):
    """Print a banner, then return the top ``n`` rows of ``c1_select`` for 'BTC-USDT.OK'."""
    print("### BITCOIN COINTEGRATION ###")
    return c1_select(df, c0='BTC-USDT.OK', n=n)

def summarize_coins(df, n=20):
    """Print a banner, then run ``coint`` on the ``n`` lowest-distance pairs of ``df``."""
    print("### ALL COINS COINTEGRATION ###")
    candidate_pairs = pair_select(df, n)['coin'].values
    return coint(df, candidate_pairs)

In [24]:
# Best BTC partners over the 2020 window.
summarize_btc(yr20_df, n=10)

### BITCOIN COINTEGRATION ###
125


Unnamed: 0,coin,score,pvalue
106,"(BTC-USDT.OK, WAVES-USDT.OK)",64.634831,0.03277
95,"(BTC-USDT.OK, THETA-USDT.OK)",99.911651,0.160164
109,"(BTC-USDT.OK, XEM-USDT.OK)",83.05466,0.312451
121,"(BTC-USDT.OK, ZIL-USDT.OK)",99.112341,0.316745
23,"(BTC-USDT.OK, CVC-USDT.OK)",82.089568,0.331171
44,"(BTC-USDT.OK, GTO-USDT.OK)",249.797581,0.368906
27,"(BTC-USDT.OK, DCR-USDT.OK)",153.741517,0.614792
63,"(BTC-USDT.OK, LTC-USDT.OK)",117.070069,0.710666
110,"(BTC-USDT.OK, XLM-USDT.OK)",118.491151,0.756934
113,"(BTC-USDT.OK, XRP-USDT.OK)",185.828208,0.761241


In [14]:
# Best BTC partners over the 2021 window.
summarize_btc(yr21_df, n=10)

### BITCOIN COINTEGRATION ###
230


Unnamed: 0,coin,score,pvalue
59,"(BTC-USDT.OK, DOT-USDT.OK)",59.723803,0.001806
169,"(BTC-USDT.OK, TMTG-USDT.OK)",458.119221,0.041356
199,"(BTC-USDT.OK, XTZ-USDT.OK)",173.563409,0.049345
80,"(BTC-USDT.OK, GUSD-USDT.OK)",934.570621,0.052249
196,"(BTC-USDT.OK, XPR-USDT.OK)",286.784969,0.071009
70,"(BTC-USDT.OK, FAIR-USDT.OK)",376.68217,0.086291
128,"(BTC-USDT.OK, NU-USDT.OK)",331.992572,0.091292
124,"(BTC-USDT.OK, NDN-USDT.OK)",348.651287,0.093756
13,"(BTC-USDT.OK, APIX-USDT.OK)",538.57644,0.09901
99,"(BTC-USDT.OK, KP3R-USDT.OK)",373.307414,0.108178


In [15]:
# Best BTC partners in the 2022 dataset.
summarize_btc(yr22_df, n=30)

### BITCOIN COINTEGRATION ###
318


Unnamed: 0,coin,score,pvalue
284,"(BTC-USDT.OK, WBTC-USDT.OK)",0.001273,2.223719e-12
105,"(BTC-USDT.OK, EXE-USDT.OK)",22.700412,3.533182e-06
1,"(BTC-USDT.OK, AAC-USDT.OK)",23.16826,0.0007079501
259,"(BTC-USDT.OK, TMTG-USDT.OK)",21.255639,0.0009796325
217,"(BTC-USDT.OK, PROPS-USDT.OK)",25.69596,0.001092854
144,"(BTC-USDT.OK, KAN-USDT.OK)",15.824845,0.002308169
40,"(BTC-USDT.OK, BORING-USDT.OK)",32.536344,0.002697147
249,"(BTC-USDT.OK, STRK-USDT.OK)",37.437863,0.003963406
174,"(BTC-USDT.OK, MCO-USDT.OK)",29.525618,0.004377112
4,"(BTC-USDT.OK, ACT-USDT.OK)",38.974821,0.005279712


In [20]:
# Cointegration check on the 20 closest pairs across all coins in the 2022 dataset.
summarize_coins(yr22_df, n=20)

### ALL COINS COINTEGRATION ###
Processing 50403 Combinations


[('BTC-USDT.OK', 'WBTC-USDT.OK', 2.2237187020606347e-12),
 ('MASK-USDT.OK', 'SUSHI-USDT.OK', 0.0012594181249542342),
 ('POLS-USDT.OK', 'WTC-USDT.OK', 4.6069744545838945e-06),
 ('HC-USDT.OK', 'NAS-USDT.OK', 1.2025072068446594e-05),
 ('KAR-USDT.OK', 'SUSHI-USDT.OK', 0.0033454818597293376),
 ('AKITA-USDT.OK', 'YFI-USDT.OK', 3.9574038116397857e-07),
 ('KOL-USDT.OK', 'WSB-USDT.OK', 0.0011347118980971351),
 ('ICP-USDT.OK', 'LINK-USDT.OK', 3.88556610236644e-09),
 ('ALGO-USDT.OK', 'RAY-USDT.OK', 9.243989548522248e-06),
 ('IOTA-USDT.OK', 'YGG-USDT.OK', 0.0001226699193146375),
 ('ALGO-USDT.OK', 'MASK-USDT.OK', 2.289311585553602e-05),
 ('ALGO-USDT.OK', 'EDEN-USDT.OK', 1.6247172725847987e-05),
 ('NAS-USDT.OK', 'WIN-USDT.OK', 6.355287367942369e-07),
 ('IOTA-USDT.OK', 'WTC-USDT.OK', 0.0016142558674435525),
 ('RAY-USDT.OK', 'SUSHI-USDT.OK', 0.00018126328195948468)]

In [25]:
# BTC/ETH (distance score, p-value) for each window, in chronological order.
for price_frame in (yr20_df, yr21_1h_df, yr21_2h_df, yr22_df):
    print(find_score(price_frame, 'BTC-USDT.OK', 'ETH-USDT.OK'))

(44.09989121265603, 0.9177349194127523)
(168.83754356461873, 0.759080545982585)
(30.03759059724164, 0.30527632606561916)
(7.873979656452303, 0.10298801461136775)


In [26]:
# BTC/DOT (distance score, p-value) for 2021 and 2022.
# yr20_df is deliberately left out — presumably DOT has no complete 2020 column; verify.
for price_frame in (yr21_df, yr22_df):
    print(find_score(price_frame, 'BTC-USDT.OK', 'DOT-USDT.OK'))

(59.723803131047035, 0.0018055019628902009)
(32.17509287819332, 0.3575433598343829)


In [29]:
# DOGE/SHIB (distance score, p-value) in the 2022 dataset.
print(find_score(df=yr22_df, c0='DOGE-USDT.OK', c1='SHIB-USDT.OK'))

(17.527291014873526, 0.09420840425207494)
