In [3]:
import numpy as np
import pandas as pd
import itertools
from datetime import datetime
import statsmodels.tsa.stattools as smts
import statsmodels.api as sm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [183]:
# Project root on the author's machine; both feather files are read from its data/ folder.
path = "/Users/natalieng/Documents/AlgoTrading/Crypto_AlgoTrading"

# The first file's 'index' column is renamed to 'open_time', the column name process_df pivots on.
yr2021_df = pd.read_feather(path+'/data/202122okx_close.feather').rename(columns={'index':'open_time'})
yr22_df = pd.read_feather(path+'/data/okx_20221h.feather')

In [185]:
def process_df(df):
    """Reshape a long price table into a wide, forward-filled close-price table.

    Parameters
    ----------
    df : DataFrame with 'open_time', 'symbol' and 'close' columns.

    Returns
    -------
    DataFrame indexed by 'open_time' with one column per symbol. Symbols whose
    name ends in one of the excluded suffixes below are dropped, and gaps are
    forward-filled (values before a symbol's first observation stay NaN).
    """
    excluded_suffixes = ('-BTC.OK','-ETH.OK','-USDC.OK','-USDK.OK','-UST.OK','-DAI.OK','-OKB.OK')
    wide = pd.pivot_table(df, values='close', index='open_time', columns='symbol')
    keep_mask = ~wide.columns.str.endswith(excluded_suffixes)
    return wide.loc[:, keep_mask].ffill()

In [186]:
# Turn both raw long tables into wide close-price tables (rows: open_time, columns: symbol).
pivot_2021 = process_df(yr2021_df)
pivot_22 = process_df(yr22_df)

In [187]:
# One frame per period; dropna(axis=1, how='any') removes every symbol that still has a
# missing value inside that window.
yr20_df = pivot_2021['20200101':'20201231'].dropna(axis=1,how='any')
yr21_df = pivot_2021['20210101':'20211231'].dropna(axis=1,how='any')
# NOTE(review): this rebinds yr22_df from the raw frame read earlier to the pivoted frame, so
# re-running the process_df cell after this one would pass it the pivoted frame instead.
yr22_df = pivot_22.dropna(axis=1,how='any')

## Distance Method

In [196]:
def _distance_score(p1, p2):
    diff = p1 - p2
    # sum of squared spread diff
    return (diff**2).sum()

def _distance_transform(pair):
    c0, c1 = pair
    p0 = (c0 - np.mean(c0)) / np.std(c0)
    p1 = (c1 - np.mean(c1)) / np.std(c1)
    trans_training = (p0, p1)
    return trans_training
        
def pair_select(df, n = 5):
    """Rank every unordered pair of columns by distance score and return the best ``n``.

    Parameters
    ----------
    df : DataFrame with one price column per coin.
    n : number of lowest-score pairs to return.

    Returns
    -------
    DataFrame with columns 'coin' (a ``(name, name)`` tuple) and 'score',
    sorted ascending by score and cut to the first ``n`` rows. The frame is
    empty, but still has both columns, when no pair qualifies, for example
    when ``df`` has fewer than two columns.

    Side effect: prints the number of candidate pairs.
    """
    coin_list = df.columns
    all_pairs = list(itertools.combinations(coin_list,2))
    print(len(all_pairs))
    res_list = []
    for left, right in all_pairs:
        score = _distance_score(*_distance_transform((df[left], df[right])))
        # Keeps strictly positive scores only; a NaN score also fails this comparison.
        if score > 0.0:
            res_list.append({'coin': (left, right),'score': score})
    # Naming the columns up front keeps 'score' present when res_list is empty,
    # so the sort below no longer raises KeyError in that case.
    res_df = pd.DataFrame(res_list, columns=['coin', 'score'])
    return res_df.sort_values('score').head(n)

def find_score(df, c0, c1):
    """Return the distance score between columns ``c0`` and ``c1`` of ``df``.

    The two columns are passed through _distance_transform and the result is
    scored with _distance_score.
    """
    first, second = df[c0], df[c1]
    normalised = _distance_transform((first, second))
    return _distance_score(normalised[0], normalised[1])

def c1_select(df, c0, n = 5):
    """Rank every column of ``df`` against the anchor coin ``c0`` by distance score.

    Parameters
    ----------
    df : DataFrame with one price column per coin.
    c0 : column name of the anchor coin. This argument was previously ignored
        in favour of a hard-coded 'BTC-USDT.OK'; it is now honoured.
    n : number of lowest-score pairs to return.

    Returns
    -------
    DataFrame with columns 'coin' (``(c0, other)`` tuple) and 'score', sorted
    ascending by score and cut to the first ``n`` rows. The frame is empty,
    but still has both columns, when no pair qualifies.

    Raises
    ------
    KeyError if ``c0`` is not a column of ``df``.

    Side effect: prints the number of candidate pairs.
    """
    coin_list = df.columns
    if c0 not in coin_list:
        raise KeyError(c0)
    all_pairs = [(c0, coin) for coin in coin_list]
    print(len(all_pairs))
    res_list = []
    for anchor, other in all_pairs:
        score = _distance_score(*_distance_transform((df[anchor], df[other])))
        # Keeps strictly positive scores only, so a zero score (such as the
        # anchor paired with itself) or a NaN score is left out.
        if score > 0.0:
            res_list.append({'coin': (anchor, other),'score': score})
    # Naming the columns up front keeps 'score' present when res_list is empty.
    res_df = pd.DataFrame(res_list, columns=['coin', 'score'])
    return res_df.sort_values('score').head(n)

In [169]:
%%time 
# Score every column pair of the 2020 frame and keep the 20 lowest distance scores.
dist_pairs = pair_select(yr20_df, n = 20)
dist_pairs

7260
CPU times: user 4 s, sys: 117 ms, total: 4.11 s
Wall time: 4.02 s


Unnamed: 0,coin,score
1321,"(BEC-USDT.OK, MOF-USDT.OK)",33.279319
5347,"(LINK-USDT.OK, UTK-USDT.OK)",33.953485
6253,"(ONT-USDT.OK, WTC-USDT.OK)",36.562414
1354,"(BEC-USDT.OK, VNT-USDT.OK)",36.946455
1956,"(BTM-USDT.OK, NAS-USDT.OK)",38.709841
5989,"(NAS-USDT.OK, ONT-USDT.OK)",39.722675
3958,"(FAIR-USDT.OK, LRC-USDT.OK)",39.991673
5310,"(LINK-USDT.OK, MANA-USDT.OK)",40.322691
5408,"(LRC-USDT.OK, UTK-USDT.OK)",40.390701
2277,"(CMT-USDT.OK, SOC-USDT.OK)",40.990326


In [200]:
# 10 lowest distance scores against BTC-USDT.OK in the 2020 frame.
c1_select(yr20_df, 'BTC-USDT.OK', 10)

125


Unnamed: 0,coin,score
37,"(BTC-USDT.OK, ETH-USDT.OK)",44.099891
111,"(BTC-USDT.OK, XMR-USDT.OK)",61.115865
106,"(BTC-USDT.OK, WAVES-USDT.OK)",64.634831
23,"(BTC-USDT.OK, CVC-USDT.OK)",82.089568
109,"(BTC-USDT.OK, XEM-USDT.OK)",83.05466
121,"(BTC-USDT.OK, ZIL-USDT.OK)",99.112341
95,"(BTC-USDT.OK, THETA-USDT.OK)",99.911651
67,"(BTC-USDT.OK, MDT-USDT.OK)",103.257945
63,"(BTC-USDT.OK, LTC-USDT.OK)",117.070069
110,"(BTC-USDT.OK, XLM-USDT.OK)",118.491151


In [201]:
%%time 
# Score every column pair of the 2021 frame and keep the 40 lowest distance scores.
# This rebinds dist_pairs, replacing the result of the earlier 2020 run.
dist_pairs = pair_select(yr21_df, n = 40)
dist_pairs

26335
CPU times: user 14.5 s, sys: 307 ms, total: 14.8 s
Wall time: 14.6 s


Unnamed: 0,coin,score
8227,"(CNTM-USDT.OK, INX-USDT.OK)",1.86204
8191,"(CNTM-USDT.OK, DMG-USDT.OK)",3.786306
19273,"(LOON-USDT.OK, XUC-USDT.OK)",5.788662
11529,"(DMG-USDT.OK, INX-USDT.OK)",6.36098
16805,"(INX-USDT.OK, LOON-USDT.OK)",6.724833
7290,"(BTM-USDT.OK, HC-USDT.OK)",9.340523
7471,"(BTT-USDT.OK, GAS-USDT.OK)",11.05764
17666,"(JST-USDT.OK, ZRX-USDT.OK)",11.331209
8247,"(CNTM-USDT.OK, LOON-USDT.OK)",11.342561
25072,"(XSR-USDT.OK, XUC-USDT.OK)",11.653673


In [177]:
# Distance score of the BTC/ETH pair in the 2021 frame, for comparison with the ranked pairs.
find_score(yr21_df, 'BTC-USDT.OK', 'ETH-USDT.OK')

322.5252056507295

In [199]:
# 10 lowest distance scores against BTC-USDT.OK in the 2021 frame.
c1_select(yr21_df, 'BTC-USDT.OK', 10)

230


Unnamed: 0,coin,score
61,"(BTC-USDT.OK, DOT-USDT.OK)",59.723803
197,"(BTC-USDT.OK, WBTC-USDT.OK)",168.275829
211,"(BTC-USDT.OK, XTZ-USDT.OK)",173.563409
8,"(BTC-USDT.OK, ALGO-USDT.OK)",189.730782
17,"(BTC-USDT.OK, ATOM-USDT.OK)",196.817476
138,"(BTC-USDT.OK, OMG-USDT.OK)",201.441115
131,"(BTC-USDT.OK, NEAR-USDT.OK)",219.935825
117,"(BTC-USDT.OK, LTC-USDT.OK)",240.096805
185,"(BTC-USDT.OK, TRX-USDT.OK)",253.833962
111,"(BTC-USDT.OK, LINK-USDT.OK)",257.236699


In [189]:
%%time 
# Score every column pair of the 2022 frame and keep the 20 lowest distance scores.
# This rebinds dist_pairs again, replacing the 2021 result.
dist_pairs = pair_select(yr22_df, n = 20)
dist_pairs

50403
CPU times: user 27.2 s, sys: 525 ms, total: 27.7 s
Wall time: 27.2 s


Unnamed: 0,coin,score
12969,"(BTC-USDT.OK, WBTC-USDT.OK)",0.001273
10286,"(BETH-USDT.OK, ETH-USDT.OK)",0.416863
40042,"(MASK-USDT.OK, SUSHI-USDT.OK)",2.962429
45227,"(POLS-USDT.OK, WTC-USDT.OK)",3.062684
32697,"(HC-USDT.OK, NAS-USDT.OK)",3.102218
35803,"(KAR-USDT.OK, SUSHI-USDT.OK)",3.102847
3114,"(AKITA-USDT.OK, YFI-USDT.OK)",3.162686
34683,"(IOTA-USDT.OK, MATIC-USDT.OK)",3.382933
26572,"(ENJ-USDT.OK, IOTA-USDT.OK)",3.416895
36847,"(KOL-USDT.OK, WSB-USDT.OK)",3.481807


In [192]:
# Distance score of the BTC/ETH pair in the 2022 frame.
find_score(yr22_df, 'BTC-USDT.OK', 'ETH-USDT.OK')

7.873979656452303

In [198]:
# 10 lowest distance scores against BTC-USDT.OK in the 2022 frame.
c1_select(yr22_df, 'BTC-USDT.OK', 10)

318


Unnamed: 0,coin,score
284,"(BTC-USDT.OK, WBTC-USDT.OK)",0.001273
34,"(BTC-USDT.OK, BETH-USDT.OK)",7.230323
103,"(BTC-USDT.OK, ETH-USDT.OK)",7.87398
33,"(BTC-USDT.OK, BCH-USDT.OK)",11.100399
25,"(BTC-USDT.OK, AVAX-USDT.OK)",14.458978
202,"(BTC-USDT.OK, ONT-USDT.OK)",14.566341
296,"(BTC-USDT.OK, XEM-USDT.OK)",14.771573
220,"(BTC-USDT.OK, QTUM-USDT.OK)",14.937708
64,"(BTC-USDT.OK, CRO-USDT.OK)",14.961949
69,"(BTC-USDT.OK, CVC-USDT.OK)",15.2008


## Cointegration

In [73]:
def coint(df, dist_pairs, sig_level = 0.01):
    """Keep the candidate pairs whose OLS residual passes an ADF test.

    For each pair ``(c0, c1)`` the function regresses ``df[c0]`` on a constant
    plus ``df[c1]``, runs ``adfuller`` on the residuals, and keeps the pair
    when the p-value is below ``sig_level`` and the fitted slope is positive.

    Parameters
    ----------
    df : DataFrame with one price column per coin. It is no longer modified;
        gaps are filled on a copy.
    dist_pairs : iterable of ``(c0, c1)`` column-name pairs, or a DataFrame
        with a 'coin' column holding such pairs (the shape pair_select returns).
    sig_level : p-value threshold.

    Returns
    -------
    list of ``(c0, c1, p_value)`` tuples, in input order.

    NOTE(review): the plain ADF p-value is not calibrated for regression
    residuals; statsmodels' ``tsa.stattools.coint`` may be the better test —
    confirm before relying on ``sig_level`` as a true significance level.
    """
    # Fill on a copy so the caller's frame keeps its original NaNs.
    prices = df.ffill().bfill()
    if isinstance(dist_pairs, pd.DataFrame):
        dist_pairs = dist_pairs['coin']
    cointegrated_pairs = []
    for c0, c1 in dist_pairs:
        Y = prices[c0]
        X = sm.add_constant(prices[c1])
        results = sm.OLS(Y, X).fit() #ordinary least square
        # params holds the intercept first, then the slope on c1.
        _, slope = results.params
        p_value = smts.adfuller(results.resid)[1]
        if p_value < sig_level and slope > 0:
            cointegrated_pairs.append((c0, c1, p_value))
    return cointegrated_pairs

In [74]:
# NOTE(review): `pivot_df` is not assigned in any cell visible here, and the most recent visible
# `dist_pairs` is the DataFrame returned by pair_select — confirm which objects this cell was
# actually run with and that dist_pairs is in the form coint expects.
coint_pairs = coint(pivot_df, dist_pairs)
print(len(coint_pairs))
coint_pairs

14


[('BTG-USDT.OK', 'GAS-USDT.OK', 1.4634749580888076e-06),
 ('BTG-USDT.OK', 'BTT-USDT.OK', 8.283444435546068e-08),
 ('BTT-USDT.OK', 'GAS-USDT.OK', 1.068296120897675e-14),
 ('BNT-USDT.OK', 'ZRX-USDT.OK', 0.0017934117268140174),
 ('ARK-USDT.OK', 'HBAR-USDT.OK', 0.00030278855694487863),
 ('BKX-USDT.OK', 'VNT-USDT.OK', 1.8999593135753775e-09),
 ('ARK-USDT.OK', 'ICX-USDT.OK', 0.0002744744118990847),
 ('IOTA-USDT.OK', 'QTUM-USDT.OK', 0.00014813733497059383),
 ('DOGE-USDT.OK', 'ETC-USDT.OK', 0.0009103614833645906),
 ('ALGO-USDT.OK', 'ATOM-USDT.OK', 0.0009107966004838361),
 ('TRX-USDT.OK', 'XRP-USDT.OK', 0.0008167045657617323),
 ('LTC-USDT.OK', 'XLM-USDT.OK', 4.842988663309823e-06),
 ('ARK-USDT.OK', 'QTUM-USDT.OK', 0.00242127637750394),
 ('MKR-USDT.OK', 'ZEN-USDT.OK', 0.005410432440130659)]

In [75]:
def coint(df, sig_level = 0.01):
    """Return the ADF p-value of the DOGE/SHIB regression residuals.

    NOTE(review): this redefinition shadows the earlier
    ``coint(df, dist_pairs, sig_level)``; consider giving it its own name.

    The regression is run in both directions, but only the p-value of the last
    direction (SHIB-USDT.OK regressed on DOGE-USDT.OK) is returned.

    Parameters
    ----------
    df : DataFrame with 'DOGE-USDT.OK' and 'SHIB-USDT.OK' columns. It is no
        longer modified; gaps are filled on a copy.
    sig_level : unused; kept so existing calls keep working.

    Raises
    ------
    KeyError if either column is missing from ``df``.
    """
    # Fill on a copy so the caller's frame keeps its original NaNs.
    prices = df.ffill().bfill()
    for c0, c1 in [('DOGE-USDT.OK', 'SHIB-USDT.OK'),('SHIB-USDT.OK', 'DOGE-USDT.OK')]:
        Y = prices[c0]
        X = sm.add_constant(prices[c1])
        results = sm.OLS(Y, X).fit() #ordinary least square
        # Overwritten on every pass, so the value after the loop belongs to the last pair.
        p_value = smts.adfuller(results.resid)[1]
    return p_value

In [76]:
# NOTE(review): at this point `coint` is the one-argument redefinition directly above. The recorded
# run failed with KeyError: 'SHIB-USDT.OK', i.e. that column was not present in pivot_df.
coint(pivot_df)

KeyError: 'SHIB-USDT.OK'