In [1]:
# Universe of asset tickers to screen. possible_pairs() combines these into
# every ordered pair of two different tickers.
symbols = ['ADA', 'AION', 'ANT', 'ARDR', 'BAT', 'BCC', 'BCH', 'BLZ', 'BNB', 'BNT', 'BTC', 'BTS', 'BUSD', 'CTXC', 'CVC', 'DAI', 'DASH', 'DATA', 'DCR', 'DENT', 'DGB', 'DNT', 'DOGE', 'ENJ', 'EOS', 'ETC', 'ETH', 'FUN', 'GTO', 'GXS', 'ICX', 'IOST', 'IOTA', 'IOTX', 'KMD', 'KNC', 'LEND', 'LINK', 'LRC', 'LSK', 'LTC', 'MANA', 'MCO', 'MITH', 'MKR', 'NANO', 'NEO', 'NMR', 'NPXS', 'NULS', 'OMG', 'ONT', 'QTUM', 'REP', 'RLC', 'SC', 'SNX', 'STORJ', 'STORM', 'STRAT', 'STX', 'THETA', 'TRX', 'TUSD', 'UTK', 'VEN', 'WAN', 'WAVES', 'WTC', 'XLM', 'XMR', 'XRP', 'XTZ', 'XZC', 'ZEC', 'ZEN', 'ZIL', 'ZRX']

def possible_pairs(assets=None):
    """Build every ordered pair of two different assets.

    Both orderings are produced (``[a, b]`` and ``[b, a]``), so ``n``
    distinct tickers yield ``n * (n - 1)`` pairs.

    Parameters
    ----------
    assets : iterable, optional
        Tickers to combine. When omitted, the module-level ``symbols`` list
        is used, so the original zero-argument call keeps working.

    Returns
    -------
    list of list
        Two-element lists ``[asset_1, asset_2]``, grouped by ``asset_1`` in
        input order. Items that compare equal are never paired together.
    """
    if assets is None:
        assets = symbols
    # Materialise once so a one-shot iterable (e.g. a generator) can feed
    # both the outer and the inner loop.
    assets = list(assets)
    return [
        [asset_1, asset_2]
        for asset_1 in assets
        for asset_2 in assets
        if asset_1 != asset_2
    ]

In [2]:
# Number of tickers in the screening universe.
len(symbols)

78

In [3]:
# Build the candidate list; the %time line magic reports how long the call takes.
%time pairs = possible_pairs()
# Number of ordered candidate pairs produced.
len(pairs)

CPU times: user 6.24 ms, sys: 38 µs, total: 6.28 ms
Wall time: 4.96 ms


6006

In [4]:
from statsmodels.tsa.stattools import coint
import pandas as pd
from tqdm import tqdm


def cointegrated(series_1, series_2, p_threshold=0.01):
    """Run ``coint`` on two series and flag whether the result is significant.

    Parameters
    ----------
    series_1, series_2
        The two series handed to ``coint`` unchanged, in this order.
    p_threshold : float, default 0.01
        Cut-off the p-value must be strictly below to count as significant.

    Returns
    -------
    tuple
        ``(is_significant, p_value)`` where ``p_value`` is element ``[1]`` of
        the ``coint`` result and ``is_significant`` is
        ``p_value < p_threshold``.
    """
    test_result = coint(series_1, series_2)
    p_value = test_result[1]
    is_significant = p_value < p_threshold
    return is_significant, p_value

def _read_close_column(asset, folder):
    """Load the 'close' column of ``<folder>/<asset>.csv`` as a Series."""
    return pd.read_csv("{}/{}.csv".format(folder, asset))['close']


def get_close_prices(asset_1, asset_2, folder="Folder"):
    """Load the close prices of two assets, trimmed to equal length.

    Each asset is read from ``<folder>/<asset>.csv`` and its ``'close'``
    column is taken. Both files are read from disk on every call. If the two
    columns differ in length, the longer one keeps only its last rows so that
    both results have the same number of rows.

    Parameters
    ----------
    asset_1, asset_2 : str
        Asset names; each is formatted into the CSV file name.
    folder : str, default "Folder"
        Directory holding the per-asset CSV files. The default reproduces the
        original hard-coded location.

    Returns
    -------
    tuple of pandas.Series
        ``(prices_1, prices_2)`` with equal length. Original index labels are
        kept, so a trimmed series does not start at label 0.

    Raises
    ------
    FileNotFoundError
        If either CSV file is missing (raised by ``pd.read_csv``).
    KeyError
        If a file has no ``'close'`` column.
    """
    prices_1 = _read_close_column(asset_1, folder)
    prices_2 = _read_close_column(asset_2, folder)

    # Keep only the trailing `common_len` rows of each series.
    # Explicit start offsets are used instead of a negative slice because
    # `series[-0:]` selects the whole series, which would leave the lengths
    # unequal whenever one file has no data rows.
    # NOTE(review): trimming from the front presumes both files end at the
    # same point in time; nothing here checks timestamps - confirm.
    common_len = min(len(prices_1), len(prices_2))
    prices_1 = prices_1.iloc[len(prices_1) - common_len:]
    prices_2 = prices_2.iloc[len(prices_2) - common_len:]

    return prices_1, prices_2

def find_coint_pairs(pairs):
    """Screen candidate pairs and keep the ones flagged as cointegrated.

    For each candidate the two close-price series are loaded with
    ``get_close_prices`` and tested with ``cointegrated`` (using its default
    threshold). Progress is shown with a ``tqdm`` bar.

    Parameters
    ----------
    pairs : sequence
        Candidates; element ``[0]`` and ``[1]`` of each item are the two
        asset names. Must support ``len()``.

    Returns
    -------
    dict
        Maps ``'asset_1|asset_2'`` to ``[asset_1, asset_2, pval]`` for every
        candidate that passed, in the order the candidates were given.
    """
    passing = {}

    for candidate in tqdm(pairs, total=len(pairs)):
        first, second = candidate[0], candidate[1]

        first_prices, second_prices = get_close_prices(first, second)
        passed, pval = cointegrated(first_prices, second_prices)
        if not passed:
            continue

        passing['|'.join([first, second])] = [first, second, pval]

    return passing

# Run the screen over every candidate pair. This is the slow step: each pair
# loads two CSV files and runs one cointegration test.
selected_pairs = find_coint_pairs(pairs)

100%|███████████████████████████████████████████████████████████████| 6006/6006 [06:42<00:00, 14.92it/s]


In [5]:
# Persist the screening result. Every dict key ('asset_1|asset_2') becomes a
# column, so the frame is wide: three rows (asset_1, asset_2, p-value) and one
# column per selected pair. The row index is not written to the file.
filename = "Cointegrated Pairs.csv"
df = pd.DataFrame(selected_pairs)
df.to_csv(filename, index=False)

In [6]:
# Row count of the frame. Rows are the three list positions of each entry
# (asset_1, asset_2, p-value), not the pairs, so this is not the pair count.
len(df)

3

In [7]:
# Preview the wide frame: one column per selected pair.
df.head()

Unnamed: 0,ADA|ANT,ADA|BLZ,ADA|DAI,ADA|DATA,ADA|DCR,ADA|IOTX,ADA|KMD,ADA|LRC,ADA|MANA,ADA|NANO,...,ZIL|SNX,ZIL|XLM,ZIL|ZEN,ZIL|ZRX,ZRX|GXS,ZRX|SC,ZRX|SNX,ZRX|XLM,ZRX|ZEN,ZRX|ZIL
0,ADA,ADA,ADA,ADA,ADA,ADA,ADA,ADA,ADA,ADA,...,ZIL,ZIL,ZIL,ZIL,ZRX,ZRX,ZRX,ZRX,ZRX,ZRX
1,ANT,BLZ,DAI,DATA,DCR,IOTX,KMD,LRC,MANA,NANO,...,SNX,XLM,ZEN,ZRX,GXS,SC,SNX,XLM,ZEN,ZIL
2,0.003559,0.001543,0.0,0.000062,0.008913,0.00448,0.002932,0.001953,0.000062,0.000937,...,0.000148,0.000777,0.000007,0.002052,0.005159,0.000024,0.0,0.001768,0.000015,0.000655


In [8]:
# Summarise shape and dtypes; the column count is the number of selected pairs.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Columns: 720 entries, ADA|ANT to ZRX|ZIL
dtypes: object(720)
memory usage: 17.0+ KB


In [9]:
# Column labels of the frame, i.e. the 'asset_1|asset_2' keys of the selected pairs.
list(df.keys())

['ADA|ANT',
 'ADA|BLZ',
 'ADA|DAI',
 'ADA|DATA',
 'ADA|DCR',
 'ADA|IOTX',
 'ADA|KMD',
 'ADA|LRC',
 'ADA|MANA',
 'ADA|NANO',
 'ADA|SC',
 'ADA|SNX',
 'ADA|STORJ',
 'ADA|TRX',
 'ADA|WAVES',
 'ADA|WTC',
 'ADA|XRP',
 'ADA|ZEN',
 'ANT|STORJ',
 'BAT|ARDR',
 'BAT|DAI',
 'BAT|DGB',
 'BAT|ENJ',
 'BAT|GXS',
 'BAT|KNC',
 'BAT|LRC',
 'BAT|MKR',
 'BAT|NPXS',
 'BAT|REP',
 'BAT|SC',
 'BAT|SNX',
 'BAT|VEN',
 'BAT|ZEN',
 'BCH|DAI',
 'BNB|ARDR',
 'BNB|DATA',
 'BNB|DCR',
 'BNB|DGB',
 'BNB|ENJ',
 'BNB|GXS',
 'BNB|LRC',
 'BNB|MKR',
 'BNB|NANO',
 'BNB|REP',
 'BNB|STORJ',
 'BNB|WTC',
 'BNT|LSK',
 'BTC|ANT',
 'BTC|BLZ',
 'BTC|DCR',
 'BTC|DGB',
 'BTC|KMD',
 'BTC|LEND',
 'BTC|LRC',
 'BTC|MANA',
 'BTC|MKR',
 'BTC|NMR',
 'BTC|REP',
 'BTC|SC',
 'BTC|SNX',
 'BTC|STORJ',
 'BTC|STRAT',
 'BTC|ZEN',
 'BTS|BNT',
 'BTS|LSK',
 'BUSD|ADA',
 'BUSD|ANT',
 'BUSD|BAT',
 'BUSD|BCH',
 'BUSD|BLZ',
 'BUSD|BNB',
 'BUSD|BNT',
 'BUSD|BTC',
 'BUSD|BTS',
 'BUSD|CTXC',
 'BUSD|CVC',
 'BUSD|DASH',
 'BUSD|DCR',
 'BUSD|DENT',
 'BUSD|DGB',
 '

In [10]:
 # Share of candidate pairs that were selected: selected columns / all candidates.
 len(list(df.keys()))/len(pairs)

0.11988011988011989