The objective of this notebook is to identify coins that are suitable for statistical arbitrage by evaluating whether they are cointegrated and correlated.

In [None]:
#!pip3 install binance-connector

In [None]:
from statsmodels.api import OLS
from statsmodels.tsa.stattools import adfuller
from binance.spot import Spot as SpotClient
import pandas as pd
from joblib import Parallel, delayed

In [None]:
def is_cointegrated(x, y) -> bool:
    """Tell whether the residuals of regressing ``x`` on ``y`` pass an ADF test.

    An ordinary-least-squares model is fitted with ``x`` as the dependent
    variable and ``y`` as the only regressor, and an augmented Dickey-Fuller
    test is run on the fitted residuals.

    NOTE(review): no constant column is added to ``y`` here, so this looks
    like a regression through the origin - confirm that is intended.

    Returns:
        True only when the ADF statistic is at or below its 10% critical
        value AND the ADF p-value is at or below 0.1; False otherwise.
    """
    residuals = OLS(x, y).fit().resid
    adf_output = adfuller(residuals)
    # Positions used from the adfuller result: 0 = test statistic,
    # 1 = p-value, 4 = mapping of critical values keyed by significance level.
    test_statistic = adf_output[0]
    p_value = adf_output[1]
    critical_values = adf_output[4]
    return bool(test_statistic <= critical_values['10%'] and p_value <= 0.1)

In [None]:
def api_reponse_to_pandas(ohlc_dict: dict,symbol:str) -> tuple[str, pd.DataFrame]:
    """Turn a raw kline payload into a close-price frame indexed by close time.

    Args:
        ohlc_dict: tabular payload with twelve fields per row, in the order
            listed in ``kline_fields`` below. NOTE(review): despite the
            ``dict`` annotation, the caller passes the kline response
            directly, which is presumably a list of rows - confirm.
        symbol: label returned unchanged as the first element of the tuple.

    Returns:
        ``(symbol, frame)`` where ``frame`` has a single float ``close``
        column and a UTC ``close_time`` index parsed from epoch milliseconds.
    """
    kline_fields = [
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'num_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
    ]
    klines = pd.DataFrame(ohlc_dict, columns=kline_fields)
    closes = (
        klines
        .assign(
            # timestamps are parsed as UTC, so the index carries no local-time offsets
            close_time=pd.to_datetime(klines['close_time'], unit='ms', utc=True),
            close=klines['close'].astype(float),
        )
        .set_index('close_time')
        [['close']]
    )
    return (symbol, closes)

In [None]:
def check_coins(coin_1_name:str,coin_1_data:pd.DataFrame,coin_2_name:str,coin_2_data:pd.DataFrame,rolling_window:int) -> dict:
    """Score one pair of coins by rolling correlation and rolling cointegration.

    Args:
        coin_1_name: label of the first coin; used as the left half of ``pair``.
        coin_1_data: frame with a ``close`` column; rows are matched to
            ``coin_2_data`` by index label.
        coin_2_name: label of the second coin; used as the right half of ``pair``.
        coin_2_data: frame with a ``close`` column, indexed like ``coin_1_data``.
        rolling_window: number of consecutive shared observations in each
            window, used for both the correlation and the cointegration test.

    Returns:
        A dict with keys ``pair`` (``"<coin_1>_<coin_2>"``), ``corr_mean``
        (mean of the rolling correlation), ``coint_mean`` (share of windows
        flagged by ``is_cointegrated``) and ``both`` (their product).
        An empty dict is returned when the two coins share fewer than
        ``rolling_window`` observations, i.e. when no full window exists.
    """
    # Keep only the index labels present in BOTH frames. An outer join would
    # pad the shorter/misaligned series with NaN, and those NaNs would then be
    # handed to the regression and the ADF test.
    both = pd.merge(
        coin_1_data, coin_2_data,
        how='inner', left_index=True, right_index=True, suffixes=('_1', '_2'),
    ).dropna(subset=['close_1', 'close_2'])

    if len(both) < rolling_window:
        return {}

    # Positional index so windows can be cut by row position below.
    close_1 = both['close_1'].reset_index(drop=True)
    close_2 = both['close_2'].reset_index(drop=True)

    # Rolling correlation; windows that are not yet full are NaN and are
    # ignored by mean().
    corr_mean = close_1.rolling(window=rolling_window).corr(close_2).mean()

    # Rolling cointegration over exactly the same windows as the correlation:
    # each slice [end - rolling_window, end) holds rolling_window rows, and the
    # first slice is the first full window.
    coint_flags = [
        is_cointegrated(
            close_1.iloc[end - rolling_window:end],
            close_2.iloc[end - rolling_window:end],
        )
        for end in range(rolling_window, len(both) + 1)
    ]
    # Average only over windows that were actually tested, so the warm-up rows
    # do not drag the share towards zero.
    coint_mean = sum(coint_flags) / len(coint_flags)

    pair = coin_1_name + '_' + coin_2_name
    return {'pair':pair,'corr_mean':corr_mean,'coint_mean':coint_mean,'both':corr_mean*coint_mean}

In [None]:
# Shared client used by the cells below for exchange metadata and klines.
# It is constructed with no arguments, so no API key or secret is supplied here.
spot_client = SpotClient()

In [None]:
# For simplicity only symbols quoted in one of these stablecoins are considered.
quotes_to_consider = ['USD','USDT','BUSD','USDS','USDP','DAI','USDC','TUSD']

exchange_symbols = pd.DataFrame(spot_client.exchange_info()['symbols'])

has_stable_quote = exchange_symbols['quoteAsset'].isin(quotes_to_consider)
is_trading = exchange_symbols['status'] == 'TRADING'
# Both legs of a pair have to be bought or shorted, so margin trading must be allowed.
is_marginable = exchange_symbols['isMarginTradingAllowed']

spot_info = exchange_symbols[has_stable_quote & is_trading & is_marginable]
spot_info

In [None]:
# Each symbol is tested against every OTHER symbol, in both regression
# directions, and never against itself - so the count is n*(n-1), not n**2.
total_possible_pairs = len(spot_info) * (len(spot_info) - 1)
f'Considering {len(spot_info)} coins for a possible {total_possible_pairs} trading pairs'

Now we can proceed to request OHLCV data and test all these pairs for cointegration and correlation! We will do this in parallel to speed things up.

In [None]:
symbols:list[str] = spot_info.symbol.tolist()
# Reversed copy of the symbol list. A slice gives the exact reverse order;
# indexing with i-1 inside a descending loop would instead wrap around to
# symbols[-1] on the last step and leave the list rotated.
symbols_reversed = symbols[::-1]
len(symbols),len(symbols_reversed)

In [None]:
def _fetch_close_prices(sym: str) -> tuple[str, pd.DataFrame]:
    """Request the latest 1000 five-minute klines for ``sym`` and reduce them to close prices."""
    return api_reponse_to_pandas(spot_client.klines(symbol=sym, interval='5m', limit=1000), sym)

# The request is issued INSIDE the task. Passing `spot_client.klines(...)` as an
# argument to `delayed(...)` would evaluate it while the generator is consumed,
# i.e. one download after another in this process. Threads are preferred because
# the work is network-bound and the shared client does not have to be pickled.
# NOTE(review): concurrent requests count against the exchange's rate limits - confirm the symbol count stays within them.
coins_data_pairs:list[tuple[str,pd.DataFrame]] = Parallel(n_jobs=-2, prefer='threads')(
    delayed(_fetch_close_prices)(sym) for sym in symbols
)
# Turn the list of (symbol, frame) tuples into a dictionary keyed by symbol.
coins_data:dict[str,pd.DataFrame] = dict(coins_data_pairs)


In [None]:
# One task per ordered pair of distinct symbols, each scored with a 500-observation window.
pair_tasks = (
    delayed(check_coins)(symbol, coins_data[symbol], symbol2, coins_data[symbol2], 500)
    for symbol in symbols
    for symbol2 in symbols_reversed
    if symbol != symbol2
)
results = Parallel(n_jobs=-2)(pair_tasks)

In [None]:
# check_coins returns an empty dict for pairs without enough data; skip those so
# they do not turn into all-NaN rows. The columns are named explicitly so that
# sorting by 'both' still works when no pair produced a result.
results_frame = pd.DataFrame(
    [result for result in results if result],
    columns=['pair', 'corr_mean', 'coint_mean', 'both'],
).sort_values(by='both', ascending=False)
results_frame