In [1]:
!pip install hurst



In [2]:
import ast
import csv
import itertools
import warnings
from datetime import datetime, timedelta, timezone
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
from hurst import compute_Hc
from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn
from scipy.stats import chi2
from sklearn.metrics import mean_squared_error
from statsmodels.api import OLS
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tools.sm_exceptions import ValueWarning, HessianInversionWarning, ConvergenceWarning
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from tqdm import tqdm




# Silence the noisy statsmodels warning classes plus generic UserWarnings.
# Filters are registered in the order listed.
for _ignored_category in (ValueWarning, HessianInversionWarning, ConvergenceWarning, UserWarning):
    warnings.filterwarnings('ignore', category=_ignored_category)

# **Get Data**

In [3]:
# Yahoo Finance tickers to download (all quoted in USD).
cryptos = ['BTC-USD', 'ETH-USD', 'BNB-USD', 'SOL-USD', 'XRP-USD', 'USDC-USD', 'STETH-USD', 'ADA-USD', 'AVAX-USD', 'DOGE-USD', 'DOT-USD', 'WTRX-USD', 'TRX-USD', 'MATIC-USD', 'LINK-USD', 'TON11419-USD', 'WBTC-USD', 'SHIB-USD', 'ICP-USD', 'DAI-USD', 'LTC-USD', 'BCH-USD', 'UNI7083-USD', 'ATOM-USD', 'LEO-USD', 'NEAR-USD', 'XLM-USD', 'OKB-USD', 'INJ-USD', 'OP-USD', 'FIL-USD', 'ETC-USD', 'APT21794-USD', 'LDO-USD', 'XMR-USD', 'HBAR-USD', 'BXC5168-USD', 'IMX10603-USD', 'KAS-USD', 'BTCB-USD', 'STX4847-USD', 'CRO-USD', 'TUSD-USD', 'VET-USD']

# prices: ticker -> 4-hour close series.
# returns: ticker -> empty list placeholder (nothing in this cell fills it).
prices = {}
returns = {}

start_date = datetime(2022, 11, 1)
end_date = datetime(2023, 11, 1)

# How each OHLCV column is collapsed when 1-hour bars are merged into 4-hour bars.
ohlcv_rules = {'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'}

for crypto in cryptos:
    prices_1h = yf.download(crypto, start=start_date, end=end_date, interval='1h')

    if prices_1h.empty:
        # Nothing came back for this ticker; skip it so the combined price
        # table does not gain an empty (all-NaN) column.
        print(f'No data returned for {crypto}; skipping.')
        continue

    prices_4h = prices_1h.groupby(pd.Grouper(freq='4H')).agg(ohlcv_rules)
    # Carry the last known bar forward over 4-hour buckets that had no trades.
    prices_4h = prices_4h.ffill()

    returns[crypto] = []
    prices[crypto] = prices_4h['Close']


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [4]:
# Combine the per-ticker close series into one wide table: one column per
# ticker, rows aligned on the series' index.
df = pd.DataFrame(data=prices)
print(df)

                                BTC-USD      ETH-USD     BNB-USD    SOL-USD  \
Datetime                                                                      
2022-11-01 00:00:00+00:00  20496.050781  1586.926025  330.145477  32.656120   
2022-11-01 04:00:00+00:00  20585.644531  1589.435059  327.175171  33.019585   
2022-11-01 08:00:00+00:00  20528.517578  1590.957397  323.703339  32.927696   
2022-11-01 12:00:00+00:00  20423.361328  1572.205200  319.484528  32.427967   
2022-11-01 16:00:00+00:00  20447.882812  1577.353760  324.656799  32.250702   
...                                 ...          ...         ...        ...   
2023-10-31 04:00:00+00:00  34153.902344  1791.078735  226.252411  35.730648   
2023-10-31 08:00:00+00:00  34518.582031  1810.569580  228.285156  36.603596   
2023-10-31 12:00:00+00:00  34290.898438  1794.058105  224.795120  36.138054   
2023-10-31 16:00:00+00:00  34493.507812  1809.860474  225.640991  36.238384   
2023-10-31 20:00:00+00:00  34668.410156  1816.861450

# **All Variables**

In [5]:
# Module-level containers filled by later cells:
#   weights                  - combination key -> normalised eigenvector weights
#   results_of_cointegration - combination key -> Johansen result object
#   p_values                 - declared here; not written by any cell shown
weights, results_of_cointegration, p_values = {}, {}, {}

# **Johansen Cointegration Test**

In [6]:
def check_cointegration(df):
    """Run the Johansen test on ``df`` and return the result only if it passes.

    The test is run with ``det_order=0`` and ``k_ar_diff=1``. It passes when
    every trace statistic (``lr1``) is strictly greater than the matching
    entry in column 1 of the trace critical-value table (``cvt``), which is
    the 95% level in statsmodels' 90/95/99 column layout.

    Returns:
        The Johansen result object when all comparisons hold, otherwise None.
    """
    johansen = coint_johansen(df, det_order=0, k_ar_diff=1)
    trace_stats = johansen.lr1
    critical_values = johansen.cvt[:, 1]
    return johansen if np.all(trace_stats > critical_values) else None

# **Generate Combinations of Time Series**

In [7]:
# Rebuild the wide price table from `prices` (one column per ticker); this is
# the frame handed to generate_stationary_series below.
df = pd.DataFrame(prices)

def generate_stationary_series(df, max_len=4):
    """Find every column combination of ``df`` that passes ``check_cointegration``.

    Combinations of 2 up to ``max_len`` columns are tried, in
    ``itertools.combinations`` order. For each one that passes:

    * the combination tuple is appended to the returned list;
    * ``weights[key]`` is set to the first eigenvector column
      (``evec[:, 0]``) divided by its own first element;
    * ``results_of_cointegration[key]`` is set to the Johansen result.

    ``key`` is the f-string rendering of the tuple, e.g.
    ``"('ETH-USD', 'CRO-USD')"``. Both dicts are module-level and are
    mutated in place.

    Args:
        df: DataFrame with one column per candidate series.
        max_len: Largest combination size to test (inclusive).

    Returns:
        List of column-name tuples that passed the test.
    """
    column_names = list(df.columns)
    accepted = []
    for size in range(2, max_len + 1):
        # Iterate lazily; the combinations never need to be held in memory at once.
        for combo in itertools.combinations(column_names, size):
            johansen = check_cointegration(df[list(combo)])
            if johansen is None:
                continue
            accepted.append(combo)
            leading_vector = johansen.evec[:, 0]
            weights[f'{combo}'] = leading_vector / leading_vector[0]
            results_of_cointegration[f'{combo}'] = johansen
    return accepted

In [8]:
# Test every 2- to 4-column combination of `df` (default max_len=4). As a side
# effect this fills the module-level `weights` and `results_of_cointegration`.
stationary_series = generate_stationary_series(df)

In [9]:
# List the accepted ticker combinations, one tuple per line.
for combo in stationary_series:
    print(combo)

('ETH-USD', 'CRO-USD')
('SOL-USD', 'BCH-USD')
('SOL-USD', 'XMR-USD')
('SOL-USD', 'BXC5168-USD')
('XRP-USD', 'USDC-USD')
('XRP-USD', 'DOGE-USD')
('XRP-USD', 'DAI-USD')
('XRP-USD', 'LTC-USD')
('XRP-USD', 'LEO-USD')
('XRP-USD', 'NEAR-USD')
('XRP-USD', 'XLM-USD')
('XRP-USD', 'CRO-USD')
('XRP-USD', 'TUSD-USD')
('USDC-USD', 'ADA-USD')
('USDC-USD', 'AVAX-USD')
('USDC-USD', 'DOGE-USD')
('USDC-USD', 'DOT-USD')
('USDC-USD', 'SHIB-USD')
('USDC-USD', 'DAI-USD')
('USDC-USD', 'LTC-USD')
('USDC-USD', 'UNI7083-USD')
('USDC-USD', 'ATOM-USD')
('USDC-USD', 'LEO-USD')
('USDC-USD', 'NEAR-USD')
('USDC-USD', 'XLM-USD')
('USDC-USD', 'OKB-USD')
('USDC-USD', 'FIL-USD')
('USDC-USD', 'ETC-USD')
('USDC-USD', 'LDO-USD')
('USDC-USD', 'XMR-USD')
('USDC-USD', 'HBAR-USD')
('USDC-USD', 'BXC5168-USD')
('USDC-USD', 'CRO-USD')
('USDC-USD', 'TUSD-USD')
('USDC-USD', 'VET-USD')
('STETH-USD', 'CRO-USD')
('ADA-USD', 'AVAX-USD')
('ADA-USD', 'ICP-USD')
('ADA-USD', 'DAI-USD')
('ADA-USD', 'NEAR-USD')
('ADA-USD', 'FIL-USD')
('ADA-US

# **Compute Price of Time Series**

In [34]:
# Build the weighted price series for every accepted combination.
# Result: "['T1', 'T2', ...]" (repr of the ticker list) -> {'value': Series}.
combination_of_timeseries = {}
for key, weight in weights.items():
    # Each key is the repr of the ticker tuple, e.g. "('ETH-USD', 'CRO-USD')".
    # literal_eval recovers the tuple exactly, so tickers containing spaces,
    # quotes, commas or parentheses are not mangled by string surgery.
    cleaned_list = list(ast.literal_eval(key))
    # Dot product of the selected price columns with their weights.
    time_series = df[cleaned_list].dot(weight)
    combination_of_timeseries.setdefault(f'{cleaned_list}', {})['value'] = time_series

print(combination_of_timeseries)

{"['ETH-USD', 'CRO-USD']": {'value': Datetime
2022-11-01 00:00:00+00:00   -28568.172375
2022-11-01 04:00:00+00:00   -28677.136249
2022-11-01 08:00:00+00:00   -28451.344994
2022-11-01 12:00:00+00:00   -28307.480125
2022-11-01 16:00:00+00:00   -28063.730023
                                 ...     
2023-10-31 04:00:00+00:00   -14353.289955
2023-10-31 08:00:00+00:00   -14522.543009
2023-10-31 12:00:00+00:00   -14162.609940
2023-10-31 16:00:00+00:00   -14279.761619
2023-10-31 20:00:00+00:00   -14615.918962
Freq: 4H, Length: 2190, dtype: float64}, "['SOL-USD', 'BCH-USD']": {'value': Datetime
2022-11-01 00:00:00+00:00    22.437341
2022-11-01 04:00:00+00:00    22.799816
2022-11-01 08:00:00+00:00    22.694668
2022-11-01 12:00:00+00:00    22.251109
2022-11-01 16:00:00+00:00    22.039360
                               ...    
2023-10-31 04:00:00+00:00    14.190530
2023-10-31 08:00:00+00:00    14.765606
2023-10-31 12:00:00+00:00    14.861515
2023-10-31 16:00:00+00:00    14.776973
2023-10-31 20:00

# **Compute ADF P-Values**

In [43]:
coint_series_pvalues = {}
# Run the augmented Dickey-Fuller test on each combined series and store
# element 1 of its result tuple under 'p_value' on the same entry.
for entry in combination_of_timeseries.values():
    entry['p_value'] = adfuller(entry['value'])[1]

# **Selection**

In [49]:
# Rank the combinations by ascending 'p_value' and keep the first ten.
items = sorted(combination_of_timeseries.items(), key=lambda pair: pair[1]['p_value'])
sliced_items = items[:10]
sorted_combination_of_timeseries = dict(sliced_items)

# Show the p-values of the retained combinations, best first.
for entry in sorted_combination_of_timeseries.values():
    print(entry['p_value'])

3.067441286665808e-28
4.0844166521930885e-28
6.772035471495494e-28
7.593652301009903e-28
8.158658797654966e-28
9.752147741583366e-28
1.0160963472048486e-27
1.2157161262746325e-27
1.2206251541441866e-27
1.3549459242350394e-27
10


# **Compute Hurst Exponents**

In [50]:
hurst_exponents = {}
# compute_Hc returns a 3-tuple; only its first element (H) is stored on each
# retained entry, under the 'hurst_exponents' key.
for entry in sorted_combination_of_timeseries.values():
    entry['hurst_exponents'] = compute_Hc(entry['value'])[0]

# **Compute Half-Life**

In [52]:
def compute_half_life(series):
    """Estimate the mean-reversion half-life of ``series``, in bars.

    Fits ``delta_t = beta * price_{t-1} + c`` by least squares, where
    ``delta_t = price_t - price_{t-1}``, and returns ``-ln(2) / beta``.

    The first observation has no predecessor, so it is dropped from the
    regression rather than back-filled. Back-filling would add an artificial
    ``(lag=price_0, delta=0)`` point and bias the slope.

    Args:
        series: Array-like or pandas Series of levels, oldest first.

    Returns:
        The half-life as a float. A non-negative ``beta`` gives a negative or
        infinite value. NaN is returned when fewer than two (lag, delta) pairs
        are available, since a line cannot be fitted.
    """
    price = pd.Series(series)
    # Align each change with the level that preceded it; skip the undefined first row.
    lagged_price = price.shift(1).iloc[1:]
    delta = price.diff().iloc[1:]
    if len(delta) < 2:
        return np.nan
    beta = np.polyfit(lagged_price.to_numpy(), delta.to_numpy(), 1)[0]
    half_life = -np.log(2) / beta
    return half_life

In [56]:
# Store each retained combination's half-life under 'half_life' on its entry.
for entry in sorted_combination_of_timeseries.values():
    entry['half_life'] = compute_half_life(entry['value'])

In [57]:
# Convert the dict of per-combination entries into a DataFrame: one column per
# combination, one row per entry key.
# NOTE(review): this rebinds the dict's name to a DataFrame, so the earlier
# cells that treat it as a dict of dicts no longer see the original mapping if
# they are re-run after this one.
sorted_combination_of_timeseries = pd.DataFrame(sorted_combination_of_timeseries)

In [58]:
# Display the summary table built in the previous cell.
sorted_combination_of_timeseries

Unnamed: 0,"['XRP-USD', 'USDC-USD', 'DAI-USD', 'LEO-USD']","['XRP-USD', 'USDC-USD', 'DAI-USD', 'LTC-USD']","['USDC-USD', 'DAI-USD', 'LTC-USD', 'ETC-USD']","['ETH-USD', 'USDC-USD', 'DAI-USD', 'CRO-USD']","['USDC-USD', 'STETH-USD', 'DAI-USD', 'CRO-USD']","['USDC-USD', 'DAI-USD', 'LEO-USD', 'XLM-USD']","['USDC-USD', 'DAI-USD', 'OKB-USD', 'XMR-USD']","['USDC-USD', 'AVAX-USD', 'DAI-USD', 'NEAR-USD']","['USDC-USD', 'DAI-USD', 'OKB-USD', 'STX4847-USD']","['XRP-USD', 'USDC-USD', 'DAI-USD', 'XLM-USD']"
value,Datetime 2022-11-01 00:00:00+00:00 105.7272...,Datetime 2022-11-01 00:00:00+00:00 168.9372...,Datetime 2022-11-01 00:00:00+00:00 -0.127528...,Datetime 2022-11-01 00:00:00+00:00 303450.5...,Datetime 2022-11-01 00:00:00+00:00 -0.126828...,Datetime 2022-11-01 00:00:00+00:00 -0.125378...,Datetime 2022-11-01 00:00:00+00:00 -0.125610...,Datetime 2022-11-01 00:00:00+00:00 -0.127816...,Datetime 2022-11-01 00:00:00+00:00 -0.128788...,Datetime 2022-11-01 00:00:00+00:00 82.10259...
p_value,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
hurst_exponents,0.490125,0.489499,0.490151,0.490556,0.49042,0.490527,0.490698,0.489763,0.489541,0.490164
half_lives,0.970305,0.968121,0.968485,0.973068,0.971763,0.97612,0.975788,0.972203,0.969534,0.975963
