# Cointegration and Copulas Study 

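_Cointegration_ fails throughout: the ADF test on the training-window residuals below cannot reject a unit root, so the second half of the notebook turns to a copula-based mispricing signal instead.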

In [1]:
import pandas as pd
import numpy as np
from rosettastone.utils.saveandload import WAREHOUSE
from rosettastone.algos.arbitrage.copulae import EmpProbIntegralTransform as F
from rosettastone.algos.arbitrage.copulae import BivariateCopula

# Contracts under study, bar frequency and the sampling window.
contract_1, contract_2, contract_3 = "IH", "IF", "IC"
freq, date_range = '1min', '20220801-20221001'

# All three contracts are read from the same dataframe variant.
df_kind_1 = df_kind_2 = df_kind_3 = 'df0'

# File-name stem per contract: <contract>.<date range>.sampled.ohlc.<freq>
prefix_1, prefix_2, prefix_3 = (
    f'{contract}.{date_range}.sampled.ohlc.{freq}'
    for contract in (contract_1, contract_2, contract_3)
)

# Full warehouse path: <stem>.<df kind>.parquet.brotli
path_1 = WAREHOUSE.joinpath(f'{prefix_1}.{df_kind_1}.parquet.brotli')
path_2 = WAREHOUSE.joinpath(f'{prefix_2}.{df_kind_2}.parquet.brotli')
path_3 = WAREHOUSE.joinpath(f'{prefix_3}.{df_kind_3}.parquet.brotli')

df_1, df_2, df_3 = (
    pd.read_parquet(path, engine='pyarrow') for path in (path_1, path_2, path_3)
)

# Later cells combine the frames position-by-position, so report whether the shapes agree.
print(f'd1 and d2 are of the same shape?  {df_1.shape == df_2.shape}')
print(f'd2 and d3 are of the same shape?  {df_2.shape == df_3.shape}')
from rosettastone.algos.arbitrage.cointegration import is_cointegrated

d1 and d2 are of the same shape?  True
d2 and d3 are of the same shape?  True


In [2]:
# Spot-check row 5150 of each frame: the three closes first, then the three contract codes.
tuple(frame.close.iloc[5150] for frame in (df_1, df_2, df_3)) + tuple(
    frame.code.iloc[5150] for frame in (df_1, df_2, df_3)
)

(2775.0, 4095.8, 6174.6, 'IH2209', 'IF2209', 'IC2209')

In [3]:
# Median close of each contract, to compare their price levels.
m1, m2, m3 = (frame.close.median() for frame in (df_1, df_2, df_3))
print(m1, m2, m3)

2747.2 4106.2 6278.4


In [4]:
# The first 70% of rows form the training window.
n_training = int(len(df_1) * 0.7)

# Close-to-close spread (contract_1 minus contract_2), truncated to the training window.
spread = (df_1['close'] - df_2['close']).to_numpy(dtype=float)[:n_training]
print(f'number of training samples: {n_training}')

number of training samples: 5153


In [5]:
import plotly.graph_objects as go
# Training-window spread against market time.
fig = go.Figure()  # type:ignore
fig.add_trace(
    go.Scatter(
        x=df_1.market_time.iloc[:n_training],
        y=spread,
        name=f'{contract_1}-{contract_2}',
    )  # type:ignore
)

# Collapse the stretches of the time axis listed below so the line has no flat gaps.
fig.update_xaxes(  # type:ignore
    rangeslider_visible=True,
    rangebreaks=[
        {'bounds': ['sat', 'mon']},                  # Saturday -> Monday
        {'bounds': [15, 9.5], 'pattern': 'hour'},    # 15:00 -> 09:30
        {'bounds': [11.5, 13], 'pattern': 'hour'},   # 11:30 -> 13:00
    ],
)

fig.update_layout(title=f'spread {contract_1}-{contract_2}')

fig.show()

In [6]:
# Training-window closes of the two contracts, as float arrays.
price_1, price_2 = (
    frame.close.to_numpy(dtype=float)[:n_training] for frame in (df_1, df_2)
)

# The call's result is the cell output.
is_cointegrated(price_1, price_2)

------------------------------
ADF Statistic: -0.674256
p-value: 0.423453
Critical Values:
	1%: -2.566
	5%: -1.941
	10%: -1.617
cannot reject the null hypothesis, there is a unit root
verdict: nonstationary residuals!


False

In [7]:
# Full-sample weighted spread: two units of contract_2 against three units of contract_1.
spread_one_half = df_2.close.to_numpy() * 2 - df_1.close.to_numpy() * 3
import plotly.graph_objects as go

fig = go.Figure()  # type:ignore
fig.add_trace(
    go.Scatter(
        x=df_1.market_time,
        y=spread_one_half,
        name=f'2*{contract_2}-3*{contract_1}',
    )  # type:ignore
)

# Collapse the stretches of the time axis listed below.
fig.update_xaxes(  # type:ignore
    rangeslider_visible=True,
    rangebreaks=[
        {'bounds': ['sat', 'mon']},
        {'bounds': [15, 9.5], 'pattern': 'hour'},
        {'bounds': [11.5, 13], 'pattern': 'hour'},
    ],
)

fig.update_layout(title=f'spread: 2{contract_2}-3{contract_1}')

fig.show()

In [8]:
# Three-leg combination over the full sample: 7*contract_1 + 6*contract_2 - 7*contract_3.
spread_3d = (
    7 * df_1.close.to_numpy()
    + 6 * df_2.close.to_numpy()
    - 7 * df_3.close.to_numpy()
)

fig = go.Figure()  # type:ignore
fig.add_trace(go.Scatter(x=df_1.market_time, y=spread_3d))  # type:ignore

# Collapse the stretches of the time axis listed below.
fig.update_xaxes(  # type:ignore
    rangeslider_visible=True,
    rangebreaks=[
        {'bounds': ['sat', 'mon']},
        {'bounds': [15, 9.5], 'pattern': 'hour'},
        {'bounds': [11.5, 13], 'pattern': 'hour'},
    ],
)

fig.update_layout(title=f'spread: 7{contract_1}+6{contract_2}-7{contract_3}')

fig.show()

In [9]:
# Second three-leg combination over the full sample:
# -4*contract_1 + 1*contract_2 + 1*contract_3.
spread_3d_2 = -4*df_1.close.to_numpy()+1*df_2.close.to_numpy()+1*df_3.close.to_numpy()

fig = go.Figure() #type:ignore
fig.add_trace(go.Scatter(x=df_1.market_time, 
                         y=spread_3d_2, )) #type:ignore

# Collapse the stretches of the time axis listed below.
fig.update_xaxes(rangeslider_visible=True, # type:ignore
                 rangebreaks=[
                                dict(bounds=['sat', 'mon']), 
                                dict(bounds=[15, 9.5], pattern='hour'),
                                dict(bounds=[11.5, 13], pattern='hour')]
                 )

# The title must state the weights actually used in spread_3d_2 (-4, +1, +1);
# a title quoting different coefficients mislabels the plotted series.
fig.update_layout(title=f'spread: -4{contract_1}+1{contract_2}+1{contract_3}')

fig.show()

In [10]:
from plotly.subplots import make_subplots
# Overlay both three-leg combinations on one chart, each with its own y-axis.
left_label = f'-4{contract_1}+1{contract_2}+1{contract_3}'
right_label = f'7{contract_1}+6{contract_2}-7{contract_3}'

fig = make_subplots(specs=[[{"secondary_y": True}]])  # type:ignore

fig.add_trace(
    go.Scatter(x=df_1.market_time, y=spread_3d_2, name=left_label),  # type:ignore
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=df_1.market_time, y=spread_3d, name=right_label),  # type:ignore
    secondary_y=True,
)

# Collapse the stretches of the time axis listed below.
fig.update_xaxes(  # type:ignore
    rangeslider_visible=True,
    rangebreaks=[
        {'bounds': ['sat', 'mon']},
        {'bounds': [15, 9.5], 'pattern': 'hour'},
        {'bounds': [11.5, 13], 'pattern': 'hour'},
    ],
)

fig.update_layout(title=f'combinations of {contract_1}, {contract_2}, {contract_3}')
fig.update_yaxes(title_text=left_label, secondary_y=False)  # type:ignore
fig.update_yaxes(title_text=right_label, secondary_y=True)  # type:ignore

fig.show()

In [11]:
# Close prices of the two contracts on one chart, each on its own y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])  # type:ignore

fig.add_trace(
    go.Scatter(
        x=df_1.market_time,
        y=df_1.close.to_numpy(),
        name=f'{contract_1}',
    ),  # type:ignore
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=df_1.market_time,
        y=df_2.close.to_numpy(),
        name=f'{contract_2}',
    ),  # type:ignore
    secondary_y=True,
)

# Collapse the stretches of the time axis listed below.
fig.update_xaxes(  # type:ignore
    rangeslider_visible=True,
    rangebreaks=[
        {'bounds': ['sat', 'mon']},
        {'bounds': [15, 9.5], 'pattern': 'hour'},
        {'bounds': [11.5, 13], 'pattern': 'hour'},
    ],
)

fig.update_layout(title=f'{contract_1} vs {contract_2}')
fig.update_yaxes(title_text=f'{contract_1}', secondary_y=False)  # type:ignore
fig.update_yaxes(title_text=f'{contract_2}', secondary_y=True)  # type:ignore
fig.show()

In [12]:
# Per-bar return: the bar's close relative to the same bar's open.
# The column is written onto the frames themselves.
df_1['returns'] = df_1['close'] / df_1['open'] - 1
df_2['returns'] = df_2['close'] / df_2['open'] - 1

# Both return series on one chart, each on its own y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])  # type:ignore

fig.add_trace(
    go.Scatter(
        x=df_1.market_time,
        y=df_1.returns.to_numpy(),
        name=f'{contract_1}',
    ),  # type:ignore
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=df_1.market_time,
        y=df_2.returns.to_numpy(),
        name=f'{contract_2}',
    ),  # type:ignore
    secondary_y=True,
)

# Collapse the stretches of the time axis listed below.
fig.update_xaxes(  # type:ignore
    rangeslider_visible=True,
    rangebreaks=[
        {'bounds': ['sat', 'mon']},
        {'bounds': [15, 9.5], 'pattern': 'hour'},
        {'bounds': [11.5, 13], 'pattern': 'hour'},
    ],
)

fig.update_layout(title=f'Returns:  {contract_1} vs {contract_2}')

fig.show()

## Mispricing Signal Illustration

In [13]:
from rosettastone.algos.arbitrage.paired import Pairing

# Build the pair object from the two contract frames (df_1 as the first leg, df_2 as the second).
aPair = Pairing(df_1, df_2)
# Hold out 30% of the data for testing. The recorded output reports the split in days
# (21 train / 10 test), so the split is presumably day-based — confirm in Pairing.
aPair.train_test_split(test_size=0.3)
# Compute the misprice index. Per the recorded output this first runs
# `get_uniform_returns` and fits a bivariate copula (a Student copula in the saved run).
# NOTE(review): later cells read aPair.flag_XY / aPair.flag_YX, presumably populated here — confirm.
aPair.get_misprice_index()


number of days in the training set: 21
number of days in the test set: 10
I am running `get_uniform_returns` first
Bivariate Copula Fitting Result
<pyvinecopulib.Bicop>
Student, parameters = 0.862279
 5.48884


In [20]:
# (lower, upper) pair passed to the thresholding step; the same tuple is echoed in the plot title below.
threshold = (-4, 4)
# thresholding returns a (short, long) pair of index collections plus a nested breakdown.
(idx_short, idx_long), idx_multiple = aPair.thresholding(threshold)
# NOTE(review): the second pair unpacks as (long, short), the reverse of the first pair's
# (short, long) order — confirm this matches what Pairing.thresholding actually returns.
((idx_short_XY, idx_long_XY), (idx_long_YX, idx_short_YX)) = idx_multiple

In [27]:
fig = make_subplots(specs=[[{"secondary_y": True}]])  # type:ignore
spread_all = df_1.close.to_numpy() - df_2.close.to_numpy()

# Full-sample spread on the primary axis.
fig.add_trace(
    go.Scatter(
        x=df_1.market_time,
        y=spread_all,
        name=f'{contract_1}-{contract_2}',
    ),  # type:ignore
    secondary_y=False,
)

# Mark the bars selected by the thresholding step (long first, then short) on the spread.
for signal_name, signal_idx in (('idx_long', idx_long), ('idx_short', idx_short)):
    fig.add_trace(
        go.Scatter(
            x=df_1.market_time.iloc[signal_idx],
            y=spread_all[signal_idx],
            mode='markers',
            marker={'size': 3},
            name=signal_name,
        ),  # type:ignore
        secondary_y=False,
    )

# Flag series from the pair object on the secondary axis.
for flag_name, flag_values in (('Flag: X|Y', aPair.flag_XY), ('Flag: Y|X', aPair.flag_YX)):
    fig.add_trace(
        go.Scatter(x=df_1.market_time, y=flag_values, name=flag_name),  # type:ignore
        secondary_y=True,
    )

# Collapse the stretches of the time axis listed below.
fig.update_xaxes(  # type:ignore
    rangeslider_visible=True,
    rangebreaks=[
        {'bounds': ['sat', 'mon']},
        {'bounds': [15, 9.5], 'pattern': 'hour'},
        {'bounds': [11.5, 13], 'pattern': 'hour'},
    ],
)

fig.update_yaxes(title_text=f'Spread: {contract_1}-{contract_2}, or X-Y', secondary_y=False)  # type:ignore
fig.update_yaxes(title_text='Flags', secondary_y=True)  # type:ignore

fig.update_layout(title=f'spread {contract_1}-{contract_2}, signal with threshold: {threshold}')

fig.show()