In [27]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import dcor
from functools import lru_cache
import umap
import plotly.express as px
import plotly.graph_objects as go
import pandas_market_calendars as mcal
%autosave 120
pd.options.display.float_format = "{:,.4f}".format


Autosaving every 120 seconds


In [28]:
# YOUR LOCAL PATH
import os

# Data files are expected to sit in the notebook's working directory.
CWD = os.getcwd()
# Joining with an empty final component appends the platform's own path
# separator, so '{}file.csv'.format(base_path) resolves on every OS
# (a hard-coded "\\" suffix only yields valid paths on Windows).
base_path = os.path.join(CWD, "")

# DATA LOADING


In [29]:
# NYSE session schedule between the two hard-coded dates below.
nyse = mcal.get_calendar('NYSE')
ny_df = nyse.schedule(start_date='2016-01-01', end_date='2022-12-31')

# Collapse the schedule to one row per quarter, keeping the last row of each
# quarter, then expose the date part of that row's market close as q_end_date.
# NOTE(review): newer pandas releases may warn about the 'Q' alias - confirm
# against the pandas version this notebook is pinned to.
quarterly = ny_df.resample('Q')
ny_dfq = quarterly.last()
ny_dfq['q_end_date'] = ny_dfq['market_close'].dt.date
ny_dfq.head(2)

Unnamed: 0,market_open,market_close,q_end_date
2016-03-31,2016-03-31 13:30:00+00:00,2016-03-31 20:00:00+00:00,2016-03-31
2016-06-30,2016-06-30 13:30:00+00:00,2016-06-30 20:00:00+00:00,2016-06-30


In [30]:
# Load the returns table; the first CSV column becomes the index.
ret_df = pd.read_csv('{}ticker_returns.csv'.format(base_path), index_col=0)
# Convert the index to datetimes. The `infer_datetime_format` flag is omitted:
# it is deprecated in pandas 2.0+, where passing it has no effect beyond
# emitting a warning.
ret_df.index = pd.to_datetime(ret_df.index)


In [31]:
# Preview the first rows of the loaded returns table.
ret_df.head()

Unnamed: 0_level_0,px_pct,ticker_code
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-08-06,0.0,1
2010-08-09,0.0002,1
2010-08-10,-0.0006,1
2010-08-11,-0.0006,1
2010-08-12,0.0011,1


In [32]:
# Mapping table read from synth_qis_map.csv; the lookup helpers in this
# notebook read its ticker_code, ticker_name and qis_code columns.
ticker_map = pd.read_csv(base_path + 'synth_qis_map.csv')
# Distinct ticker names found in the mapping table.
ticker_list = ticker_map['ticker_name'].unique()


In [33]:
@lru_cache()
def map_ticker(row):
    """Return the ticker name mapped to the ticker code ``row``.

    Looks the code up in the module-level ``ticker_map`` frame and returns
    the ``ticker_name`` of the last matching row. Results are memoized by
    ``lru_cache``. Raises ``IndexError`` when no row matches.
    """
    names = ticker_map.loc[ticker_map['ticker_code'] == row, 'ticker_name']
    return names.iloc[-1]
@lru_cache()
def map_qis_code(row):
    """Return the QIS code mapped to the ticker code ``row``.

    Looks the code up in the module-level ``ticker_map`` frame and returns
    the ``qis_code`` of the last matching row. Results are memoized by
    ``lru_cache``. Raises ``IndexError`` when no row matches.
    """
    codes = ticker_map.loc[ticker_map['ticker_code'] == row, 'qis_code']
    return codes.iloc[-1]
@lru_cache()
def map_qis_codeII(row):
    """Return the QIS code mapped to the ticker *name* ``row``.

    Same lookup as ``map_qis_code`` but keyed on ``ticker_name`` instead of
    ``ticker_code``: returns the ``qis_code`` of the last matching row of the
    module-level ``ticker_map`` frame. Results are memoized by ``lru_cache``.
    Raises ``IndexError`` when no row matches.
    """
    codes = ticker_map.loc[ticker_map['ticker_name'] == row, 'qis_code']
    return codes.iloc[-1]

In [34]:
# Attach the ticker name and QIS code to every return row by looking up the
# row's ticker_code with the memoized mapping helpers.
ticker_codes = ret_df['ticker_code']
ret_df['ticker'] = ticker_codes.apply(map_ticker)
ret_df['qis_code'] = ticker_codes.apply(map_qis_code)
ret_df.tail()

Unnamed: 0_level_0,px_pct,ticker_code,ticker,qis_code
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-09-26,-0.0023,163,CAPSTONE_VOLTA,EQ_Volatility_Carry
2022-09-27,0.0011,163,CAPSTONE_VOLTA,EQ_Volatility_Carry
2022-09-28,-0.0045,163,CAPSTONE_VOLTA,EQ_Volatility_Carry
2022-09-29,-0.0015,163,CAPSTONE_VOLTA,EQ_Volatility_Carry
2022-09-30,0.0006,163,CAPSTONE_VOLTA,EQ_Volatility_Carry


## Distance Correlation Functions

In [35]:
def dist_corr(a, b):
    """Return the distance correlation between ``a`` and ``b``.

    Thin wrapper around ``dcor.distance_correlation`` that always selects
    the ``AVL`` distance-covariance method.
    """
    avl_method = dcor.DistanceCovarianceMethod.AVL
    return dcor.distance_correlation(a, b, method=avl_method)

In [36]:
def generate_dcorr(df):
    """Build the pairwise distance-correlation matrix of ``df``'s columns.

    ``dist_corr`` is evaluated once per unordered column pair (and once for
    each column against itself); the mirrored cell reuses the value already
    stored for the pair.

    Returns a DataFrame whose index and columns are both the column labels
    of ``df``.
    """
    labels = df.columns
    matrix = {}
    for col in labels:
        # Register the row before filling it, so the self-pair lookup below
        # sees an (initially empty) entry for `col`.
        row = matrix[col] = {}
        for other in labels:
            mirrored = matrix.get(other, {})
            if col in mirrored:
                # (other, col) was computed on an earlier pass - reuse it.
                row[other] = mirrored[col]
            else:
                row[other] = dist_corr(df[col], df[other])
    return pd.DataFrame(matrix)

### Create Data Snapshots

In [37]:
@lru_cache(maxsize=2500)
def qis_px(ticker):
    """Return a copy of the rows of ``ret_df`` whose ``ticker`` equals ``ticker``.

    The result is memoized (up to 2500 distinct tickers), so repeated calls
    with the same ticker return the very same DataFrame object; callers that
    need to modify it should copy it first.
    """
    return ret_df.loc[ret_df['ticker'] == ticker].copy()

In [38]:
# Rolling-window parameters (previously inline magic numbers).
WINDOW_QUARTERS = 10    # each window runs from one quarter-end to the one 10 quarters later
MAX_WINDOW_ROWS = 780   # keep at most this many of the most recent rows per ticker
MIN_WINDOW_ROWS = 100   # a ticker needs strictly more rows than this to be included

qe_dates = ny_dfq.q_end_date.values

# qe_dict: window-end date -> {ticker name -> returns frame for that window}
qe_dict = {}
# Pair every quarter-end with the quarter-end WINDOW_QUARTERS positions later.
for t0, t1 in zip(qe_dates, qe_dates[WINDOW_QUARTERS:]):
    qt_dict = {}
    for q in ticker_list:
        temp_df = qis_px(q).loc[t0:t1].tail(MAX_WINDOW_ROWS)
        if len(temp_df) > MIN_WINDOW_ROWS:
            # Work on a copy so the frame memoized by qis_px is never modified.
            tdf = temp_df[['px_pct']].copy()
            tdf['ticker'] = q
            qt_dict[q] = tdf

    qe_dict[t1] = qt_dict
            

In [39]:
# TODO (open question about the snapshot cell above): create snapshots only when distance correlation is high?

### Create snapshots of distance correlation through time

In [40]:
# dcorr_snaps: window-end date -> distance-correlation matrix of that window.
dcorr_snaps = {}
for qed in tqdm(qe_dates):
    if qed not in qe_dict:
        # No window ends on this quarter-end date.
        continue
    # Stack the per-ticker frames, then spread them into one column per ticker.
    stacked = pd.concat(qe_dict[qed].values())
    # Cells with no value for a ticker on a given row are filled with 0.
    xdf = stacked.pivot(columns='ticker', values='px_pct').fillna(0)
    cddf = generate_dcorr(xdf)
    dcorr_snaps[qed] = cddf


100%|██████████████████████████████████████████████████████████████████████████████████| 28/28 [00:42<00:00,  1.51s/it]


In [41]:
# Display every stored snapshot (dict keyed by window-end date).
dcorr_snaps

{datetime.date(2018, 9, 28):                                  CAPSTONE_CPP_CONVEX  CAPSTONE_LONG_VOL  \
 CAPSTONE_CPP_CONVEX                           1.0000             0.2614   
 CAPSTONE_LONG_VOL                             0.2614             1.0000   
 CAPSTONE_VOLTA                                0.4295             0.2934   
 EQ_Volatility_Carry_1                         0.3468             0.1976   
 EQ_Volatility_Carry_10                        0.5854             0.2736   
 EQ_Volatility_Carry_11                        0.5601             0.2553   
 EQ_Volatility_Carry_18                        0.3267             0.2793   
 EQ_Volatility_Carry_19                        0.5698             0.3488   
 EQ_Volatility_Carry_2                         0.4005             0.2115   
 EQ_Volatility_Carry_21                        0.3677             0.3517   
 EQ_Volatility_Carry_22                        0.8111             0.2979   
 EQ_Volatility_Carry_23                        0.7398       

In [42]:
# Embed the most recently stored snapshot. Selecting it from dcorr_snaps
# explicitly (dicts keep insertion order) avoids depending on whatever value
# the loop variable `cddf` happened to hold after the snapshot loop.
cddf = dcorr_snaps[list(dcorr_snaps)[-1]]
# UMAP is stochastic; a fixed random_state makes the embedding repeatable
# across kernel restarts.
embedding = umap.UMAP(n_neighbors=10, min_dist=0.1, random_state=42).fit_transform(cddf)

In [43]:
# Wrap the embedding in a frame whose rows carry the labels of cddf's index.
udf = pd.DataFrame(data=embedding, index=cddf.index)

In [44]:
# Show the embedding coordinates, one row per label.
udf

Unnamed: 0,0,1
CAPSTONE_CPP_CONVEX,16.3775,12.5316
CAPSTONE_LONG_VOL,15.4922,7.2089
CAPSTONE_VOLTA,15.4842,7.4354
EQ_Volatility_Carry_1,13.7727,11.9698
EQ_Volatility_Carry_10,16.5651,16.1966
EQ_Volatility_Carry_11,16.7686,15.3871
EQ_Volatility_Carry_18,14.1352,11.2929
EQ_Volatility_Carry_19,17.412,15.4618
EQ_Volatility_Carry_2,14.167,11.9484
EQ_Volatility_Carry_21,14.9087,11.9636


In [45]:
# Rebuild udf from the embedding with named coordinate columns so this cell can
# be re-run safely: assigning two names to udf.columns fails with a length
# mismatch once the 'ticker' and 'qis_code' columns have been appended.
udf = pd.DataFrame(embedding, index=cddf.index, columns=['xcor', 'ycor'])
udf.index.name = None
# Expose the row label as a regular column and attach each ticker's QIS code.
udf['ticker'] = udf.index
udf['qis_code'] = udf['ticker'].apply(map_qis_codeII)


In [46]:
# Tickers drawn as individual large markers: (row label in udf, legend name, marker colour).
highlights = [
    ('CAPSTONE_CPP_CONVEX', 'CAPSTONE CPP_CONVEX', 'red'),
    ('CAPSTONE_LONG_VOL', 'CAPSTONE LONG VOL', 'blue'),
    ('CAPSTONE_VOLTA', 'CAPSTONE VOLTA CARRY', 'green'),
]
highlight_tickers = [spec[0] for spec in highlights]

# Base scatter: every other ticker, coloured and symbol-coded by qis_code.
zdf = udf[~udf.ticker.isin(highlight_tickers)].copy()
fig = px.scatter(
    zdf,
    x="xcor",
    y="ycor",
    hover_data=['ticker', 'qis_code'],
    color='qis_code',
    color_discrete_sequence=px.colors.qualitative.Dark24,
    symbol='qis_code',
    title="EQ VOL QIS CLUSTERING",
)

# One extra trace per highlighted ticker, added in the order listed above.
for ticker, label, colour in highlights:
    point = udf.loc[ticker]
    fig.add_trace(
        go.Scatter(
            mode='markers',
            x=[point.xcor],
            y=[point.ycor],
            name=label,
            marker=dict(color=colour, size=20, symbol='star-diamond-open-dot'),
        )
    )
fig.show()
