# QIS Cluster Analysis

## Import Libraries

In [1]:
import os
os.getcwd()
os.chdir('C:\\Users\\RRQ1FYQ\\Documents\\RMP')

In [2]:
from EquityHedging.datamanager import data_manager as dm
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as shc
from sklearn.decomposition import PCA
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.express as px
import plotly.offline as py
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster 
import plotly.figure_factory as ff
import plotly.graph_objects as go
from scipy.spatial.distance import pdist, squareform
import ipywidgets as widgets
from ipywidgets import interact


## Import Normalized Hedge Metrics

In [3]:
file_path = dm.EQUITY_HEDGE_DATA+'def_uni_hedge_metrics_more.xlsx'
#normalized_hm = pd.read_excel(dm.EQUITY_HEDGE_DATA+'def_uni_hedge_metrics_more.xlsx', index_col=(0))
normalized_hm = pd.read_excel(dm.QIS_UNIVERSE +'Normalized_Hedge_Metrics.xlsx', index_col=(0))
features = ['Downside Reliability', 'Convexity', 'Cost', 'Decay']
normalized_hm = normalized_hm[features]
normalized_hm.head(20)

Unnamed: 0,Downside Reliability,Convexity,Cost,Decay
Barclays Weekly Short Var Hourly Index BXIIUWFH Index,0.845629,0.001556,0.952677,1.0
Barclays US Small Cap Weekly Var Hourly Index BXIIRWVH Index,0.790128,0.001557,0.940718,1.0
Barclays Eurozone Weekly Short Var Hourly Index BXIIESFH Index,0.708328,0.001306,0.95796,1.0
Barclays US Long Short Variance Relative Value Strategy BXIIUVR2 Index,0.52779,1.0,0.0,0.10137
Barclays VIX Options Risk Premium Series II Strategy Excess Return BEFSDHVD Index,0.661711,0.018968,0.885105,0.687671
Barclays Global Multi-Factor MH EW Index BXIIWFHU Index,0.589812,0.001812,0.95877,0.531507
Barclays Global Multi-Factor MH EW Index HiC BXIIWHIC Index,0.58752,0.001346,0.958411,0.271233
Global Multi-Factor MH 2x Leveraged BXIIWFH2 Index,0.590719,0.003914,0.9156,0.410959
Barclays Global Multi-Factor MH 3x Leveraged BXIIWFH3 Index,0.591646,0.006097,0.872037,0.410959
Barclays US Multi-Factor MH EW Index BXIIUFHU Index,0.547069,0.004272,0.924251,0.063014


In [4]:
normalized_hm = pd.read_excel(dm.QIS_UNIVERSE + 'Normalized_Hedge_Metrics.xlsx', index_col=0)

features = ['Downside Reliability',  'Convexity', 'Cost', 'Decay']
normalized_hm = normalized_hm[features]

normalized_hm.head(100)

Unnamed: 0,Downside Reliability,Convexity,Cost,Decay
Barclays Weekly Short Var Hourly Index BXIIUWFH Index,0.845629,0.001556,0.952677,1.000000
Barclays US Small Cap Weekly Var Hourly Index BXIIRWVH Index,0.790128,0.001557,0.940718,1.000000
Barclays Eurozone Weekly Short Var Hourly Index BXIIESFH Index,0.708328,0.001306,0.957960,1.000000
Barclays US Long Short Variance Relative Value Strategy BXIIUVR2 Index,0.527790,1.000000,0.000000,0.101370
Barclays VIX Options Risk Premium Series II Strategy Excess Return BEFSDHVD Index,0.661711,0.018968,0.885105,0.687671
...,...,...,...,...
Barclays China Equity Intraday Momentum Index BXIICIDB Index,0.467750,0.012007,0.900905,0.369863
Barclays Australia Equity Intraday Momentum Index BXIIOIDB Index,0.448056,0.005627,0.955460,1.000000
Barclays US Long Short Variance Hedging Index BXIIUVS2 Index,0.526680,0.588517,0.164816,0.082192
Barclays US Long Short Variance Tail Hedge Index BXIIUVT2 Index,0.517847,0.287613,0.389965,0.035616


In [5]:
# Calculate the sum of features for each strategy (row-wise sum)
normalized_hm['Total'] = normalized_hm.sum(axis=1)

normalized_hm.head(100)

Unnamed: 0,Downside Reliability,Convexity,Cost,Decay,Total
Barclays Weekly Short Var Hourly Index BXIIUWFH Index,0.845629,0.001556,0.952677,1.000000,2.799862
Barclays US Small Cap Weekly Var Hourly Index BXIIRWVH Index,0.790128,0.001557,0.940718,1.000000,2.732403
Barclays Eurozone Weekly Short Var Hourly Index BXIIESFH Index,0.708328,0.001306,0.957960,1.000000,2.667594
Barclays US Long Short Variance Relative Value Strategy BXIIUVR2 Index,0.527790,1.000000,0.000000,0.101370,1.629160
Barclays VIX Options Risk Premium Series II Strategy Excess Return BEFSDHVD Index,0.661711,0.018968,0.885105,0.687671,2.253455
...,...,...,...,...,...
Barclays China Equity Intraday Momentum Index BXIICIDB Index,0.467750,0.012007,0.900905,0.369863,1.750526
Barclays Australia Equity Intraday Momentum Index BXIIOIDB Index,0.448056,0.005627,0.955460,1.000000,2.409142
Barclays US Long Short Variance Hedging Index BXIIUVS2 Index,0.526680,0.588517,0.164816,0.082192,1.362205
Barclays US Long Short Variance Tail Hedge Index BXIIUVT2 Index,0.517847,0.287613,0.389965,0.035616,1.231041


# Devang's code

In [27]:
# Calculate the pairwise distances between strategies
distances = pdist(normalized_hm[features])

# Convert the pairwise distances to a square distance matrix
square_distances = squareform(distances)

# Perform hierarchical clustering using linkage on the square distance matrix
linked = linkage(square_distances, 'ward')

num_clusters = 3

clusters = fcluster(linked, t=num_clusters, criterion='maxclust')

normalized_hm['Cluster'] = clusters


best_strategies = normalized_hm.groupby('Cluster')['Total'].idxmax()

# Get the rows corresponding to the best strategies
best_performing_strategies = normalized_hm.loc[best_strategies]

print("Best Performing Strategies in Each Cluster:")
print(best_performing_strategies)

#  function to display the strategies in the selected cluster
def show_strategies_in_cluster(selected_cluster):
    selected_strategies = normalized_hm[normalized_hm['Cluster'] == selected_cluster].index.tolist()
    print(f"\nCluster {selected_cluster} Strategies:")
    print(selected_strategies)

#  a dropdown widget for selecting different cluster numbers
cluster_dropdown = widgets.Dropdown(
    options=normalized_hm['Cluster'].unique(),
    description='Select Cluster:',
    value=num_clusters,  # Default value is the total number of clusters
)


interact(show_strategies_in_cluster, selected_cluster=cluster_dropdown);


scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix



Best Performing Strategies in Each Cluster:
                                                    Downside Reliability  \
UBS Epsilon Theory Narrative Alpha Long-Only St...              0.934067   
BNP TIER US Index  BNPITIU1 Index                               0.944732   
Macquarie US Equity Weekly Volatility Carry Sel...              0.652370   

                                                    Convexity      Cost  \
UBS Epsilon Theory Narrative Alpha Long-Only St...   0.011112  0.842824   
BNP TIER US Index  BNPITIU1 Index                    0.010237  0.933276   
Macquarie US Equity Weekly Volatility Carry Sel...   0.000676  0.987556   

                                                       Decay     Total  \
UBS Epsilon Theory Narrative Alpha Long-Only St...  0.632877  2.420880   
BNP TIER US Index  BNPITIU1 Index                   1.000000  2.888244   
Macquarie US Equity Weekly Volatility Carry Sel...  0.857534  2.498136   

                                                    C

interactive(children=(Dropdown(description='Select Cluster:', index=1, options=(2, 3, 1), value=3), Output()),…

# Maddie's Code

In [17]:
links = shc.linkage(normalized_hm[features], method = 'ward')
# Set a threshold to determine the number of clusters 
threshold = 3
# You can adjust this value based on your data and requirements

# Retrieve cluster labels for each data point 
clusters = fcluster(links, t = threshold, criterion='distance')

normalized_hm['Cluster'] = list(clusters)
normalized_hm.head(10)

Unnamed: 0,Downside Reliability,Convexity,Cost,Decay,Total,Cluster
Barclays Weekly Short Var Hourly Index BXIIUWFH Index,0.845629,0.001556,0.952677,1.0,2.799862,6
Barclays US Small Cap Weekly Var Hourly Index BXIIRWVH Index,0.790128,0.001557,0.940718,1.0,2.732403,6
Barclays Eurozone Weekly Short Var Hourly Index BXIIESFH Index,0.708328,0.001306,0.95796,1.0,2.667594,6
Barclays US Long Short Variance Relative Value Strategy BXIIUVR2 Index,0.52779,1.0,0.0,0.10137,1.62916,2
Barclays VIX Options Risk Premium Series II Strategy Excess Return BEFSDHVD Index,0.661711,0.018968,0.885105,0.687671,2.253455,8
Barclays Global Multi-Factor MH EW Index BXIIWFHU Index,0.589812,0.001812,0.95877,0.531507,2.081901,8
Barclays Global Multi-Factor MH EW Index HiC BXIIWHIC Index,0.58752,0.001346,0.958411,0.271233,1.81851,4
Global Multi-Factor MH 2x Leveraged BXIIWFH2 Index,0.590719,0.003914,0.9156,0.410959,1.921193,8
Barclays Global Multi-Factor MH 3x Leveraged BXIIWFH3 Index,0.591646,0.006097,0.872037,0.410959,1.88074,4
Barclays US Multi-Factor MH EW Index BXIIUFHU Index,0.547069,0.004272,0.924251,0.063014,1.538606,1


In [18]:
best_strategies = normalized_hm.groupby('Cluster')['Total'].idxmax()

# Get the rows corresponding to the best strategies
best_performing_strategies = normalized_hm.loc[best_strategies]

print("Best Performing Strategies in Each Cluster:")
print(best_performing_strategies)


Best Performing Strategies in Each Cluster:
                                                    Downside Reliability  \
US Weekly Short Variance Index  BXIIWSVU Index                  0.902237   
Barclays US Long Short Variance Relative Value ...              0.527790   
UBS US Long Short Hourly Variance Balanced  XUB...              0.359822   
Macquarie WTI Intraday Mean Reversion   MQCPR1C...              0.519752   
Citi US Tech Mean Reversion Index  CIEQMUT1 Index               0.544936   
BNP TIER US Index  BNPITIU1 Index                               0.944732   
UBS Epsilon Theory Narrative Alpha Long-Only St...              0.934067   
BNP AIR US 2 TR Index  BNPIAIUS Index                           0.791635   

                                                    Convexity      Cost  \
US Weekly Short Variance Index  BXIIWSVU Index       0.002498  0.934188   
Barclays US Long Short Variance Relative Value ...   1.000000  0.000000   
UBS US Long Short Hourly Variance Balanced  XU

In [19]:

#  function to display the strategies in the selected cluster
def show_strategies_in_cluster(selected_cluster):
    selected_strategies = normalized_hm[normalized_hm['Cluster'] == selected_cluster].index.tolist()
    print(f"\nCluster {selected_cluster} Strategies:")
    print(selected_strategies)

#  a dropdown widget for selecting different cluster numbers
cluster_dropdown = widgets.Dropdown(
    options=normalized_hm['Cluster'].unique(),
    description='Select Cluster:',
     # Default value is the total number of clusters
)


interact(show_strategies_in_cluster, selected_cluster=cluster_dropdown);

interactive(children=(Dropdown(description='Select Cluster:', options=(6, 2, 8, 4, 1, 5, 3, 7), value=6), Outp…

# Create Dendrogram

In [None]:

# Create the dendrogram figure
dendro_fig = ff.create_dendrogram(links, orientation='bottom')

# Add custom hover text to the dendrogram
dendro_fig.update_traces(hovertext=dendro_fig['data'][0]['text'])

# Customize the layout
dendro_fig.update_layout(width=800, height=700, title='Dendrogram')

# Define a callback function for hover events
def hover_callback(trace, points, state):
    ind = points.point_inds[0]
    cluster_num = clusters[ind]
    strategies = normalized_hm[normalized_hm['Cluster'] == cluster_num].index.tolist()
    tooltip = f"Cluster {cluster_num} Strategies: <br>" + "\n".join(strategies)
    trace.set_hovertext(tooltip)
# Add the callback to the dendrogram
dendro_fig.data[0].on_hover(hover_callback)

# Set the x-axis labels to the names of the strategies
layout = dendro_fig.layout
layout.xaxis.ticktext = normalized_hm.index.tolist()


# Show the dendrogram
dendro_fig.show()

## Find what strats are in each cluster
find what strats rank highest in each cluster 
best in each metric
best over all

In [20]:
fig = plt.figure(figsize=(20, 10))
plt.xticks(rotation=45)
dend = shc.dendrogram(links)

Error in callback <function flush_figures at 0x0000027B8E624670> (for post_execute):


KeyboardInterrupt: 

In [21]:
# Add a callback function to the dendrogram
def on_click(event):
    if event.mouseevent.button == 1:
        ind = event.ind[0]
        cluster_num = clusters[ind]
        strategies = normalized_hm[normalized_hm['Cluster'] == cluster_num].index.tolist()
        print(f"Cluster {cluster_num} Strategies: {strategies}")

fig.canvas.mpl_connect('pick_event', on_click)
plt.show()

## Dendrogram 

## Principal Component Analysis

In [None]:
pca = PCA(n_components = 2)
X_principal = pca.fit_transform(normalized_hm)

In [None]:
X_principal = pd.DataFrame(X_principal, index = normalized_hm.index)
X_principal.columns = ['P1', 'P2']

In [None]:
X_principal

## Elbow Method

In [None]:
inertia = []
K = range(1,6)
for k in K:
    kmeanModel = KMeans(n_clusters=k).fit(normalized_hm)
    kmeanModel.fit(normalized_hm)
    inertia.append(kmeanModel.inertia_)
    
    # Plot the elbow
plt.plot(K, inertia, 'bx-')
plt.xlabel('k')
plt.ylabel('Inertia')
plt.show()

## Determine amount of clusters

In [None]:
# input clusters here
ncluster = 3

In [None]:
ac2 = AgglomerativeClustering(n_clusters = ncluster, affinity = 'euclidean', linkage = 'ward')
X_principal['Cluster'] = ac2.fit_predict(X_principal)
X_principal2 = X_principal.copy()
X_principal2['Strat'] = list(X_principal.index)
X_principal2

## Agglomerative Clustering 

In [None]:
ac2 = AgglomerativeClustering(n_clusters = ncluster, affinity = 'euclidean', linkage = 'ward')
fig = px.scatter(X_principal2, x= 'P1',y ='P2', 
                 color = ac2.fit_predict(X_principal), hover_data = ['Strat'])
fig

In [None]:
ac2 = AgglomerativeClustering(n_clusters = ncluster, affinity = 'euclidean', linkage = 'single')
fig = px.scatter( x= X_principal['P1'],y =X_principal['P2'], color = ac2.fit_predict(X_principal))
fig

In [None]:
ac2 = AgglomerativeClustering(n_clusters = ncluster, affinity = 'euclidean', linkage = 'complete')
fig = px.scatter( x= X_principal['P1'],y =X_principal['P2'], color = ac2.fit_predict(X_principal))
fig

In [None]:
ac2 = AgglomerativeClustering(n_clusters = ncluster, affinity = 'euclidean', linkage = 'average')
fig = px.scatter( x= X_principal['P1'],y =X_principal['P2'], color = ac2.fit_predict(X_principal))
fig