In [11]:
# Notebook to compute correlations between stock prices and group correlated symbols
import sys
sys.path.append("..")

import numpy as np
import pandas as pd
import vectorbtpro as vbt
from core import strat
import scipy.cluster.hierarchy as sch

In [48]:
def cluster_corr(corr_array, prop_number_of_groups, inplace=False):
    """
    Rearrange a correlation matrix so that groups of highly correlated
    variables are next to each other.

    The rows of ``corr_array`` are compared with the Euclidean distance,
    clustered hierarchically with complete linkage, and the tree is cut at
    ``largest_pairwise_distance / prop_number_of_groups``.

    Parameters
    ----------
    corr_array : pandas.DataFrame or numpy.ndarray
        An NxN correlation matrix.
    prop_number_of_groups : int or float
        Strictly positive divisor applied to the largest pairwise distance to
        obtain the cut threshold. Larger values lower the threshold and thus
        yield more groups; it is NOT the exact number of groups.
    inplace : bool, default False
        Kept for backward compatibility only. The input is never modified:
        a reordered copy is returned whatever the value.

    Returns
    -------
    reordered : pandas.DataFrame or numpy.ndarray
        The NxN matrix with rows and columns rearranged by cluster
        (same type as the input).
    groups : list of list
        One list per cluster, in cluster-id order. Members are column labels
        for a DataFrame input and integer positions for an ndarray input,
        listed in the same order as they appear in ``reordered``.

    Raises
    ------
    ValueError
        If ``prop_number_of_groups`` is not strictly positive.
    """
    # Imported explicitly rather than relying on the ``sch.distance`` re-export.
    from scipy.spatial.distance import pdist

    if prop_number_of_groups <= 0:
        raise ValueError("prop_number_of_groups must be strictly positive")

    is_frame = isinstance(corr_array, pd.DataFrame)
    if not is_frame:
        corr_array = np.asarray(corr_array)

    n_vars = corr_array.shape[0]
    # ndarrays carry no labels, so fall back to integer positions.
    labels = list(corr_array.columns) if is_frame else list(range(n_vars))

    # Hierarchical clustering needs at least two observations; with fewer,
    # every variable (if any) is its own group and no reordering is needed.
    if n_vars < 2:
        return corr_array.copy(), [[label] for label in labels]

    pairwise_distances = pdist(corr_array)
    linkage = sch.linkage(pairwise_distances, method='complete')
    cluster_distance_threshold = pairwise_distances.max() / prop_number_of_groups

    # fcluster returns 1-based cluster ids, one per row of corr_array.
    idx_to_cluster_array = sch.fcluster(linkage, cluster_distance_threshold,
                                        criterion='distance')

    groups = [[] for _ in range(int(idx_to_cluster_array.max()))]
    for label, cluster_id in zip(labels, idx_to_cluster_array):
        groups[cluster_id - 1].append(label)

    # Stable sort keeps the original relative order inside each cluster, so
    # the reordered matrix lines up with the flattened ``groups`` lists.
    idx = np.argsort(idx_to_cluster_array, kind='stable')

    # Fancy indexing always returns a new object, so the input stays intact.
    # Rows and columns are permuted directly (no transpose); for a symmetric
    # correlation matrix this yields the same values as before.
    if is_frame:
        return corr_array.iloc[idx, idx], groups
    return corr_array[np.ix_(idx, idx)], groups

In [13]:
# Data-set selection passed to the project-local StratHold constructor.
# NOTE(review): "2007_2023_08" presumably encodes the covered date range
# (2007 through Aug 2023) — confirm against core.strat.
period="2007_2023_08"
symbol_index="NASDAQ"
# `ust.close` is the frame whose `.corr()` feeds cluster_corr in a later cell.
ust=strat.StratHold(period,symbol_index=symbol_index)

  0%|          | 0/71 [00:00<?, ?it/s]

In [53]:
# Correlate the columns of `ust.close` (presumably one close-price series per
# symbol — confirm), then reorder the matrix by cluster. The second argument
# (8) divides the largest pairwise distance to set the cluster cut threshold.
corr_matrix, arr= cluster_corr(ust.close.corr(),8)

In [44]:
# Display the correlation matrix reordered by cluster.
corr_matrix

symbol,AAPL,INTC,INTU,ISRG,KLAC,LRCX,MAR,MCHP,MDLZ,MNST,...,CTAS,CSX,CSCO,CPRT,CTSH,BIDU,BIIB,WBA,GILD,DB
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAPL,1.000000,0.697105,0.969420,0.946378,0.986379,0.973803,0.872718,0.958820,0.914324,0.916874,...,0.977346,0.939430,0.873533,0.975657,0.725229,0.311523,0.447247,0.144669,0.565118,-0.574239
INTC,0.697105,1.000000,0.790861,0.834911,0.666044,0.741927,0.843184,0.806962,0.820162,0.826083,...,0.764231,0.853402,0.874649,0.762302,0.860644,0.588128,0.685063,0.506219,0.652708,-0.697605
INTU,0.969420,0.790861,1.000000,0.979394,0.960692,0.972499,0.901987,0.968384,0.922310,0.921210,...,0.974961,0.967557,0.933794,0.978036,0.776988,0.350815,0.492280,0.218626,0.581990,-0.614321
ISRG,0.946378,0.834911,0.979394,1.000000,0.934408,0.957015,0.914183,0.968900,0.920745,0.930981,...,0.967479,0.975542,0.946575,0.976482,0.799770,0.391690,0.522482,0.233424,0.578725,-0.644280
KLAC,0.986379,0.666044,0.960692,0.934408,1.000000,0.976767,0.872267,0.962045,0.903528,0.904641,...,0.973822,0.926642,0.873044,0.971797,0.691729,0.280497,0.417438,0.132219,0.551033,-0.531813
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BIDU,0.311523,0.588128,0.350815,0.391690,0.280497,0.313425,0.540297,0.445332,0.510458,0.544650,...,0.331199,0.447415,0.382202,0.295918,0.785732,1.000000,0.816468,0.758180,0.718676,-0.656880
BIIB,0.447247,0.685063,0.492280,0.522482,0.417438,0.422597,0.656702,0.554717,0.699802,0.695340,...,0.500919,0.570380,0.552732,0.461708,0.811303,0.816468,1.000000,0.828171,0.929839,-0.700255
WBA,0.144669,0.506219,0.218626,0.233424,0.132219,0.147028,0.458272,0.295920,0.455012,0.447751,...,0.211925,0.309481,0.348323,0.146209,0.690306,0.758180,0.828171,1.000000,0.777442,-0.498987
GILD,0.565118,0.652708,0.581990,0.578725,0.551033,0.522704,0.721945,0.636098,0.787483,0.772961,...,0.609968,0.631506,0.624323,0.558225,0.809147,0.718676,0.929839,0.777442,1.000000,-0.685902


In [55]:
# Symbols grouped by cluster: one list of column labels per cluster.
arr

[['ATVI',
  'BKNG',
  'CMCSA',
  'CTSH',
  'DLTR',
  'EA',
  'ILMN',
  'INTC',
  'REGN',
  'ROST',
  'SIRI',
  'SWKS'],
 ['AAPL',
  'ADBE',
  'ADI',
  'ADP',
  'ADSK',
  'AEP',
  'ALGN',
  'AMAT',
  'AMD',
  'AMGN',
  'AMZN',
  'ANSS',
  'ASML',
  'CDNS',
  'COST',
  'CPRT',
  'CSCO',
  'CSX',
  'CTAS',
  'DXCM',
  'EBAY',
  'FAST',
  'GOOG',
  'HON',
  'IDXX',
  'INTU',
  'ISRG',
  'KLAC',
  'LRCX',
  'MAR',
  'MCHP',
  'MDLZ',
  'MNST',
  'MRVL',
  'MSFT',
  'MTCH',
  'MU',
  'NFLX',
  'NTES',
  'NVDA',
  'ORLY',
  'PAYX',
  'PCAR',
  'PEP',
  'QCOM',
  'SBUX',
  'SGEN',
  'SNPS',
  'TXN',
  'VRSN',
  'VRTX',
  'XEL'],
 ['EXC'],
 ['BIIB', 'GILD'],
 ['BIDU', 'WBA'],
 ['DB']]