In [1]:
import networkx as nx
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
import NEMtropy as nem
import numpy as np

In [14]:
# Load the datasets from the assignment_1_data folder
path = "NYSE/"
npy_files = [f for f in listdir(path) if isfile(join(path, f)) and f.endswith(".npy")]
numpy_files = [np.load(path + f) for f in npy_files]

correlation_matrices = {k: v for k, v in zip(npy_files, numpy_files)}
stocknames = {i:ticker for i, ticker in enumerate(np.loadtxt(path + "stocknames.txt", dtype=str))}

In [5]:
def calc_first_two_eigvals(correlation_matrix):
    """
    Calculates the first two eigenvalues of a correlation matrix
    :param correlation_matrix: The correlation matrix
    :return: The first two eigenvalues
    """
    eigenvalues, _ = np.linalg.eig(correlation_matrix)
    return eigenvalues[0], eigenvalues[1]

In [6]:
for k, v in correlation_matrices.items():
    first, second = calc_first_two_eigvals(v)
    print("first eigenvalue of " + k + " is " + str(first))
    print("second eigenvalue of " + k + " is " + str(second))


first eigenvalue of cormat_1h.npy is 31.027835738491515
second eigenvalue of cormat_1h.npy is 6.11462817996359
first eigenvalue of cormat_1m.npy is 30.15967998949909
second eigenvalue of cormat_1m.npy is 3.064418558871989
first eigenvalue of cormat_gaussian_1h.npy is 2.2312393897967744
second eigenvalue of cormat_gaussian_1h.npy is 2.13516468614258
first eigenvalue of cormat_gaussian_1m.npy is 1.143542012222033
second eigenvalue of cormat_gaussian_1m.npy is 0.8693971572232587
first eigenvalue of cormat_onefactor_1h.npy is 22.518565425931232
second eigenvalue of cormat_onefactor_1h.npy is 1.7720182491336811
first eigenvalue of cormat_onefactor_1m.npy is 21.318227400071393
second eigenvalue of cormat_onefactor_1m.npy is 0.5196141002460726


In [18]:
def analyze_mst(graph):
    mst = nx.minimum_spanning_tree(graph)

    # Compute degree distribution
    degree_distribution = dict(nx.degree(mst))

    # Identify top 5 stocks with highest degree
    top_5_stocks = sorted(degree_distribution.items(), key=lambda x: x[1], reverse=True)[:5]

    return degree_distribution, top_5_stocks

for k, v in correlation_matrices.items():
    # Create a graph from the correlation matrix
    graph = nx.from_numpy_matrix(v)
    nx.set_node_attributes(graph, values=stocknames, name='ticker')

    # Analyze the MST
    degree_distribution, top_5_stocks = analyze_mst(graph)

    # Print or use the results as needed
    print("Degree distribution for " + k + ":", degree_distribution)
    # print("Top 5 stocks for " + k + ":", top_5_stocks)
    print("5 stocks with highest degree for " + k + ":", [graph.nodes[stock[0]]['ticker'] for stock in top_5_stocks])

Degree distribution for cormat_1h.npy: {0: 3, 1: 1, 2: 1, 3: 2, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 11, 13: 13, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 5, 20: 1, 21: 1, 22: 1, 23: 2, 24: 1, 25: 2, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 11, 32: 3, 33: 1, 34: 15, 35: 4, 36: 1, 37: 1, 38: 1, 39: 1, 40: 1, 41: 1, 42: 1, 43: 2, 44: 1, 45: 1, 46: 1, 47: 1, 48: 1, 49: 1, 50: 1, 51: 1, 52: 1, 53: 1, 54: 1, 55: 1, 56: 1, 57: 1, 58: 1, 59: 3, 60: 1, 61: 1, 62: 1, 63: 1, 64: 3, 65: 3, 66: 1, 67: 2, 68: 1, 69: 1, 70: 1, 71: 1, 72: 3, 73: 1, 74: 1, 75: 1, 76: 1, 77: 6, 78: 1, 79: 1, 80: 1, 81: 1, 82: 17, 83: 5, 84: 1, 85: 1, 86: 1, 87: 1, 88: 1, 89: 1, 90: 1, 91: 1, 92: 1, 93: 1, 94: 1, 95: 1, 96: 1}
5 stocks with highest degree for cormat_1h.npy: ['SO', 'EXC', 'BAX', 'BAC', 'EBAY']
Degree distribution for cormat_1m.npy: {0: 1, 1: 5, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 2, 8: 1, 9: 2, 10: 1, 11: 2, 12: 2, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 