In [1]:
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns
import datetime

# import custom functions
from corrmat_functions import *

In [2]:
# load network
pmfgs = np.load('data/pmfgs.npy', allow_pickle='TRUE').item()  # PMFGs

In [3]:
#changing pmfgs so it has the same layout as msts and tns

#setting tickers as node attributes for pmfgs
for i, (k,pmfg) in enumerate(sorted(pmfgs.items())):
    tickers = {index: stockname for index, stockname in enumerate(pmfg['ticker'])}
    nx.set_node_attributes(pmfg['network'], tickers , 'ticker')

#changing dictionary structure to { <timestamp> : <pmfg> }
pmfgs = {k:v['network'] for k,v in pmfgs.items()}

In [6]:
def laplace(x, N, d = 2, alpha = 1):
    return (x + alpha) / (N + alpha * d)

def mutual_info(g1, g2):
    # !!! ONLY apply to networks w/ same vertices

    # to adjacency matrices
    adj_1 = nx.adjacency_matrix(g1).toarray()
    adj_2 = nx.adjacency_matrix(g2).toarray()

    # binarilize the adjacency matrices
    adj_1[adj_1 != 0] = 1
    adj_2[adj_2 != 0] = 1

    # independent prob
    max_link = (adj_1.shape[0] * adj_1.shape[1] - min(adj_1.shape[0], adj_1.shape[1]))

    pi_1_1 = laplace((2 * np.sum(adj_1)), max_link)
    pi_1_0 = 1 - pi_1_1
    pi_2_1 = laplace((2 * np.sum(adj_2)), max_link)
    pi_2_0 = 1 - pi_2_1
    prob_ind_1 = np.array([pi_1_1, pi_1_0])
    prob_ind_2 = np.array([pi_2_1, pi_2_0])
    prob_ind_matrix = np.outer(prob_ind_1.reshape(-1, 1),prob_ind_2) # prob_ind_1.T @ prob_ind_2 = 
                                                                # res = [p1(1) * p2(1), p1(1) * p2(0)],
                                                                #       [p1(0) * p2(1), p1(0) * p2(0)]
    prob_ind_flattened = prob_ind_matrix.flatten()

    # joint prob
    pj_1_1 = laplace((2 * np.sum(adj_1[adj_2 == 1])), max_link)
    pj_1_0 = laplace((2 * (np.sum(adj_1) - np.sum(adj_1[adj_2 == 1]))), max_link)
    pj_0_1 = laplace((2 * (np.sum(adj_2) - np.sum(adj_1[adj_2 == 1]))), max_link)
    pj_0_0 = 1 - laplace((2 * (np.sum(adj_1) + np.sum(adj_2) - np.sum(adj_1[adj_2 == 1]))), max_link)
    prob_joint = np.array([pj_1_1, pj_1_0, pj_0_1, pj_0_0])
    

    # Mutual info
    I = prob_joint @ np.log(prob_joint / prob_ind_flattened) # (3) function (3) in the paper
    
    # normalization
    H_x = -pi_1_0 * np.log(pi_1_0) - pi_1_1 * np.log(pi_1_1)
    H_y = -pi_2_0 * np.log(pi_2_0) - pi_2_1 * np.log(pi_2_1)
    i = I / np.sqrt(H_x * H_y) # (4) function (4) in the paper
    return i


In [7]:
date_1 = '2020-02-26 01-00-00'
date_2 = '2020-03-04 01-00-00'
g1 = pmfgs[date_1]
g2 = pmfgs[date_2]

mutual_info(g1, g2)

0.08546366076602643

In [8]:
mutual_info(g1, g1)

0.9742935924280898