In [8]:
import json
import numpy as np
import pandas as pd
from scipy.stats import spearmanr, pearsonr
import xgi
from sod import *

# Global and local correlation measures

### Global correlation measures

First, we quantify the correlation between different measures of simpliciality on the corpus of higher-order datasets that we consider. 

In [2]:
def load_data(filename="Data/empirical_simpliciality.json"):
    """Load a JSON file and return its parsed contents.

    Parameters
    ----------
    filename : str, optional
        Path of the JSON file to read. Defaults to
        "Data/empirical_simpliciality.json".

    Returns
    -------
    object
        Whatever `json.load` produces for the file (a dict for a JSON object).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) instead of
    # relying on the platform's locale default, which varies across systems.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

# Read the per-dataset simpliciality values from disk
data = load_data()

# Outer keys of the nested dictionary become the rows of the frame
df = pd.DataFrame.from_dict(data, orient='index')

# Measures that are compared pairwise
columns = ["es", "fes", "sf"]

# results maps (measure_1, measure_2, method) -> (coefficient, p-value).
# For every unordered pair of measures, Spearman is stored first and Pearson
# second, so the report below keeps that order.
results = {}
for i, col1 in enumerate(columns):
    for col2 in columns[i + 1:]:
        for method, corr_fn in (("spearman", spearmanr), ("pearson", pearsonr)):
            corr, p_val = corr_fn(df[col1], df[col2])
            results[(col1, col2, method)] = (corr, p_val)

# Report every coefficient together with its p-value
for (col1, col2, method), (corr, p_val) in results.items():
    print(f"Correlation ({method}) between {col1} and {col2}: {corr:.4f}, p-value: {p_val:.4f}")



Correlation (spearman) between es and fes: 0.9030, p-value: 0.0003
Correlation (pearson) between es and fes: 0.9027, p-value: 0.0003
Correlation (spearman) between es and sf: 0.8875, p-value: 0.0006
Correlation (pearson) between es and sf: 0.9500, p-value: 0.0000
Correlation (spearman) between fes and sf: 0.9970, p-value: 0.0000
Correlation (pearson) between fes and sf: 0.9718, p-value: 0.0000


### Local correlations

Now, examining a single higher-order dataset, we look at the correlation between the local (node-level) simpliciality measures and at their simplicial assortativity.

In [6]:
# Name of the dataset requested from xgi's dataset loader.
dataset = "email-enron"
# Forwarded to the loader as `max_order`; presumably caps the order (size - 1)
# of the interactions that are kept -- TODO confirm against the xgi docs.
max_order = 2

# NOTE(review): the loader may fetch the data remotely, so this cell can
# require network access on a fresh machine -- confirm.
H = xgi.load_xgi_data(dataset, max_order=max_order)
# The return value is discarded, so this relies on `cleanup()` modifying H in
# place with its default options -- verify the defaults for the installed xgi.
H.cleanup()

In [9]:
# Per-node simpliciality measures of H, each converted to a NumPy array.
# NOTE(review): these node statistics are presumably made available by
# `from sod import *` rather than by xgi itself -- confirm.
# Later cells filter these arrays with np.isnan, so entries may be NaN.
sf = H.nodes.local_simplicial_fraction.asnumpy()
es = H.nodes.local_edit_simpliciality.asnumpy()
fes = H.nodes.local_face_edit_simpliciality.asnumpy()

In [18]:
# Local (per-node) simpliciality measures, keyed by their short name
ls = {"sf": sf, "es": es, "fes": fes}
# Maps (measure_1, measure_2, method) -> (coefficient, p-value)
results = {}

for i, m1 in enumerate(ls):
    for j, m2 in enumerate(ls):
        if i < j:
            # Keep only the nodes where BOTH measures are defined. Dropping
            # NaNs from each array separately would pair values belonging to
            # different nodes (or produce arrays of unequal length) whenever
            # the two measures are undefined on different nodes.
            valid = ~(np.isnan(ls[m1]) | np.isnan(ls[m2]))
            s1 = ls[m1][valid]
            s2 = ls[m2][valid]

            # Spearman's correlation
            spearman_corr, spearman_p = spearmanr(s1, s2)
            results[(m1, m2, "spearman")] = (spearman_corr, spearman_p)

            # Pearson's correlation
            pearson_corr, pearson_p = pearsonr(s1, s2)
            results[(m1, m2, "pearson")] = (pearson_corr, pearson_p)

# Print results
for key, (corr, p_val) in results.items():
    col1, col2, method = key
    print(f"Correlation ({method}) between {col1} and {col2}: {corr:.4f}, p-value: {p_val:.4f}")



Correlation (spearman) between sf and es: 0.7774, p-value: 0.0000
Correlation (pearson) between sf and es: 0.6888, p-value: 0.0000
Correlation (spearman) between sf and fes: 0.9663, p-value: 0.0000
Correlation (pearson) between sf and fes: 0.8354, p-value: 0.0000
Correlation (spearman) between es and fes: 0.7982, p-value: 0.0000
Correlation (pearson) between es and fes: 0.7537, p-value: 0.0000


In [11]:
def simplicial_assortativity(s, H, weighted=False):
    """Correlation of a node-level score across adjacent node pairs.

    Every pair of distinct nodes with a nonzero entry in the weighted
    adjacency matrix of `H` contributes the score pair in both orientations,
    (s[i], s[j]) and (s[j], s[i]). The Pearson correlation coefficient of the
    resulting two sequences is returned.

    Parameters
    ----------
    s : array-like of float
        One score per node, indexed like the rows of the adjacency matrix.
        Pairs in which either score is NaN are skipped.
    H : hypergraph
        Object accepted by `xgi.adjacency_matrix`.
    weighted : bool, optional
        If True, each adjacent pair is counted `int(A[i, j])` times instead of
        once. Defaults to False.

    Returns
    -------
    float
        The correlation coefficient, or NaN when no adjacent pair with two
        defined scores exists.
    """
    A = np.asarray(xgi.adjacency_matrix(H, sparse=False, weighted=True))
    s = np.asarray(s, dtype=float)

    # Strict lower triangle: every unordered pair {i, j} exactly once (j < i),
    # enumerated in row-major order.
    rows, cols = np.nonzero(np.tril(A, k=-1))

    # Skip pairs where either endpoint has an undefined score.
    keep = ~(np.isnan(s[rows]) | np.isnan(s[cols]))
    rows, cols = rows[keep], cols[keep]

    # Interleave both orientations of each pair: x = [s_i, s_j, ...],
    # y = [s_j, s_i, ...], so that the relation is symmetric.
    x = np.column_stack((s[rows], s[cols])).ravel()
    y = np.column_stack((s[cols], s[rows])).ravel()

    if weighted:
        # Repeat each pair by its integer weight (truncated toward zero;
        # non-positive weights contribute nothing).
        reps = np.repeat(np.maximum(A[rows, cols].astype(int), 0), 2)
        x = np.repeat(x, reps)
        y = np.repeat(y, reps)

    # Without any usable pair the correlation is undefined; return NaN
    # directly instead of letting np.corrcoef warn on empty input.
    if x.size == 0:
        return np.nan

    return np.corrcoef(x, y)[0, 1]

In [None]:
# Unweighted simplicial assortativity of each local measure: every adjacent
# node pair counts once. A blank line separates consecutive reports.
for k, (label, values) in enumerate((("SF", sf), ("ES", es), ("FES", fes))):
    lead = "\n" if k else ""
    print(f"{lead}The simplicial assortativity of the {label} measure:")
    print(simplicial_assortativity(values, H, False))

In [None]:
# Weighted simplicial assortativity of each local measure: every adjacent
# node pair counts as many times as its adjacency weight. A blank line
# separates consecutive reports.
for k, (label, values) in enumerate((("SF", sf), ("ES", es), ("FES", fes))):
    lead = "\n" if k else ""
    print(f"{lead}The weighted simplicial assortativity of the {label} measure:")
    print(simplicial_assortativity(values, H, True))