In [None]:
import numpy as np
from collections import defaultdict
from scipy.stats import entropy
from collections import Counter
# Name of the dataset to analyse: it is interpolated into the .npz file path
# and into the messages printed with each homophily result.
dataset = 'roman_empire'

In [None]:
def load_data(file_path):
    """Load the edge list and node labels stored in a NumPy ``.npz`` archive.

    Parameters
    ----------
    file_path : str or path-like
        Location of an ``.npz`` archive containing the arrays ``'edges'`` and
        ``'node_labels'``.

    Returns
    -------
    tuple
        ``(edges, node_labels)`` exactly as stored in the archive.

    Raises
    ------
    KeyError
        If either ``'edges'`` or ``'node_labels'`` is missing from the archive.
    """
    # NOTE(security): allow_pickle=True lets NumPy unpickle object arrays, and
    # unpickling can execute arbitrary code. Only load archives from a trusted
    # source; switch to allow_pickle=False if the arrays are plain numeric.
    #
    # The context manager closes the archive even when a key lookup fails;
    # both arrays are read into memory before the archive is closed.
    with np.load(file_path, allow_pickle=True) as data:
        edges = data['edges']
        node_labels = data['node_labels']
    return edges, node_labels

# Path of the dataset archive, resolved relative to the current working directory.
file_path = f'./{dataset}.npz'

In [None]:
#Node Homophily
def node_homophily(edges, node_labels):
    """Average, over nodes, of the fraction of neighbors that share the node's label.

    The edge list is treated as undirected: every edge makes its two endpoints
    neighbors of each other. Neighbors are kept in a set, so repeated edges
    between the same pair count once. Nodes that never appear in ``edges`` are
    not part of the average, and a self-loop makes a node its own neighbor.

    Parameters
    ----------
    edges : iterable
        Each item is indexed as ``edge[0]`` and ``edge[1]`` to obtain the two
        endpoint ids of an edge.
    node_labels : indexable
        ``node_labels[node]`` is the label of ``node``.

    Returns
    -------
    float
        Mean per-node homophily ratio, or ``nan`` when ``edges`` is empty.
    """
    neighbors = defaultdict(set)

    # Build the undirected adjacency map.
    for edge in edges:
        source, target = edge[0], edge[1]
        neighbors[source].add(target)
        neighbors[target].add(source)

    # With no edges no node has any neighbor and the metric is undefined.
    # Return nan explicitly instead of letting np.mean([]) emit a RuntimeWarning.
    if not neighbors:
        return float("nan")

    # Every set in `neighbors` was created by an .add() call, so it is
    # non-empty and the division below cannot be by zero.
    homophily_ratios = [
        sum(node_labels[neighbor] == node_labels[node] for neighbor in node_neighbors)
        / len(node_neighbors)
        for node, node_neighbors in neighbors.items()
    ]

    return np.mean(homophily_ratios)


# Load the graph from disk, compute its node homophily and print the result.
edges, node_labels = load_data(file_path)
homophily_ratio_node = node_homophily(edges, node_labels)
print(f"Node Homophily Ratio for {dataset} : {homophily_ratio_node}")

Node Homophily Ratio for roman_empire : 0.046047879669139924


In [None]:
#Edge Homophily
def edge_homophily(edges, node_labels):
    """Fraction of edges whose two endpoints carry the same label.

    Parameters
    ----------
    edges : sized iterable
        Each item is indexed as ``edge[0]`` and ``edge[1]`` to obtain the two
        endpoint ids of an edge. Any object supporting ``len()`` and iteration
        works (2-D array, list of pairs, ...).
    node_labels : indexable
        ``node_labels[node]`` is the label of ``node``.

    Returns
    -------
    float
        ``same-label edges / total edges``, or ``nan`` when there are no edges
        (the ratio is undefined for an empty edge list).
    """
    total_edges = len(edges)

    # An empty edge list would otherwise divide by zero.
    if total_edges == 0:
        return float("nan")

    same_label_count = sum(
        1 for edge in edges if node_labels[edge[0]] == node_labels[edge[1]]
    )
    return same_label_count / total_edges

# Reload the graph from disk, compute its edge homophily and print the result.
edges, node_labels = load_data(file_path)
homophily_ratio_edge = edge_homophily(edges, node_labels)
print(f"Edge Homophily Ratio for {dataset} : {homophily_ratio_edge}")


Edge Homophily Ratio for roman_empire : 0.04689160871017706


In [None]:
#Adjusted Homophily
def calculate_degree_sums(node_labels, edges):
    """Count, for every label, how many edge endpoints carry that label.

    Each edge contributes one count to the label of ``edge[0]`` and one to the
    label of ``edge[1]``, so the counts add up to twice the number of edges.

    Returns a ``defaultdict(int)`` mapping label -> endpoint count.
    """
    per_label_total = defaultdict(int)
    for edge in edges:
        # Visit both endpoints in order so labels are inserted in the same
        # sequence as they are encountered along the edge list.
        for endpoint in (edge[0], edge[1]):
            label = node_labels[endpoint]
            per_label_total[label] += 1
    return per_label_total

def calculate_adjusted_homophily(node_labels, edges):
    """Edge homophily corrected by the label-wise degree distribution.

    Computes ``(h_edge - s) / (1 - s)`` where ``h_edge`` is the value returned
    by ``edge_homophily`` and ``s = sum_k (D_k / (2 * |E|)) ** 2``, with ``D_k``
    the number of edge endpoints carrying label ``k`` (from
    ``calculate_degree_sums``) and ``|E|`` the number of edges.

    Note the argument order: ``node_labels`` comes first here, unlike
    ``edge_homophily(edges, node_labels)``.

    Parameters
    ----------
    node_labels : indexable
        ``node_labels[node]`` is the label of ``node``.
    edges : sized iterable
        Each item is indexed as ``edge[0]`` and ``edge[1]`` to obtain the two
        endpoint ids of an edge.

    Returns
    -------
    float
        The adjusted homophily, or ``nan`` when it is undefined: there are no
        edges, or every edge endpoint carries the same label (``s == 1``).
    """
    total_edges = len(edges)

    # No edges: both h_edge and the normaliser (2 * |E|) ** 2 are undefined.
    if total_edges == 0:
        return float("nan")

    hedge = edge_homophily(edges, node_labels)
    degree_sums = calculate_degree_sums(node_labels, edges)

    # s = sum_k D_k^2 / (2|E|)^2
    normalised_degree_term = sum(dk**2 for dk in degree_sums.values()) / (2 * total_edges) ** 2

    # When a single label owns every endpoint, s is exactly 1 and the formula
    # becomes 0 / 0; report nan instead of raising ZeroDivisionError.
    denominator = 1 - normalised_degree_term
    if denominator == 0:
        return float("nan")

    return (hedge - normalised_degree_term) / denominator

# Reload the graph from disk, compute its adjusted homophily and print the result.
# Note: calculate_adjusted_homophily takes (node_labels, edges), the reverse of
# the (edges, node_labels) order used by node_homophily and edge_homophily.
edges, node_labels = load_data(file_path)
adjusted_homophily = calculate_adjusted_homophily(node_labels, edges)
print(f"Adjusted Homophily for {dataset} : {adjusted_homophily}")

Adjusted Homophily for roman_empire : -0.046757752048503035
