In [8]:
import statistics
from math import sqrt
import pickle

import os
import networkx as nx
import community
import itertools
from collections import defaultdict


def hypergraph_metrics(hg, random_state=None):
    """Compute summary metrics of a hypergraph and of two graphs derived from it.

    Parameters
    ----------
    hg : sequence of iterables
        One iterable of hashable node labels per hyperedge. It is traversed
        several times and ``len(hg)`` is taken, so it must be a real sequence
        (not a one-shot generator).
    random_state : optional
        When not ``None`` it is forwarded as ``random_state=`` to both
        ``community.best_partition`` calls so the modularity values can be
        reproduced. With the default ``None`` the calls are made exactly as
        before (no extra argument).

    Returns
    -------
    dict
        ``density``        -- number of hyperedges / number of distinct nodes
        ``average_size``   -- mean number of entries per hyperedge
        ``average_degree`` -- mean number of hyperedge memberships per node
                              (a node repeated inside one hyperedge is counted
                              once per occurrence)
        ``coefficient``    -- ``nx.average_clustering`` of the projected graph
        ``G_modularity``   -- modularity of the best partition of the
                              projected graph (every hyperedge becomes a clique)
        ``B_modularity``   -- modularity of the best partition of the bipartite
                              node/hyperedge incidence graph

    Raises
    ------
    ValueError
        If the hypergraph has no hyperedges or no nodes; every ratio above
        would otherwise be a division by zero.
    """
    # --- original hypergraph -------------------------------------------------
    num_edges = len(hg)
    nodes = set()
    node_degrees = {}
    for edge in hg:
        for node in edge:
            nodes.add(node)
            node_degrees[node] = node_degrees.get(node, 0) + 1
    num_nodes = len(nodes)

    # Fail early with a clear message instead of a ZeroDivisionError below.
    if num_edges == 0 or num_nodes == 0:
        raise ValueError(
            "hypergraph_metrics needs at least one hyperedge containing at "
            f"least one node (got {num_edges} hyperedges, {num_nodes} nodes)"
        )

    density = num_edges / num_nodes
    avg_size = sum(len(edge) for edge in hg) / num_edges
    avg_degree = sum(node_degrees.values()) / num_nodes

    # Only pass random_state through when the caller asked for it, so the
    # default call is identical to the unseeded one.
    louvain_kwargs = {} if random_state is None else {"random_state": random_state}

    # --- projected graph: one clique per hyperedge ---------------------------
    G = nx.Graph()
    G.add_nodes_from(nodes)
    for edge in hg:
        G.add_edges_from(itertools.combinations(edge, 2))

    part_G = community.best_partition(G, **louvain_kwargs)
    mod_G = community.modularity(part_G, G)

    # --- bipartite graph: original nodes on one side, hyperedges on the other
    B = nx.Graph()
    B.add_nodes_from(nodes, bipartite=0)
    # Hyperedge i is represented by the string node 'e{i}'.
    # NOTE(review): assumes no original node label is itself such a string.
    B.add_nodes_from([f'e{i}' for i in range(num_edges)], bipartite=1)
    for i, edge in enumerate(hg):
        for node in edge:
            B.add_edge(node, f'e{i}')

    part_B = community.best_partition(B, **louvain_kwargs)
    mod_B = community.modularity(part_B, B)

    return {
        "density": density,
        "average_size": avg_size,
        "average_degree": avg_degree,
        "coefficient": nx.average_clustering(G),
        "G_modularity": mod_G,
        "B_modularity": mod_B
    }

def load_hypergraph(path):
    """Read a hypergraph from a text file, one hyperedge per line.

    Each non-blank line is split on whitespace and every token is converted
    with ``int``. Blank or whitespace-only lines are skipped so they do not
    turn into empty hyperedges.

    Parameters
    ----------
    path : str or path-like
        Location of the text file.

    Returns
    -------
    list of list of int
        The hyperedges in file order.

    Raises
    ------
    ValueError
        If a token cannot be parsed as an integer.
    """
    hyperedges = []
    with open(path, 'r') as f:
        # Stream the file line by line instead of loading it all at once.
        for line in f:
            tokens = line.split()
            if not tokens:
                continue
            hyperedges.append([int(token) for token in tokens])
    return hyperedges

def average_and_std_dicts(dict_list):
    """Summarise a list of metric dicts as per-key mean and standard deviation.

    Values are grouped by key across all dicts (a key may be missing from
    some of them). For every key the result holds
    ``{"mean": statistics.mean(values), "std": statistics.stdev(values)}``;
    when only one value was seen the std is reported as ``0``.

    An empty (or otherwise falsy) ``dict_list`` yields ``{}``.
    """
    if not dict_list:
        return {}

    # Group the observed samples under their metric name.
    samples_by_key = {}
    for record in dict_list:
        for name, sample in record.items():
            samples_by_key.setdefault(name, []).append(sample)

    def _summarise(samples):
        # Sample standard deviation needs at least two observations.
        centre = statistics.mean(samples)
        spread = statistics.stdev(samples) if len(samples) > 1 else 0
        return {"mean": centre, "std": spread}

    return {name: _summarise(samples) for name, samples in samples_by_key.items()}


# NOTE: unpickling can execute arbitrary code; only load a metric_baseline.pkl
# that was produced by a trusted run of this project.
with open('./analysis/metric_baseline.pkl', 'rb') as pkl_file:
    metric_baseline = pickle.load(pkl_file)

graphs = ['contact-high-school', 'contact-primary-school', 'email-Enron', 'email-Eu', 'NDC-classes']
models = ['HyperDK00','HyperDK11', 'Hyperlap', 'Hyperlap+', 'TheRA', 'HyperPLR']

# Column order of the printed LaTeX table rows.
metric_keys = ['density', 'average_size', 'average_degree', 'coefficient', 'G_modularity', 'B_modularity']

for graph in graphs:
    # First row per dataset: metrics of the real hypergraph.
    metric_data = hypergraph_metrics(load_hypergraph(f'./data/{graph}/unique.txt'))
    cells = ''.join(f' \t&{metric_data[key]:.3f}' for key in metric_keys)
    print(f'\\bf {graph}{cells}\\\\')
    for model in models:
        # One row per generator: each cell is "mean + std" over the stored runs.
        result = average_and_std_dicts(metric_baseline[(graph, model)])
        cells = ''.join(
            f" \t&{result[key]['mean']:.3f} + {result[key]['std']:.3f}" for key in metric_keys
        )
        print(f"{model}{cells}\\\\")

\bf contact-high-school 	&23.908 	&2.327 	&55.633 	&0.504 	&0.581 	&0.749\\
HyperDK00 	&272.161 + 0.260 	&3.089 + 0.003 	&840.796 + 0.714 	&0.999 + 0.000 	&0.000 + 0.000 	&0.324 + 0.000\\
HyperDK11 	&142.573 + 0.142 	&2.178 + 0.000 	&310.567 + 0.270 	&0.861 + 0.001 	&0.016 + 0.001 	&0.459 + 0.000\\
Hyperlap 	&23.938 + 0.040 	&2.327 + 0.000 	&55.701 + 0.093 	&0.341 + 0.004 	&0.394 + 0.006 	&0.622 + 0.004\\
Hyperlap+ 	&23.908 + 0.000 	&2.327 + 0.000 	&55.633 + 0.000 	&0.632 + 0.005 	&0.742 + 0.001 	&0.747 + 0.001\\
TheRA 	&23.908 + 0.000 	&2.609 + 0.000 	&62.382 + 0.000 	&0.354 + 0.000 	&0.405 + 0.000 	&0.831 + 0.000\\
HyperPLR 	&24.055 + 0.000 	&2.703 + 0.003 	&65.011 + 0.073 	&0.471 + 0.000 	&0.556 + 0.000 	&0.649 + 0.003\\
\bf contact-primary-school 	&52.496 	&2.419 	&126.979 	&0.526 	&0.284 	&0.634\\
HyperDK00 	&229.896 + 0.490 	&3.129 + 0.003 	&719.429 + 0.927 	&1.000 + 0.000 	&0.000 + 0.000 	&0.319 + 0.000\\
HyperDK11 	&143.343 + 0.110 	&2.285 + 0.000 	&327.516 + 0.218 	&0.924 + 0.

In [15]:
# NOTE: unpickling can execute arbitrary code; only load a metric_baseline.pkl
# that was produced by a trusted run of this project.
# The context manager closes the file handle as soon as loading is done.
with open('./analysis/metric_baseline.pkl', 'rb') as pkl_file:
    metric_baseline = pickle.load(pkl_file)

# Initial values only; the nested loops at the end of this cell rebind both names.
graph = 'contact-high-school'
model = 'Hyperlap'

def average_and_std_dicts(dict_list):
    """Group the values of a list of dicts by key.

    NOTE: despite its name this version computes no mean or standard
    deviation. It returns ``{key: [value, value, ...]}`` with the values in
    the order the dicts were given, and it replaces the same-named function
    defined in the earlier cell once this cell has been run.

    An empty (or otherwise falsy) ``dict_list`` yields ``{}``.
    """
    grouped = {}
    if dict_list:
        for record in dict_list:
            for name, sample in record.items():
                grouped.setdefault(name, []).append(sample)
    return grouped

def sum_absolute_differences(lst):
    """Return the sum of ``abs(lst[i] - lst[j])`` over all index pairs ``i < j``.

    Pairs are visited in row-major order (``i`` ascending, then ``j``
    ascending) and accumulated one at a time. Sequences with fewer than two
    elements give ``0``.
    """
    count = len(lst)
    # Each unordered pair exactly once; an element is never paired with itself.
    index_pairs = (
        (first, second)
        for first in range(count)
        for second in range(first + 1, count)
    )

    running_total = 0
    for first, second in index_pairs:
        running_total += abs(lst[first] - lst[second])
    return running_total

graphs = ['contact-high-school', 'contact-primary-school', 'email-Enron', 'email-Eu', 'NDC-classes']
models = ['Hyperlap', 'Hyperlap+', 'HyperPLR']

# For every (dataset, generator) pair print a header line followed by one line
# per metric: the summed pairwise absolute difference between the stored runs.
for graph in graphs:
    for model in models:
        print(graph, model)
        all_values = average_and_std_dicts(metric_baseline[(graph, model)])
        for metric_name in ('density', 'average_size', 'average_degree',
                            'coefficient', 'G_modularity', 'B_modularity'):
            print(sum_absolute_differences(all_values[metric_name]))


contact-high-school Hyperlap
0.44002926774358286
0.0
1.0239207519558988
0.04908607978933521
0.07687339360878154
0.04756645425439521
contact-high-school Hyperlap+
0.0
0.0
0.0
0.06676842434277352
0.016903083113154027
0.00872601272926632
contact-high-school HyperPLR
0.0
0.03786134561268728
0.9107692307692048
0.005392473577392032
0.0038117098959737916
0.036831156043632074
contact-primary-school Hyperlap
0.0
0.0
0.0
0.011092899123632538
0.036073993497294454
0.017587634528491547
contact-primary-school Hyperlap+
0.0
0.0
0.0
0.28378062363031153
0.006736080078793449
0.3294744408138881
contact-primary-school HyperPLR
0.0
0.04896095717884208
2.580912863070523
0.0
0.01735171568637872
0.028350454722275487
email-Enron Hyperlap
0.43110410716044356
0.0
1.3497488427065818
0.10792483428412614
0.561102237748315
0.13422156837636967
email-Enron Hyperlap+
0.0
0.0
0.0
0.16611182787973977
0.104836323901637
0.01890789808643656
email-Enron HyperPLR
0.0
0.4193121693121693
4.496453900709199
0.016979461275574037
0