In [1]:
# load the hypergraph
import yaml
import logging
import itertools
import os
import sys
import random

from scipy import sparse
import numpy as np
import seaborn as sns
import networkx as nx
import community

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv
from torch_geometric.data import Data

from cell.utils import link_prediction_performance
from cell.cell import Cell, EdgeOverlapCriterion, LinkPredictionCriterion
from cell.graph_statistics import compute_graph_statistics

from utils import load_graphs
from cliques import compute_cliques

In [2]:
class GNN(torch.nn.Module):
    def __init__(self, node_features):
        super().__init__()
        # GCN initialization
        self.conv1 = SAGEConv(node_features, 128)
        self.conv2 = SAGEConv(128, 128)
        self.bn = torch.nn.BatchNorm1d(128)
        
        # self.conv2 = GCNConv(128, 128)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.bn(x)
        x = self.conv2(x, edge_index)

        return x


def save_hypergraph(hg, path):
    with open(path, 'w') as f:
        for edge in hg:
            f.write(' '.join(map(str,edge)) + '\n')



In [32]:

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

config  = yaml.safe_load(open('./config.yml'))
config['dataset'] = 'NDC-classes'
graphs = load_graphs(config, logger)
config['beta'] = len(graphs['simplicies_train']) * 10

# data = np.array([len(s) for s in graphs['simplicies_train']])
# hist, bins = np.histogram(data, bins=np.linspace(1, 8, 8))
# sns.displot(data)

INFO:root:Finish loading graphs.
INFO:root:Nodes train: 1161
INFO:root:Simplicies train: 1088


number of simplices: 49724
number of unique simplices: 1088
Node index should begin with 0, reindexing the hypergraphs ...
number of nodes in construct graph 1161


In [33]:
from torch_geometric.nn import Node2Vec

graph_adjacency_matrix, weighted_graph_adjacency_matrix = nx.to_numpy_array(graphs['G_train'], nodelist=sorted(graphs['G_train'].nodes())), nx.to_numpy_array(graphs['G_weighted'], nodelist=sorted(graphs['G_train'].nodes()))

edge_index = torch.tensor(np.array(graph_adjacency_matrix.nonzero()), dtype=torch.long)
data = Data(edge_index=edge_index)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = Node2Vec(
    data.edge_index,
    embedding_dim=50,
    walks_per_node=10,
    walk_length=20,
    context_size=10,
    p=1.0,
    q=1.0,
    num_negative_samples=1,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loader = model.loader(batch_size=128, shuffle=True, num_workers=4)

pos_rw, neg_rw = next(iter(loader))

model.train()
for pos_rw, neg_rw in loader:
    optimizer.zero_grad()
    loss = model.loss(pos_rw.to(device), neg_rw.to(device))
    loss.backward()
    optimizer.step()
    # print(loss.item())

embeddings = model()
embeddings.requires_grad = False


In [34]:
graph_adjacency_matrix, weighted_graph_adjacency_matrix = nx.to_numpy_array(graphs['G_train'], nodelist=sorted(graphs['G_train'].nodes())), nx.to_numpy_array(graphs['G_weighted'], nodelist=sorted(graphs['G_train'].nodes()))
edge_index = torch.tensor(np.array(graph_adjacency_matrix.nonzero()), dtype=torch.long)
edge_value = weighted_graph_adjacency_matrix[graph_adjacency_matrix.nonzero()]

# training for CELL
data = Data(x=embeddings, edge_index=edge_index)
model = GNN(50)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    src, dst = edge_index
    score = (out[src] * out[dst]).sum(dim=-1)
    loss = F.mse_loss(score, torch.tensor(edge_value, dtype=torch.float))
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f'epoch: {epoch}, loss: {loss.item()}')
# edge_index = torch.tensor(np.array(graph.nonzero()), dtype=torch.long)

# training for CELL
sparse_matrix = sparse.csr_matrix(graph_adjacency_matrix)
cell_model = Cell(A=sparse_matrix,
             H=10,
             callbacks=[EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=.80)])
cell_model.train(steps=400,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.1,
                            'weight_decay': 1e-7})

epoch: 9, loss: 37.56605529785156
epoch: 19, loss: 13.859573364257812
epoch: 29, loss: 6.7057366371154785
epoch: 39, loss: 3.6129190921783447
epoch: 49, loss: 2.2327308654785156
epoch: 59, loss: 1.4853134155273438
epoch: 69, loss: 1.0493932962417603
epoch: 79, loss: 0.7678614854812622
epoch: 89, loss: 0.5700497627258301
epoch: 99, loss: 0.4281879663467407
epoch: 109, loss: 0.32505276799201965
epoch: 119, loss: 0.2507583498954773
epoch: 129, loss: 1.4019083976745605
epoch: 139, loss: 0.2521152198314667
epoch: 149, loss: 0.25305259227752686
epoch: 159, loss: 0.1696450561285019
epoch: 169, loss: 0.1021614596247673
epoch: 179, loss: 0.07292167097330093
epoch: 189, loss: 0.062380388379096985
epoch: 199, loss: 0.05201612412929535
Step:  10/400 Loss: 4.90158 Edge-Overlap: 0.394 Total-Time: 0
Step:  20/400 Loss: 3.92494 Edge-Overlap: 0.533 Total-Time: 0
Step:  30/400 Loss: 3.63647 Edge-Overlap: 0.646 Total-Time: 1
Step:  40/400 Loss: 3.50911 Edge-Overlap: 0.768 Total-Time: 1
Step:  50/400 Loss

In [35]:
import utils
# from utils import lazy_clique_edge_cover
from importlib import reload


# reconstruct the hypergraph by clique cover
# YOU GUY!!!!!!!!!!!!!!!!!!!!!!
# BAD API!!!!!!!!!!!!!!!!!!!!!!
# G = graphs['G_weighted']
# weighted_adjacency_matrix = nx.to_numpy_array(G, nodelist=sorted(G.nodes()))

# # sampling cliques
# os.remove(f'{config['data_dir']}/{config['dataset']}/cliques_train.pkl')
# os.remove(f'{config['data_dir']}/{config['dataset']}/rho.pkl')


learning_weighted_graphs = []
reconstruct_weighted_graphs = []

for i in range(5):
    # generate WLIG
    generated_graph = cell_model.sample_graph()
    graph_prime = generated_graph.A
    edge_index_prime = torch.tensor(graph_prime.nonzero(), dtype=torch.long)
    x = embeddings
    data_prime = Data(x=x, edge_index = edge_index_prime)
    out = model(data_prime)
    src, dst = edge_index_prime
    score = (out[src] * out[dst]).sum(dim=-1)
    weight = score.detach().numpy()
    weight[weight <= 1] = 1
    weight = np.rint(weight).astype(int)
    weighted_graph_prime = np.copy(graph_prime)
    weighted_graph_prime[weighted_graph_prime.nonzero()] = weight


    learning_graph = nx.from_numpy_array(weighted_graph_prime)
    learning_weighted_graphs.append(learning_graph)

    # sample cliques
    cliques = compute_cliques(graphs, config, logger)
    sample_cliques_table = cliques['children_cliques_train']
    # print(sample_cliques_table)
    sample_cliques = []
    for v in sample_cliques_table.values():
        sample_cliques = sample_cliques + v
    sample_cliques = [list(c) for c in sample_cliques]
    set_sample_cliques = list(set([tuple(sorted(e)) for e in sample_cliques]))
    print(f'len of origin: {len(sample_cliques)}, len of deduplicates: {len(set_sample_cliques)}')

    # reconstruct hyperedges
    reconstruct_hyperedges = utils.lazy_clique_edge_cover(np.copy(weighted_graph_prime), set_sample_cliques, len(graphs['simplicies_train']))
    reconstruct_weighted_graphs.append(utils.construct_graph(reconstruct_hyperedges))

    
    # random.shuffle(set_sample_hyperedges)
    # sample_clique_sizes = [len(c) for c in set_sample_cliques]
    # data = np.array(sample_clique_sizes)
    # hist, bins = np.histogram(data, bins=np.linspace(0, 5, 6))
    # sns.displot(data)
    # reconstruct_hyperedges = utils.lazy_clique_edge_cover(weighted_adjacency_matrix, set_sample_cliques, len(graphs['simplicies_train']))
    # reconstruct_hyperedges_sizes = [len(e) for e in reconstruct_hyperedges]
    # data = np.array(reconstruct_hyperedges_sizes)
    # sns.displot(data)
    set_reconstruct_hyperedges = set([tuple(sorted(e)) for e in reconstruct_hyperedges])
    
    
    print(f'len: {len(graphs['simplicies_train'])}, {graphs['simplicies_train']}')
    print(f'len: {len(set_reconstruct_hyperedges)}, {set_reconstruct_hyperedges}')
    # save_hypergraph(set_reconstruct_hyperedges, f'./baseline/HyperPLR/{config['dataset']}/reconstruct_hyperedges_{i}.txt')


# print('original hypergraph', hypergraph_metrics(graphs['simplicies_train']))
# print('reconstructed hypergraph', hypergraph_metrics(set_reconstruct_hyperedges))

INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624
INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:713, recall: 0.6553308823529411, efficiency:0.06553308823529412
INFO:root:Clique analysis done.


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 
len of origin: 10869, len of deduplicates: 9507
number of nodes in construct graph 901
len: 1088, {(444, 445), (40, 766, 767), (377, 378, 379, 380, 381, 382, 383, 384, 385, 386), (898, 899), (103, 104, 105, 932, 933), (8, 9), (110, 111, 137, 138, 139, 561), (151, 784), (292, 293, 308, 309), (125, 126, 377, 378, 379, 380, 381, 608, 609, 610), (251, 252), (58, 86), (477, 478), (871, 872), (70, 71, 72), (341, 342, 37

INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624
INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:716, recall: 0.6580882352941176, efficiency:0.06580882352941177
INFO:root:Clique analysis done.


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 
len of origin: 10869, len of deduplicates: 9507
number of nodes in construct graph 904
len: 1088, {(444, 445), (40, 766, 767), (377, 378, 379, 380, 381, 382, 383, 384, 385, 386), (898, 899), (103, 104, 105, 932, 933), (8, 9), (110, 111, 137, 138, 139, 561), (151, 784), (292, 293, 308, 309), (125, 126, 377, 378, 379, 380, 381, 608, 609, 610), (251, 252), (58, 86), (477, 478), (871, 872), (70, 71, 72), (341, 342, 37

INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624
INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:715, recall: 0.6571691176470589, efficiency:0.06571691176470588
INFO:root:Clique analysis done.


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 
len of origin: 10869, len of deduplicates: 9507
number of nodes in construct graph 900
len: 1088, {(444, 445), (40, 766, 767), (377, 378, 379, 380, 381, 382, 383, 384, 385, 386), (898, 899), (103, 104, 105, 932, 933), (8, 9), (110, 111, 137, 138, 139, 561), (151, 784), (292, 293, 308, 309), (125, 126, 377, 378, 379, 380, 381, 608, 609, 610), (251, 252), (58, 86), (477, 478), (871, 872), (70, 71, 72), (341, 342, 37

INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624
INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:714, recall: 0.65625, efficiency:0.065625
INFO:root:Clique analysis done.


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 
len of origin: 10869, len of deduplicates: 9507
number of nodes in construct graph 914
len: 1088, {(444, 445), (40, 766, 767), (377, 378, 379, 380, 381, 382, 383, 384, 385, 386), (898, 899), (103, 104, 105, 932, 933), (8, 9), (110, 111, 137, 138, 139, 561), (151, 784), (292, 293, 308, 309), (125, 126, 377, 378, 379, 380, 381, 608, 609, 610), (251, 252), (58, 86), (477, 478), (871, 872), (70, 71, 72), (341, 342, 37

INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624
INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:716, recall: 0.6580882352941176, efficiency:0.06580882352941177
INFO:root:Clique analysis done.


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 
len of origin: 10869, len of deduplicates: 9507
number of nodes in construct graph 890
len: 1088, {(444, 445), (40, 766, 767), (377, 378, 379, 380, 381, 382, 383, 384, 385, 386), (898, 899), (103, 104, 105, 932, 933), (8, 9), (110, 111, 137, 138, 139, 561), (151, 784), (292, 293, 308, 309), (125, 126, 377, 378, 379, 380, 381, 608, 609, 610), (251, 252), (58, 86), (477, 478), (871, 872), (70, 71, 72), (341, 342, 37

In [36]:
import statistics

def average_and_std_dicts(dict_list):
    if not dict_list:
        return {}

    # Initialize the result dictionary
    result = {}
    
    # Collect all values for each key
    all_values = {}
    for d in dict_list:
        for key, value in d.items():
            if key not in all_values:
                all_values[key] = []
            all_values[key].append(value)
    
    # Calculate mean and standard deviation for each key
    for key, values in all_values.items():
        mean = statistics.mean(values)
        std = statistics.stdev(values) if len(values) > 1 else 0
        result[key] = {"mean": mean}
    
    return result


def hypergraph_metrics(G):
    
    part_G = community.best_partition(G)
    mod_G = community.modularity(part_G, G)

    return {
        "coefficient": nx.average_clustering(G),
        "G_modularity": mod_G
    }


learning_baselines = [hypergraph_metrics(g) for g in learning_weighted_graphs]
reconstruct_baselines = [hypergraph_metrics(g) for g in reconstruct_weighted_graphs]


print(average_and_std_dicts(learning_baselines))
print(average_and_std_dicts(reconstruct_baselines))





{'coefficient': {'mean': 0.4520350770336879}, 'G_modularity': {'mean': 0.5377301204652842}}
{'coefficient': {'mean': 0.7884456003770601}, 'G_modularity': {'mean': 0.6251362988328488}}


In [9]:
import os
import networkx as nx
import community
import itertools
from collections import defaultdict
import pickle


def hypergraph_metrics(hg):

    # projected graph
    G = nx.Graph()
    # Add all nodes from the hypergraph
    nodes = set(node for edge in hg for node in edge)
    G.add_nodes_from(nodes)
    # For each hyperedge, create a clique
    for edge in hg:
        # Add edges between all pairs of nodes in the hyperedge
        G.add_edges_from(itertools.combinations(edge, 2))
    
    part_G = community.best_partition(G)
    mod_G = community.modularity(part_G, G)


    # bipartite graph
    B = nx.Graph()
    # Add nodes for the original vertices (left set)
    left_nodes = set(node for edge in hg for node in edge)
    B.add_nodes_from(left_nodes, bipartite=0)
    # Add nodes for the hyperedges (right set)
    right_nodes = [f'e{i}' for i in range(len(hg))]
    B.add_nodes_from(right_nodes, bipartite=1)
    # Add edges between vertices and their corresponding hyperedges
    for i, edge in enumerate(hg):
        for node in edge:
            B.add_edge(node, f'e{i}')


    part_B = community.best_partition(B)
    mod_B = community.modularity(part_B, B)

    return {
        "density": density,
        "average_size": avg_size,
        "average_degree": avg_degree,
        "coefficient": nx.average_clustering(G),
        "G_modularity": mod_G,
        "B_modularity": mod_B
    }

def load_hypergraph(path, model):
    with open(path, 'r') as f:
        hg = f.readlines()
    if model == 'HyperDK00' or model == 'HyperDK11' or model == 'HyperPLR':
        hg = [list(map(int, e.split())) for e in hg]
    else:
        hg = [list(map(int, e.split(','))) for e in hg]
    return hg

metric_baseline = defaultdict(list)


def get_metrics_baseline(graph_path):
    models = os.listdir(graph_path)
    for model in models:
        graphs = os.listdir(f'{graph_path}/{model}')
        for graph in graphs:
            hypergraphs = os.listdir(f'{graph_path}/{model}/{graph}')
            for hypergraph in hypergraphs:
                hg = load_hypergraph(f'{graph_path}/{model}/{graph}/{hypergraph}', model)
                metric = hypergraph_metrics(hg)
                print(metric)
                metric_baseline[(graph, model)].append(metric)
    return metric_baseline
        # for hypergraphs in gen_model:
        #     for hg_file in hypergraphs:
        #         hg = load_hypergraph(hg_file)
        #         metric = hypergraph_metrics(hg)
        #         print(metric)

metric_baseline = get_metrics_baseline('./generate_graphs')
metric_baseline


{'density': 25.783385909568874, 'average_size': 3.6207585644371942, 'average_degree': 93.35541535226078, 'coefficient': 0.6259629871207, 'G_modularity': 0.2722784660485447, 'B_modularity': 0.5640446148887537}
{'density': 25.810526315789474, 'average_size': 3.6207585644371942, 'average_degree': 93.45368421052632, 'coefficient': 0.6213564115253399, 'G_modularity': 0.2752167763398538, 'B_modularity': 0.5692146810492797}
{'density': 25.8377239199157, 'average_size': 3.6207585644371942, 'average_degree': 93.55216016859852, 'coefficient': 0.6233081596167646, 'G_modularity': 0.2799695593163135, 'B_modularity': 0.5715013095283078}
{'density': 25.568300312825862, 'average_size': 3.6207585644371942, 'average_degree': 92.57664233576642, 'coefficient': 0.6225840873022264, 'G_modularity': 0.2767877391922733, 'B_modularity': 0.5740449099116591}
{'density': 25.892291446673706, 'average_size': 3.6207585644371942, 'average_degree': 93.74973600844773, 'coefficient': 0.6197850254913693, 'G_modularity': 0

defaultdict(list,
            {('email-Eu',
              'Hyperlap'): [{'density': 25.783385909568874,
               'average_size': 3.6207585644371942,
               'average_degree': 93.35541535226078,
               'coefficient': 0.6259629871207,
               'G_modularity': 0.2722784660485447,
               'B_modularity': 0.5640446148887537}, {'density': 25.810526315789474,
               'average_size': 3.6207585644371942,
               'average_degree': 93.45368421052632,
               'coefficient': 0.6213564115253399,
               'G_modularity': 0.2752167763398538,
               'B_modularity': 0.5692146810492797}, {'density': 25.8377239199157,
               'average_size': 3.6207585644371942,
               'average_degree': 93.55216016859852,
               'coefficient': 0.6233081596167646,
               'G_modularity': 0.2799695593163135,
               'B_modularity': 0.5715013095283078}, {'density': 25.568300312825862,
               'average_size': 3.6207

In [11]:
# pickle.dump(metric_baseline, open('./metric_baseline.pkl', 'wb'))