In [7]:
from pathlib import Path
import yaml


config  = yaml.safe_load(open('./config.yml'))
nverts_path = f'{config['dataset_dir']}/{config['dataset']}/{config['dataset']}-nverts.txt'
simplices_path = f'{config['dataset_dir']}/{config['dataset']}/{config['dataset']}-simplices.txt'

with open(nverts_path, 'r') as file:
    nvertx = file.read().split('\n')
    nvertx.pop() # remove the blank line
    nvertx = [int(x) for x in nvertx]

with open(simplices_path, 'r') as file:
    simplices = file.read().split('\n')
    simplices.pop() # remove the blank line

hyperedges = []
hyperedge_idx = 0
for edge_len in nvertx:
    hyper_edge = simplices[hyperedge_idx:hyperedge_idx + edge_len]
    hyperedges.append(hyper_edge)
    hyperedge_idx += edge_len

# reindex
all_nodes = sorted(set(int(n) for s in hyperedges for n in s))
node2i = {node: i for i, node in enumerate(all_nodes)}
reindex_hyperedges = [sorted([str(node2i[int(n)]) for n in s]) for s in hyperedges]


print(f'Number of hyperedges:{len(hyperedges)}')
Path(f'./data/{config['dataset']}').mkdir(parents=True, exist_ok=True)
with open(f'./data/{config['dataset']}/train.txt', 'w') as f:
    for hyperedge in reindex_hyperedges:
        f.write(f"{' '.join(hyperedge)}\n")

unique_hyperedges = list(dict.fromkeys([tuple(sorted(set(s))) for s in reindex_hyperedges]).keys())
# print(unique_hyperedges)
with open(f'./data/{config['dataset']}/unique.txt', 'w') as f:
    for hyperedge in unique_hyperedges:
        f.write(f"{' '.join(hyperedge)}\n")



Number of hyperedges:10883


In [10]:
times_path = f'{config['dataset_dir']}/{config['dataset']}/{config['dataset']}-times.txt'
with open(times_path, 'r') as file:
    times = file.read().split('\n')
    times.pop() # remove the blank line
    times = [int(x) for x in times]


hyperedges_with_idx = list(enumerate(unique_hyperedges))
hyperedges_with_time = [(times[idx], hyperedge) for idx, hyperedge in hyperedges_with_idx]
sorted_hyperedges = sorted(hyperedges_with_time, key=lambda x: x[0])
print(sorted_hyperedges[:10])
# print(hyperedges_with_time[:30])

[(8017, ['152']), (8017, ['1255']), (8017, ['1215']), (8017, ['1254']), (8017, ['1254', '865']), (8017, ['1254', '865']), (8017, ['567', '1016', '152']), (8017, ['1650', '1255']), (8017, ['1027', '14', '1016', '860', '18']), (8017, ['1255', '1018', '865'])]


In [2]:
import yaml
from importlib import reload
import networkx as nx
import utils
reload(utils)

config  = yaml.safe_load(open('./config.yaml'))
dataset_dir = f'{config['data_dir']}/{config['dataset']}/'
simplices = utils.read_simplicies(dataset_dir, mode='train')


decomposed_graph = utils.construct_decomposed_graph(simplices, level=2)
print(decomposed_graph)
# print(decomposed_graph.edges())




number of simplices: 2272433
number of unique simplices: 141087
Node index should begin with 0, reindexing the hypergraphs ...
Graph with 81567 nodes and 1682274 edges


In [14]:
mapping = {}

for idx, node in enumerate(decomposed_graph.nodes()):
    mapping[node] = idx

G = nx.relabel_nodes(decomposed_graph, mapping)
print(G)
print(G.edges())

[(0, 560), (0, 1952), (2, 398), (2, 601), (3, 4), (3, 5), (3, 116), (3, 186), (4, 5), (4, 3967), (4, 1005), (4, 1130), (4, 2671), (4, 1244), (4, 2279), (5, 1004), (5, 1005), (5, 1285), (5, 1571), (5, 2237), (6, 1734), (6, 4469), (7, 605), (7, 142), (8, 9), (8, 10), (9, 10), (9, 1755), (9, 1072), (9, 634), (9, 1073), (9, 4510), (9, 2694), (10, 2242), (10, 657), (11, 2527), (11, 3957), (11, 267), (11, 975), (11, 1904), (11, 3956), (13, 1839), (13, 329), (14, 175), (14, 1903), (16, 401), (16, 1109), (16, 3969), (16, 2208), (18, 19), (18, 20), (18, 410), (18, 421), (18, 427), (18, 881), (18, 115), (18, 428), (18, 772), (18, 140), (18, 1307), (18, 617), (18, 1823), (18, 1998), (18, 1087), (18, 2896), (18, 1116), (18, 1880), (18, 286), (18, 1879), (18, 284), (19, 20), (19, 410), (19, 411), (19, 1823), (19, 1087), (19, 140), (19, 1307), (19, 617), (19, 1998), (19, 428), (19, 3863), (19, 1880), (19, 3085), (20, 1307), (20, 617), (20, 1619), (20, 615), (20, 3310), (20, 1847), (20, 1791), (20, 3

In [None]:
# hyper_edges = set()
# hyperedge_idx = 0
# for edge_len in nvertx:
#     hyper_edge = frozenset(simplices[hyperedge_idx:hyperedge_idx + edge_len])
#     hyper_edges.add(hyper_edge)
#     hyperedge_idx += edge_len
# print(f'Number of hyperedges:{len(hyper_edges)}')

# with open('./data/high-school/check.txt', 'w') as f:
#     for hyperedge in hyper_edges:
#         f.write(f"{' '.join(hyperedge)}\n")


# def reduce_edges(hyper_edges):
#     reduce_edges = deepcopy(hyper_edges)
#     for hyperedge_i in hyper_edges:
#         for hyperedge_j in hyper_edges:
#              if hyperedge_i.issubset(hyperedge_j) and hyperedge_i != hyperedge_j:
#                 reduce_edges.remove(hyperedge_i)
#                 break
#     return reduce_edges


# reduce_hypedges = reduce_edges(hyper_edges)
# print(f'Number of nonrepeat hyperedges: {len(reduce_hypedges)}')

# # write the hyperedge to file
# with open('./data/high-school/train.txt', 'w') as f:
#     for hyperedge in reduce_hypedges:
#         f.write(f"{' '.join(hyperedge)}\n")


In [2]:
# import yaml
# import logging

# from utils import load_graphs

# logging.basicConfig(level=logging.INFO)

# logger = logging.getLogger()
# logger.setLevel(logging.INFO)

# config  = yaml.safe_load(open('./config.yaml'))
# graph = load_graphs(config, logger)

INFO:root:Finish loading graphs.
INFO:root:Nodes train: 327


INFO:root:Simplicies train: 4862


4862
Sanity check failed, reindexing the hypergraphs ...
number of nodes in construct graph 327
