In [1]:
import xgi
import json
import numpy as np

In [2]:
# Names of the datasets to summarize. Each entry is passed verbatim to
# `xgi.load_xgi_data` and is also used as a key of the `data` dictionary
# built in a later cell.
datasets = [
    "contact-primary-school",
    "contact-high-school",
    "hospital-lyon",
    "email-enron",
    "email-eu",
    "coauth-mag-geology",
    "coauth-mag-history",
    "diseasome",
    "disgenenet",
    "ndc-substances",
    "congress-bills",
    "tags-ask-ubuntu",
]

# Cap applied when loading each dataset: the loader is called with
# `max_order=max_size - 1`. Keeping this cap small is very important to the
# computational feasibility of the downstream algorithm.
# NOTE(review): the algorithm referred to is not part of this notebook section —
# confirm which analysis this value must stay in sync with.
max_size = 11

In [4]:
# Load the cached per-dataset summary statistics if the cache file exists;
# otherwise compute them from the XGI datasets and write the cache.
cache_path = "Data/dataset_characteristics.json"

try:
    with open(cache_path, "r") as file:
        data = json.load(file)
except (FileNotFoundError, json.JSONDecodeError):
    # Only a missing or unparsable cache triggers the recomputation below.
    # Anything else (e.g. a keyboard interrupt or a permissions problem) is
    # allowed to propagate instead of silently starting an expensive rebuild.
    data = {}

    for d in datasets:
        # Load the dataset with the size cap from the configuration cell, then
        # apply XGI's default cleanup before measuring anything.
        H = xgi.load_xgi_data(d, max_order=max_size - 1)
        H.cleanup()

        data[d] = {
            "num-nodes": H.num_nodes,
            "num-edges": H.num_edges,
            # NOTE(review): this is the edge-to-node ratio, not an average of
            # node degrees; confirm that "mean-degree" is the intended label.
            "mean-degree": H.num_edges / H.num_nodes,
            "mean-edge-size": H.edges.size.mean(),
        }

        print("Just finished ", d)

    # Serialize before opening the file so that a serialization error cannot
    # leave a truncated cache behind.
    datastring = json.dumps(data, indent=2)
    with open(cache_path, "w") as file:
        file.write(datastring)

Just finished  contact-primary-school
Just finished  contact-high-school
Just finished  hospital-lyon
Just finished  email-enron
Just finished  email-eu
Just finished  coauth-mag-geology
Just finished  coauth-mag-history
Just finished  ndc-substances
Just finished  diseasome
Just finished  disgenenet
Just finished  congress-bills
Just finished  tags-ask-ubuntu


In [5]:
# Print one " & "-separated row per dataset, in the order of `datasets`:
# name, node count, edge count, then the two averages rounded to 2 decimals.
for name in datasets:
    stats = data[name]
    cells = [
        name,
        stats["num-nodes"],
        stats["num-edges"],
        round(stats["mean-degree"], 2),
        round(stats["mean-edge-size"], 2),
    ]
    print(" & ".join(map(str, cells)))

contact-primary-school & 242 & 12704 & 52.5 & 2.42
contact-high-school & 327 & 7818 & 23.91 & 2.33
hospital-lyon & 75 & 1824 & 24.32 & 2.43
email-enron & 143 & 1442 & 10.08 & 2.97
email-eu & 967 & 23729 & 24.54 & 3.12
coauth-mag-geology & 1061562 & 898649 & 0.85 & 3.72
coauth-mag-history & 448593 & 248633 & 0.55 & 2.72
ndc-substances & 2740 & 4754 & 1.74 & 5.16
diseasome & 516 & 314 & 0.61 & 3.0
disgenenet & 1982 & 760 & 0.38 & 5.14
congress-bills & 1715 & 58788 & 34.28 & 4.95
tags-ask-ubuntu & 3021 & 145053 & 48.01 & 3.43
