In [1]:
import json
import random
import warnings

import fastjsonschema
import hypernetx as hnx
import numpy as np
import pandas as pd

In [2]:
import hypergraphx as hgx
import xgi

In [3]:
warnings.simplefilter("ignore")

## Create a hypergraph with data

In [4]:
from hypernetx.utils import toys

lesmis = toys.LesMis()
names = lesmis.df_names
scenes = lesmis.df_scenes
scenes["edges"] = [
    ".".join([str(scenes.loc[idx][col]) for col in scenes.columns[:-2]])
    for idx in scenes.index
]
scenes["nodes"] = scenes["Characters"]
scenes

Unnamed: 0,Volume,Book,Chapter,Scene,Step,Characters,edges,nodes
0,1,1,1,0,0,MY,1.1.1.0,MY
1,1,1,1,0,0,NP,1.1.1.0,NP
2,1,1,1,1,1,MY,1.1.1.1,MY
3,1,1,1,1,1,MB,1.1.1.1,MB
4,1,1,2,0,2,MY,1.1.2.0,MY
...,...,...,...,...,...,...,...,...
857,5,9,4,1,400,MA,5.9.4.1,MA
858,5,9,4,1,400,CO,5.9.4.1,CO
859,5,9,5,0,401,JV,5.9.5.0,JV
860,5,9,5,0,401,CO,5.9.5.0,CO


In [5]:
df = scenes[["edges", "nodes"]]
cell_weights = df.groupby(["edges"]).count().to_dict()["nodes"]
df["weights"] = df.edges.map(lambda e: np.round(1 / cell_weights.get(e, 1), 2))
df

Unnamed: 0,edges,nodes,weights
0,1.1.1.0,MY,0.50
1,1.1.1.0,NP,0.50
2,1.1.1.1,MY,0.50
3,1.1.1.1,MB,0.50
4,1.1.2.0,MY,0.50
...,...,...,...
857,5.9.4.1,MA,0.50
858,5.9.4.1,CO,0.50
859,5.9.5.0,JV,0.33
860,5.9.5.0,CO,0.33


In [6]:
nprops = names
nprops["weights"] = np.round(np.random.uniform(0, 1, len(names)), 2)
nprops

Unnamed: 0,Symbol,FullName,Description,weights
0,AZ,Anzelma,daughter of TH and TM,0.71
1,BA,Bahorel,`Friends of the ABC' cutup,0.32
2,BB,Babet,tooth-pulling bandit of Paris,0.32
3,BJ,Brujon,notorious criminal,0.60
4,BL,Blacheville,Parisian student from Montauban,0.22
...,...,...,...,...
75,TS,Toussaint,servant of JV at Rue Plumet,0.44
76,VI,Madame Victurnien,snoop in M-- sur M--,0.23
77,XA,Child 1,son of TH sold to MN,0.62
78,XB,Child 2,son of TH sold to MN,0.05


In [7]:
lm = hnx.Hypergraph(
    df,
    cell_weight_col="weights",
    node_properties=nprops,
    node_weight_prop_col="weights",
)
lm.dataframe

Unnamed: 0,edges,nodes,weight,misc_properties
0,1.1.1.0,MY,0.50,{}
1,1.1.1.0,NP,0.50,{}
2,1.1.1.1,MY,0.50,{}
3,1.1.1.1,MB,0.50,{}
4,1.1.2.0,MY,0.50,{}
...,...,...,...,...
857,5.9.4.1,MA,0.50,{}
858,5.9.4.1,CO,0.50,{}
859,5.9.5.0,JV,0.33,{}
860,5.9.5.0,CO,0.33,{}


In [8]:
lm.nodes.dataframe

Unnamed: 0_level_0,weight,FullName,Description,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MY,0.14,Monsieur Charles Fran\c{c}ois Bienvenu Myriel,Bishop of D--,{}
NP,0.68,Napoleon,Emperor of France,{}
MB,0.30,Mademoiselle Baptistine,sister of MY,{}
ME,0.29,Madame Magloire,housekeeper to MY,{}
CL,0.41,Countess de Lô,distant relative of MY,{}
...,...,...,...,...
XA,0.62,Child 1,son of TH sold to MN,{}
XB,0.05,Child 2,son of TH sold to MN,{}
BJ,0.60,Brujon,notorious criminal,{}
HL,0.19,Madame Hucheloup,keeper of Corinth Inn,{}


## Schema 1

#### Comments:
The problem I see with this approach is in passing metadata. Conventions may and will vary between libraries. Passing 'weight' or other
keyword values might become confusing with the list method. 

Also, Pythonic practice suggests:
"Explicit is better than implicit."


In [9]:
## See The Zen of Python by Tim Peters:
# import this

In [10]:
schema1 = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$id": "https://raw.githubusercontent.com/pszufe/HIF_validators/main/hif_schema.json",
    "title": "Schema for Hypergraph Interchange Format - HIF",
    "type": "object",
    "properties": {
        "network-type": {"type": "string"},
        "metadata": {"type": "object"},
        "incidences": {
            "type": "array",
            "items": {
                "type": "array",
                "allOf": [
                    {
                        "prefixItems": [
                            {"type": ["string", "integer"]},
                            {"type": ["string", "integer"]},
                        ],
                    },
                    {
                        "prefixItems": [
                            {"type": ["string", "integer"]},
                            {"type": ["string", "integer"]},
                            {"type": "object"},
                        ]
                    },
                ],
            },
        },
        "nodes": {
            "type": "array",
            "prefixItems": [{"type": ["string", "integer"]}, {"type": "object"}],
        },
        "edges": {
            "type": "array",
            "prefixItems": [{"type": ["string", "integer"]}, {"type": "object"}],
        },
    },
    "required": ["incidences"],
}

In [11]:
validate1 = fastjsonschema.compile(schema1)

In [12]:
## HNX hif functions


def flatten(my_dict):
    """
    Recursive method to flatten dictionary for returning properties as
    a dictionary instead of a Series, from https://stackoverflow.com/a/71952620
    """
    result = {}
    for key, value in my_dict.items():
        if value != {} and isinstance(value, dict):
            temp = flatten(value)
            temp.update(result)
            result = temp
        elif value == {}:
            continue
        else:
            result[key] = value
    return result


def to_hif1(h):

    hif = {
        "incidences": [
            [idx[0], idx[1], flatten(h.incidences.dataframe.loc[idx].to_dict())]
            for idx in h.incidences.dataframe.index
        ],
        "edges": [
            [idx, flatten(h.edges.dataframe.loc[idx].to_dict())] for idx in h.edges
        ],
        "nodes": [
            [idx, flatten(h.nodes.dataframe.loc[idx].to_dict())] for idx in h.nodes
        ],
        "metadata": {
            "hnx": {
                "cell_weight_col": "weight",
                "edge_weight_prop_col": "weight",
                "node_weight_prop_col": "weight",
                "misc_cell_properties_col": "properties",
                "misc_node_properties_col": "properties",
                "misc_edge_properties_col": "properties",
            }
        },
    }
    return hif


def from_hif1(hif):
    df1 = pd.DataFrame(hif["incidences"], columns=["edges", "nodes", "properties"])
    edf1 = pd.DataFrame(hif["edges"], columns=["edges", "properties"])
    ndf1 = pd.DataFrame(hif["nodes"], columns=["nodes", "properties"])
    if "metadata" in hif:
        kwargs = hif["metadata"].get("hnx", {})
    return hnx.Hypergraph(df1, edge_properties=edf1, node_properties=ndf1, **kwargs)

In [13]:
# %%timeit         #2.05 s ± 44.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
hif1 = to_hif1(lm)

In [14]:
%%timeit        #33.1 µs ± 160 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
validate1(hif1)

44.9 μs ± 370 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [15]:
# %%timeit        #9.64 ms ± 65.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
h1 = from_hif1(hif1)

## Schema 2

In [21]:
schema2 = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$id": "https://raw.githubusercontent.com/pszufe/HIF_validators/main/hif_schema.json",
    "title": "Schema for Hypergraph Interchange Format - HIF",
    "type": "object",
    "properties": {
        "network-type": {
            "enum": [
                "undirected",
                "directed",
                "asc"
            ]
        },
        "metadata": {
            "type": "object"
        },
        "incidences": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "edge": {
                        "type": [
                            "string",
                            "integer"
                        ]
                    },
                    "node": {
                        "type": [
                            "string",
                            "integer"
                        ]
                    },
                    "weight": {
                        "type": "number"
                    },
                    "unevaluatedProperties": {
                        "type": "object"
                    }
                },
                "required": [
                    "edge",
                    "node"
                ]
            }
        },
        "nodes": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "node": {
                        "type": [
                            "string",
                            "integer"
                        ]
                    },
                    "weight": {
                        "type": "number"
                    },
                    "unevaluatedProperties": {
                        "type": "object"
                    }
                },
                "required": [
                    "node"
                ]
            }
        },
        "edges": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "edge": {
                        "type": [
                            "string",
                            "integer"
                        ]
                    },
                    "weight": {
                        "type": "number"
                    },
                    "unevaluatedProperties": {
                        "type": "object"
                    }
                },
                "required": [
                    "edge"
                ]
            }
        },
        "unevaluatedProperties": False
    },
    "required": [
        "incidences"
    ]
}

In [22]:
validate2 = fastjsonschema.compile(schema2)

In [23]:
def to_hif2(hg):
    edgj = hg.edges.to_dataframe
    edid = edgj.index._name or "index"
    nodj = hg.nodes.to_dataframe
    ndid = nodj.index._name or "index"
    edgj = edgj.reset_index().rename(columns={edid: "edge"}).to_dict(orient="records")
    nodj = nodj.reset_index().rename(columns={ndid: "node"}).to_dict(orient="records")
    incj = (
        hg.incidences.to_dataframe.reset_index()
        .rename(columns={"nodes": "node", "edges": "edge"})
        .to_dict(orient="records")
    )
    hif = {"edges": edgj, "nodes": nodj, "incidences": incj}
    return hif


def from_hif2(hif):
    edges = pd.DataFrame(hif["edges"])
    nodes = pd.DataFrame(hif["nodes"])
    incidences = pd.DataFrame(hif["incidences"])
    return hnx.Hypergraph(incidences, node_properties=nodes, edge_properties=edges)

In [25]:
# %%timeit #5.55 ms ± 101 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
hif2 = to_hif2(lm)

In [26]:
%%timeit #742 µs ± 2.52 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
validate2(hif2)

938 μs ± 4 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [28]:
# %%timeit #9.99 ms ± 219 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
h2 = from_hif2(hif2)

In [29]:
h2.dataframe

Unnamed: 0,edges,nodes,weight,misc_properties
0,1.1.1.0,MY,0.50,{}
1,1.1.1.0,NP,0.50,{}
2,1.1.1.1,MY,0.50,{}
3,1.1.1.1,MB,0.50,{}
4,1.1.2.0,MY,0.50,{}
...,...,...,...,...
857,5.9.4.1,MA,0.50,{}
858,5.9.4.1,CO,0.50,{}
859,5.9.5.0,JV,0.33,{}
860,5.9.5.0,CO,0.33,{}


## Hypergraph Examples

### contacts-high-school

In [30]:
DIR = "HIF examples"
hs = json.load(open(f"../{DIR}/contacts-high-school.json", "r"))

In [31]:
hs.keys()

dict_keys(['nodes', 'hyperedges'])

In [32]:
hs["hyperedges"]

[{'interaction': [454, 640], 'time': 1385982020},
 {'interaction': [1, 939], 'time': 1385982020},
 {'interaction': [185, 258], 'time': 1385982020},
 {'interaction': [9, 45], 'time': 1385982020},
 {'interaction': [9, 453], 'time': 1385982020},
 {'interaction': [265, 494], 'time': 1385982020},
 {'interaction': [376, 638], 'time': 1385982020},
 {'interaction': [14, 190], 'time': 1385982020},
 {'interaction': [400, 637], 'time': 1385982020},
 {'interaction': [255, 275], 'time': 1385982020},
 {'interaction': [116, 176, 533], 'time': 1385982020},
 {'interaction': [151, 866], 'time': 1385982020},
 {'interaction': [280, 484], 'time': 1385982020},
 {'interaction': [295, 441], 'time': 1385982020},
 {'interaction': [101, 425], 'time': 1385982020},
 {'interaction': [55, 170], 'time': 1385982020},
 {'interaction': [243, 687], 'time': 1385982020},
 {'interaction': [47, 241], 'time': 1385982020},
 {'interaction': [179, 202], 'time': 1385982020},
 {'interaction': [63, 179], 'time': 1385982020},
 {'int

In [33]:
hs["nodes"]

[{'class': 'MP',
  'id': 454,
  'has_facebook': True,
  'has_compiled_questionnaire': False,
  'facebook_friends': [34, 151, 156, 159, 866, 640, 1232],
  'sex': 'F'},
 {'class': 'MP',
  'id': 640,
  'has_facebook': True,
  'has_compiled_questionnaire': False,
  'facebook_friends': [151, 159, 454],
  'sex': 'M'},
 {'class': '2BIO3',
  'id': 1,
  'has_facebook': True,
  'has_compiled_questionnaire': True,
  'facebook_friends': [883,
   132,
   339,
   653,
   545,
   171,
   117,
   196,
   587,
   372,
   147,
   55,
   859,
   106,
   504,
   471,
   425,
   170,
   939,
   272,
   3,
   119,
   494,
   205,
   265,
   779,
   364,
   240,
   477,
   101,
   884],
  'questionnaire_friends': [205, 272, 494, 779, 894],
  'sex': 'M'},
 {'class': '2BIO3',
  'id': 939,
  'has_facebook': True,
  'has_compiled_questionnaire': False,
  'facebook_friends': [1,
   55,
   101,
   106,
   117,
   119,
   122,
   132,
   156,
   170,
   184,
   196,
   205,
   240,
   265,
   272,
   346,
   372,
 

In [35]:
df = pd.DataFrame(hs["hyperedges"])
df["edge"] = df.interaction.map(lambda x: x[0])
df["node"] = df.interaction.map(lambda x: x[1])
df = df[["edge", "node", "time"]]

nodedf = pd.DataFrame(hs["nodes"])
nodedf = nodedf.set_index("id").reset_index()

hshyp = hnx.Hypergraph(df, node_properties=nodedf)

In [36]:
hshyp.dataframe

Unnamed: 0,edges,nodes,weight,time,misc_properties
0,454,640,1,1385982020,{}
1,1,939,1,1385982020,{}
2,185,258,1,1385982020,{}
3,9,45,1,1385982020,{}
4,9,453,1,1385982020,{}
...,...,...,...,...,...
5584,232,311,1,1386343520,{}
5585,46,306,1,1386343520,{}
5586,306,471,1,1386343540,{}
5587,836,1339,1,1386343740,{}


In [37]:
hshyp.nodes.dataframe

Unnamed: 0_level_0,weight,class,has_facebook,has_compiled_questionnaire,facebook_friends,sex,questionnaire_friends,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
640,1,MP,True,False,"[151, 159, 454]",M,,{}
939,1,2BIO3,True,False,"[1, 55, 101, 106, 117, 119, 122, 132, 156, 170...",M,,{}
258,1,PC*,False,False,,F,,{}
45,1,PC,False,True,,F,"[79, 335, 496, 601, 674, 765]",{}
453,1,PC,False,False,,M,,{}
...,...,...,...,...,...,...,...,...
445,1,MP*1,False,False,,M,,{}
15,1,PC,False,False,,M,,{}
46,1,2BIO2,False,True,,F,"[196, 257, 268]",{}
70,1,2BIO3,True,True,"[119, 202, 545, 649, 425, 653, 122, 275, 132, ...",F,"[132, 240, 425, 447]",{}


In [38]:
hnx.info_dict(hshyp)

{'nrows': 317,
 'ncols': 319,
 'aspect ratio': 0.9937304075235109,
 'ncells': 5589,
 'density': 0.05526932547491668}

In [42]:
%%timeit  # 50.4 s ± 1.23 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
hshif = to_hif1(hshyp)

54.4 s ± 1.07 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [41]:
validate1(hshif);

In [75]:
%%timeit #26.2 ms ± 516 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
newhshyp = from_hif1(hshif)

26.2 ms ± 516 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [81]:
# %%timeit  ##18.4 ms ± 349 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
hshif2 = to_hif2(hshyp)

In [84]:
# %%timeit  ##22.9 ms ± 554 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
from_hif2(hshif2)

None hypernetx.classes.hypergraph.Hypergraph

## XGI

In [20]:
H_enron = xgi.load_xgi_data("email-enron",nodetype=int,edgetype=int)

In [21]:
H_enron.num_nodes,H_enron.num_edges

(148, 10885)

In [22]:
list(H_enron.edges)[:10]
        

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [25]:
vars(H_enron).keys()

dict_keys(['_edge_uid', '_net_attr', '_node', '_node_attr', '_edge', '_edge_attr', '_nodeview', '_edgeview'])

In [40]:
edge_properties = (pd.DataFrame(vars(H_enron)['_edge_attr']).T).reset_index()
node_properties = (pd.DataFrame(vars(H_enron)['_node_attr']).T).reset_index()
H_enron.edges[0]

AttributeError: 'IDDict' object has no attribute 'elements'