In [2]:
import json
import random
import warnings

import fastjsonschema
import hypernetx as hnx
import numpy as np
import pandas as pd

In [3]:
warnings.simplefilter("ignore")

## Create a hypergraph with data

In [4]:
from hypernetx.utils import toys

lesmis = toys.LesMis()
names = lesmis.df_names
scenes = lesmis.df_scenes
scenes["edges"] = [
    ".".join([str(scenes.loc[idx][col]) for col in scenes.columns[:-2]])
    for idx in scenes.index
]
scenes["nodes"] = scenes["Characters"]
scenes

Unnamed: 0,Volume,Book,Chapter,Scene,Step,Characters,edges,nodes
0,1,1,1,0,0,MY,1.1.1.0,MY
1,1,1,1,0,0,NP,1.1.1.0,NP
2,1,1,1,1,1,MY,1.1.1.1,MY
3,1,1,1,1,1,MB,1.1.1.1,MB
4,1,1,2,0,2,MY,1.1.2.0,MY
...,...,...,...,...,...,...,...,...
857,5,9,4,1,400,MA,5.9.4.1,MA
858,5,9,4,1,400,CO,5.9.4.1,CO
859,5,9,5,0,401,JV,5.9.5.0,JV
860,5,9,5,0,401,CO,5.9.5.0,CO


In [5]:
df = scenes[["edges", "nodes"]]
cell_weights = df.groupby(["edges"]).count().to_dict()["nodes"]
df["weights"] = df.edges.map(lambda e: np.round(1 / cell_weights.get(e, 1), 2))
df

Unnamed: 0,edges,nodes,weights
0,1.1.1.0,MY,0.50
1,1.1.1.0,NP,0.50
2,1.1.1.1,MY,0.50
3,1.1.1.1,MB,0.50
4,1.1.2.0,MY,0.50
...,...,...,...
857,5.9.4.1,MA,0.50
858,5.9.4.1,CO,0.50
859,5.9.5.0,JV,0.33
860,5.9.5.0,CO,0.33


In [6]:
nprops = names
nprops["weights"] = np.round(np.random.uniform(0, 1, len(names)), 2)
nprops

Unnamed: 0,Symbol,FullName,Description,weights
0,AZ,Anzelma,daughter of TH and TM,0.67
1,BA,Bahorel,`Friends of the ABC' cutup,0.85
2,BB,Babet,tooth-pulling bandit of Paris,0.99
3,BJ,Brujon,notorious criminal,0.02
4,BL,Blacheville,Parisian student from Montauban,0.69
...,...,...,...,...
75,TS,Toussaint,servant of JV at Rue Plumet,0.29
76,VI,Madame Victurnien,snoop in M-- sur M--,0.55
77,XA,Child 1,son of TH sold to MN,0.32
78,XB,Child 2,son of TH sold to MN,0.80


In [7]:
lm = hnx.Hypergraph(
    df,
    cell_weight_col="weights",
    node_properties=nprops,
    node_weight_prop_col="weights",
)
lm.dataframe

Unnamed: 0,edges,nodes,weight,misc_properties
0,1.1.1.0,MY,0.50,{}
1,1.1.1.0,NP,0.50,{}
2,1.1.1.1,MY,0.50,{}
3,1.1.1.1,MB,0.50,{}
4,1.1.2.0,MY,0.50,{}
...,...,...,...,...
857,5.9.4.1,MA,0.50,{}
858,5.9.4.1,CO,0.50,{}
859,5.9.5.0,JV,0.33,{}
860,5.9.5.0,CO,0.33,{}


In [8]:
lm.nodes.dataframe

Unnamed: 0_level_0,weight,FullName,Description,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MY,0.08,Monsieur Charles Fran\c{c}ois Bienvenu Myriel,Bishop of D--,{}
NP,0.44,Napoleon,Emperor of France,{}
MB,0.01,Mademoiselle Baptistine,sister of MY,{}
ME,0.06,Madame Magloire,housekeeper to MY,{}
CL,0.81,Countess de Lô,distant relative of MY,{}
...,...,...,...,...
XA,0.32,Child 1,son of TH sold to MN,{}
XB,0.80,Child 2,son of TH sold to MN,{}
BJ,0.02,Brujon,notorious criminal,{}
HL,0.40,Madame Hucheloup,keeper of Corinth Inn,{}


## Load schema and create validator

In [9]:
schema = json.load(open("hif_schema_v0.1.0.json","r"))
schema

{'$schema': 'http://json-schema.org/draft-07/schema#',
 '$id': 'https://raw.githubusercontent.com/pszufe/HIF_validators/main/schemas/hif_schema_v0.1.0.json',
 'title': 'Schema for Hypergraph Interchange Format - HIF',
 'type': 'object',
 'properties': {'network-type': {'enum': ['undirected', 'directed', 'asc']},
  'metadata': {'type': 'object'},
  'incidences': {'type': 'array',
   'items': {'type': 'object',
    'properties': {'edge': {'type': ['string', 'integer']},
     'node': {'type': ['string', 'integer']},
     'weight': {'type': 'number'},
     'direction': {'enum': ['head', 'tail']},
     'attr': {'type': 'object'},
     'unevaluatedProperties': False},
    'required': ['edge', 'node']}},
  'nodes': {'type': 'array',
   'items': {'type': 'object',
    'properties': {'node': {'type': ['string', 'integer']},
     'weight': {'type': 'number'},
     'attr': {'type': 'object'},
     'unevaluatedProperties': False},
    'required': ['node']}},
  'edges': {'type': 'array',
   'items'

In [10]:
validator = fastjsonschema.compile(schema);

## HNX translators

In [11]:
def to_hif(hg):
    edgj = hg.edges.to_dataframe
    edid = edgj.index._name or "index"
    nodj = hg.nodes.to_dataframe
    ndid = nodj.index._name or "index"
    edgj = edgj.reset_index().rename(columns={edid: "edge"}).to_dict(orient="records")
    nodj = nodj.reset_index().rename(columns={ndid: "node"}).to_dict(orient="records")
    incj = (
        hg.incidences.to_dataframe.reset_index()
        .rename(columns={"nodes": "node", "edges": "edge"})
        .to_dict(orient="records")
    )
    hif = {"edges": edgj, "nodes": nodj, "incidences": incj}
    return hif


def from_hif(hif):
    edges = pd.DataFrame(hif["edges"])
    nodes = pd.DataFrame(hif["nodes"])
    incidences = pd.DataFrame(hif["incidences"])
    return hnx.Hypergraph(incidences, node_properties=nodes, edge_properties=edges)

In [18]:
# %%timeit #5.55 ms ± 101 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
hif = to_hif(lm)
json.dump(hif,open("../examples/lesmis_hif.json","w"),allow_nan=False)

In [19]:
# %%timeit #742 µs ± 2.52 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
try:
    validator(hif);
except Exception as e:
    print(e)

## Simple Testing

In [20]:

from copy import deepcopy
hiftest = deepcopy(hif)
hiftest['network-type'] = "ordered"
try:
    validator(hiftest);
except Exception as e:
    print(e)
validator(hiftest);

data.network-type must be one of ['undirected', 'directed', 'asc']


JsonSchemaValueException: data.network-type must be one of ['undirected', 'directed', 'asc']

In [21]:
# %%timeit #9.99 ms ± 219 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
h = from_hif(hif)

In [22]:
h.dataframe[:5]

Unnamed: 0,edges,nodes,weight,misc_properties
0,1.1.1.0,MY,0.5,{}
1,1.1.1.0,NP,0.5,{}
2,1.1.1.1,MY,0.5,{}
3,1.1.1.1,MB,0.5,{}
4,1.1.2.0,MY,0.5,{}


## Hypergraph Examples

### contacts-high-school

In [23]:
hs = json.load(open(f"../examples/contacts-high-school.json", "r"))

In [24]:
hs.keys()

dict_keys(['nodes', 'hyperedges'])

In [25]:
hs["hyperedges"][:5]

[{'interaction': [454, 640], 'time': 1385982020},
 {'interaction': [1, 939], 'time': 1385982020},
 {'interaction': [185, 258], 'time': 1385982020},
 {'interaction': [9, 45], 'time': 1385982020},
 {'interaction': [9, 453], 'time': 1385982020}]

In [26]:
hs["nodes"][:2]

[{'class': 'MP',
  'id': 454,
  'has_facebook': True,
  'has_compiled_questionnaire': False,
  'facebook_friends': [34, 151, 156, 159, 866, 640, 1232],
  'sex': 'F'},
 {'class': 'MP',
  'id': 640,
  'has_facebook': True,
  'has_compiled_questionnaire': False,
  'facebook_friends': [151, 159, 454],
  'sex': 'M'}]

In [37]:
df = pd.DataFrame(hs["hyperedges"]).fillna("")
df["edge"] = df.interaction.map(lambda x: x[0])
df["node"] = df.interaction.map(lambda x: x[1])
df = df[["edge", "node", "time"]]

nodedf = pd.DataFrame(hs["nodes"])
nodedf = nodedf.set_index("id").reset_index().fillna("")

hshyp = hnx.Hypergraph(df, node_properties=nodedf)

In [38]:
hshyp.dataframe

Unnamed: 0,edges,nodes,weight,time,misc_properties
0,454,640,1,1385982020,{}
1,1,939,1,1385982020,{}
2,185,258,1,1385982020,{}
3,9,45,1,1385982020,{}
4,9,453,1,1385982020,{}
...,...,...,...,...,...
5584,232,311,1,1386343520,{}
5585,46,306,1,1386343520,{}
5586,306,471,1,1386343540,{}
5587,836,1339,1,1386343740,{}


In [39]:
hshyp.nodes.dataframe

Unnamed: 0_level_0,weight,class,has_facebook,has_compiled_questionnaire,facebook_friends,sex,questionnaire_friends,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
640,1,MP,True,False,"[151, 159, 454]",M,,{}
939,1,2BIO3,True,False,"[1, 55, 101, 106, 117, 119, 122, 132, 156, 170...",M,,{}
258,1,PC*,False,False,,F,,{}
45,1,PC,False,True,,F,"[79, 335, 496, 601, 674, 765]",{}
453,1,PC,False,False,,M,,{}
...,...,...,...,...,...,...,...,...
445,1,MP*1,False,False,,M,,{}
15,1,PC,False,False,,M,,{}
46,1,2BIO2,False,True,,F,"[196, 257, 268]",{}
70,1,2BIO3,True,True,"[119, 202, 545, 649, 425, 653, 122, 275, 132, ...",F,"[132, 240, 425, 447]",{}


In [40]:
hnx.info_dict(hshyp)

{'nrows': 317,
 'ncols': 319,
 'aspect ratio': 0.9937304075235109,
 'ncells': 5589,
 'density': 0.05526932547491668}

In [41]:
# %%timeit #18.1 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
hshif = to_hif(hshyp)

In [47]:
try:
    validator(hshif);
except Exception as e:
    print(e)

In [43]:
# %%timeit #26.2 ms ± 516 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
newhshyp = from_hif(hshif)

In [44]:
hnx.info_dict(newhshyp)

{'nrows': 317,
 'ncols': 319,
 'aspect ratio': 0.9937304075235109,
 'ncells': 5589,
 'density': 0.05526932547491668}

In [45]:
assert hshyp == newhshyp ## this checks that the structure is the same

In [46]:
json.dump(hshif,open("../examples/contacts_high_school_hif.json","w"),allow_nan=False)