In [2]:
import json
import random
import warnings
from copy import deepcopy
import fastjsonschema
import hypernetx as hnx
import numpy as np
import pandas as pd

In [3]:
warnings.simplefilter("ignore")

## Create a hypergraph with data

In [9]:
from hypernetx.utils import toys

lesmis = toys.LesMis()
names = lesmis.df_names
scenes = lesmis.df_scenes
scenes["edge"] = [
    ".".join([str(scenes.loc[idx][col]) for col in scenes.columns[:-2]])
    for idx in scenes.index
]
scenes["node"] = scenes["Characters"]
df = scenes[["edge", "node"]]
cell_weights = df.groupby(["edge"]).count().to_dict()["node"]
df["weight"] = df.edge.map(lambda e: np.round(1 / cell_weights.get(e, 1), 2))
nprops = names
nprops["weight"] = np.round(np.random.uniform(0, 1, len(names)), 2)
lm = hnx.Hypergraph(
    df,
    cell_weight_col="weight",
    node_properties=nprops,
    node_weight_prop_col="weight",
)
lm.nodes['JV'].job = 'mayor'
lm.nodes['MY'].avocation = 'to be kind'
lm.nodes['BS'].vocation = 'explorer'

In [10]:
lm.nodes.dataframe

Unnamed: 0_level_0,weight,FullName,Description,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MY,0.45,Monsieur Charles Fran\c{c}ois Bienvenu Myriel,Bishop of D--,{'avocation': 'to be kind'}
NP,0.95,Napoleon,Emperor of France,{}
MB,0.40,Mademoiselle Baptistine,sister of MY,{}
ME,0.42,Madame Magloire,housekeeper to MY,{}
CL,0.71,Countess de Lô,distant relative of MY,{}
...,...,...,...,...
XA,0.24,Child 1,son of TH sold to MN,{}
XB,0.16,Child 2,son of TH sold to MN,{}
BJ,0.12,Brujon,notorious criminal,{}
HL,0.16,Madame Hucheloup,keeper of Corinth Inn,{}


## Load schema and create validator

In [14]:
schema = json.load(open("../schemas/hif_schema_v0.1.0.json", "r"))
schema

{'$schema': 'http://json-schema.org/draft-07/schema#',
 '$id': 'https://raw.githubusercontent.com/pszufe/HIF_validators/main/schemas/hif_schema_v0.1.0.json',
 'title': 'Hypergraph Interchange Format v0.1.0',
 'type': 'object',
 'properties': {'network-type': {'enum': ['undirected', 'directed', 'asc']},
  'metadata': {'type': 'object'},
  'incidences': {'type': 'array',
   'items': {'type': 'object',
    'properties': {'edge': {'type': ['string', 'integer']},
     'node': {'type': ['string', 'integer']},
     'weight': {'type': 'number'},
     'direction': {'enum': ['head', 'tail']},
     'attrs': {'type': 'object'}},
    'unevaluatedProperties': False,
    'additionalProperties': False,
    'required': ['edge', 'node']}},
  'nodes': {'type': 'array',
   'items': {'type': 'object',
    'properties': {'node': {'type': ['string', 'integer']},
     'weight': {'type': 'number'},
     'attrs': {'type': 'object'}},
    'unevaluatedProperties': False,
    'additionalProperties': False,
    're

In [15]:
validator = fastjsonschema.compile(schema);

## HNX translators

In [19]:
def normalize_dataframe(df):
    default_cols = ['weight'] + list(set(df.columns).intersection(['direction'])) + ['misc_properties']
    cols = list(set(df.columns).difference(default_cols))
    dfdict = df[cols].T.to_dict()
    newdf = df[default_cols]
    for uid in newdf.index:
        newdf.at[uid,"misc_properties"].update(dfdict[uid])
    return newdf
                          
def to_hif(hg):
    hyp_objs = ['nodes','edges','incidences']
    defaults = {part:dict(getattr(hg,part).property_store._defaults) for part in hyp_objs}
    for part in hyp_objs:
        misc_properties = defaults[part].pop('misc_properties',{})
        defaults[part]['attrs'] = dict(misc_properties)
    
    incj = deepcopy(hg.incidences.to_dataframe)
    incj.index.names = ['edge','node']
    incj = normalize_dataframe(incj)
    incj = incj.rename(columns={"misc_properties":"attrs"})
    incj = incj.reset_index().to_dict(orient="records")
    
    edgj = deepcopy(hg.edges.to_dataframe)
    edgj.index.names = ['edge']
    edgj = normalize_dataframe(edgj)
    edgj = edgj.rename(columns={"misc_properties":"attrs"})
    edgj = edgj.reset_index().to_dict(orient="records")
    
    nodj = deepcopy(hg.nodes.to_dataframe)
    nodj.index.names = ['node']
    nodj = normalize_dataframe(nodj)
    nodj = nodj.rename(columns={"misc_properties":"attrs"})
    nodj = nodj.reset_index().to_dict(orient="records")
    
    return {"edges": edgj, "nodes": nodj, "incidences": incj, "metadata":{'default_attrs':defaults}}


def from_hif(hif):
    mkdd = lambda : {'weight':1, 'attrs':{}}
    hifex = deepcopy(hif)
    parts = {part:deepcopy(pd.DataFrame(hifex[part])) for part in ['nodes','edges','incidences']}    
    defaults = hifex.get('metadata',{}).get('default_attrs',{})
    defaults = {part: defaults.get(part,mkdd()) for part in parts}
    cols = dict()
    default_weights = {part:defaults[part].get('weight',1) for part in parts}
    for part in parts:
        thispart = parts[part]
        d = deepcopy(defaults[part])
        dkeys = [k for k in d.keys() if k not in ['weight','attrs']]
        cols[part] = ['weight'] + dkeys + ['attrs']
        for attr in dkeys:
            thispart[attr] = [row.attrs.pop(attr,d[attr]) for row in thispart.itertuples()]
    nodeprops = parts['nodes'][['node'] + cols['nodes']]
    edgeprops = parts['edges'][['edge'] + cols['edges']]
    incidences = parts['incidences'][['edge','node'] + cols['incidences']]
                
    return hnx.Hypergraph(incidences, default_cell_weight=default_weights['incidences'],
                          misc_cell_properties_col='attrs',
                          node_properties=nodeprops, default_edge_weight=default_weights['edges'],
                          edge_properties=edgeprops, default_node_weight=default_weights['nodes'],
                          misc_properties_col='attrs'
                          )

In [17]:
# %%timeit #5.55 ms ± 101 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
hif = to_hif(lm)
try:
    validator(hif)
except Exception as e:
    print(e)

In [20]:
json.dump(hif, open("../examples/lesmis_hif.json", "w"), allow_nan=False)

## Simple Testing

In [21]:
from copy import deepcopy

hiftest = deepcopy(hif)
hiftest["network-type"] = "ordered"
try:
    validator(hiftest)
except Exception as e:
    print(e)
validator(hiftest);

data.network-type must be one of ['undirected', 'directed', 'asc']


JsonSchemaValueException: data.network-type must be one of ['undirected', 'directed', 'asc']

In [22]:
# %%timeit #9.99 ms ± 219 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
h = from_hif(hif)

In [24]:
h.nodes.dataframe[:5]

Unnamed: 0_level_0,weight,FullName,Description,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MY,0.45,Monsieur Charles Fran\c{c}ois Bienvenu Myriel,Bishop of D--,{'avocation': 'to be kind'}
NP,0.95,Napoleon,Emperor of France,{}
MB,0.4,Mademoiselle Baptistine,sister of MY,{}
ME,0.42,Madame Magloire,housekeeper to MY,{}
CL,0.71,Countess de Lô,distant relative of MY,{}


## Hypergraph Examples

### contacts-high-school

In [25]:
hs = json.load(open(f"../examples/contacts-high-school.json", "r"))

In [26]:
hs.keys()

dict_keys(['nodes', 'hyperedges'])

In [27]:
hs["hyperedges"][:5]

[{'interaction': [454, 640], 'time': 1385982020},
 {'interaction': [1, 939], 'time': 1385982020},
 {'interaction': [185, 258], 'time': 1385982020},
 {'interaction': [9, 45], 'time': 1385982020},
 {'interaction': [9, 453], 'time': 1385982020}]

In [28]:
hs["nodes"][:2]

[{'class': 'MP',
  'id': 454,
  'has_facebook': True,
  'has_compiled_questionnaire': False,
  'facebook_friends': [34, 151, 156, 159, 866, 640, 1232],
  'sex': 'F'},
 {'class': 'MP',
  'id': 640,
  'has_facebook': True,
  'has_compiled_questionnaire': False,
  'facebook_friends': [151, 159, 454],
  'sex': 'M'}]

In [29]:
df = pd.DataFrame(hs["hyperedges"]).fillna("")
df["edge"] = df.interaction.map(lambda x: x[0])
df["node"] = df.interaction.map(lambda x: x[1])
df = df[["edge", "node", "time"]]

nodedf = pd.DataFrame(hs["nodes"])
nodedf = nodedf.set_index("id").reset_index().fillna("")

hshyp = hnx.Hypergraph(df, node_properties=nodedf)

In [30]:
hshyp.dataframe

Unnamed: 0,edges,nodes,weight,time,misc_properties
0,454,640,1,1385982020,{}
1,1,939,1,1385982020,{}
2,185,258,1,1385982020,{}
3,9,45,1,1385982020,{}
4,9,453,1,1385982020,{}
...,...,...,...,...,...
5584,232,311,1,1386343520,{}
5585,46,306,1,1386343520,{}
5586,306,471,1,1386343540,{}
5587,836,1339,1,1386343740,{}


In [31]:
hshyp.nodes.dataframe

Unnamed: 0_level_0,weight,class,has_facebook,has_compiled_questionnaire,facebook_friends,sex,questionnaire_friends,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
640,1,MP,True,False,"[151, 159, 454]",M,,{}
939,1,2BIO3,True,False,"[1, 55, 101, 106, 117, 119, 122, 132, 156, 170...",M,,{}
258,1,PC*,False,False,,F,,{}
45,1,PC,False,True,,F,"[79, 335, 496, 601, 674, 765]",{}
453,1,PC,False,False,,M,,{}
...,...,...,...,...,...,...,...,...
445,1,MP*1,False,False,,M,,{}
15,1,PC,False,False,,M,,{}
46,1,2BIO2,False,True,,F,"[196, 257, 268]",{}
70,1,2BIO3,True,True,"[119, 202, 545, 649, 425, 653, 122, 275, 132, ...",F,"[132, 240, 425, 447]",{}


In [32]:
hnx.info_dict(hshyp)

{'nrows': 317,
 'ncols': 319,
 'aspect ratio': 0.9937304075235109,
 'ncells': 5589,
 'density': 0.05526932547491668}

In [33]:
# %%timeit #18.1 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
hshif = to_hif(hshyp)

In [34]:
try:
    validator(hshif)
except Exception as e:
    print(e)

In [35]:
# %%timeit #26.2 ms ± 516 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
newhshyp = from_hif(hshif)

In [36]:
hnx.info_dict(newhshyp)

{'nrows': 317,
 'ncols': 319,
 'aspect ratio': 0.9937304075235109,
 'ncells': 5589,
 'density': 0.05526932547491668}

In [37]:
assert hshyp == newhshyp  ## this checks that the structure is the same

In [38]:
json.dump(
    hshif, open("../examples/contacts_high_school_hif.json", "w"), allow_nan=False
)

In [40]:
newhshyp.nodes.dataframe

Unnamed: 0_level_0,weight,class,has_facebook,has_compiled_questionnaire,facebook_friends,sex,questionnaire_friends,misc_properties
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
640,1,MP,True,False,"[151, 159, 454]",M,,{}
939,1,2BIO3,True,False,"[1, 55, 101, 106, 117, 119, 122, 132, 156, 170...",M,,{}
258,1,PC*,False,False,,F,,{}
45,1,PC,False,True,,F,"[79, 335, 496, 601, 674, 765]",{}
453,1,PC,False,False,,M,,{}
...,...,...,...,...,...,...,...,...
445,1,MP*1,False,False,,M,,{}
15,1,PC,False,False,,M,,{}
46,1,2BIO2,False,True,,F,"[196, 257, 268]",{}
70,1,2BIO3,True,True,"[119, 202, 545, 649, 425, 653, 122, 275, 132, ...",F,"[132, 240, 425, 447]",{}
