In [1]:
import json
import networkx as nx

def summarize_statistic(graph_data):
    """Compute summary statistics for a graph described by a list of triples.

    Parameters
    ----------
    graph_data : dict
        Must provide two keys:

        * ``"coo"`` -- a sized iterable whose items unpack into three values,
          used here as ``(source, target, relation)``.
        * ``"num_entities"`` -- the number used as the denominator of every
          average reported below.

    Returns
    -------
    dict
        Maps each statistic's label to its value: ``"Avg node degree"``,
        ``"Density"``, ``"Avg degree centrality"``,
        ``"Avg betweenness centrality"`` and ``"Avg closeness centrality"``.

    Notes
    -----
    * ``"Avg node degree"`` is ``len(coo) / num_entities`` (triples per
      entity); it is computed from the raw triple list, not from the graph.
    * The graph only contains nodes that appear in at least one triple, so
      density and the centralities are computed by networkx over those nodes,
      while the averages are still divided by ``num_entities``.
    * A ``DiGraph`` holds at most one edge per ordered node pair, so repeated
      ``(source, target)`` pairs collapse into one edge that keeps the last
      relation seen.
    * When there are no triples and ``num_entities`` is 0, every statistic is
      reported as zero instead of raising ``ZeroDivisionError``.
    """
    triples = graph_data["coo"]
    num_entities = graph_data["num_entities"]

    # Nothing to average over: return zeros rather than dividing by zero.
    # Inconsistent input (triples present but zero entities) is deliberately
    # left to fail below exactly as it did before.
    if num_entities == 0 and len(triples) == 0:
        return {
            "Avg node degree": 0.0,
            "Density": 0,
            "Avg degree centrality": 0.0,
            "Avg betweenness centrality": 0.0,
            "Avg closeness centrality": 0.0,
        }

    G = nx.DiGraph()
    for s, o, r in triples:
        G.add_edge(s, o, relation=r)

    def _entity_average(per_node_scores):
        # Sum the per-node scores and divide by the declared entity count.
        return sum(per_node_scores.values()) / num_entities

    stats = {
        "Avg node degree": len(triples) / num_entities,
        "Density": nx.density(G),
        "Avg degree centrality": _entity_average(nx.degree_centrality(G)),
        "Avg betweenness centrality": _entity_average(nx.betweenness_centrality(G)),
        "Avg closeness centrality": _entity_average(nx.closeness_centrality(G)),
    }

    return stats

In [2]:
# Load the training split and summarize its graph statistics.
# JSON text is UTF-8, so pin the encoding instead of relying on the platform
# locale default used by open().
with open("./proc/train.json", 'r', encoding="utf-8") as fp:
    train = json.load(fp)

train_stats = summarize_statistic(train)
train_stats

{'Avg node degree': 0.9349852796859667,
 'Density': 0.00025524847132293475,
 'Avg degree centrality': 0.0004839450898879857,
 'Avg betweenness centrality': 5.20560444943932e-07,
 'Avg closeness centrality': 0.0003804872669994633}

In [3]:
# Load the validation split and summarize its graph statistics.
# JSON text is UTF-8, so pin the encoding instead of relying on the platform
# locale default used by open().
with open("./proc/val.json", 'r', encoding="utf-8") as fp:
    dev = json.load(fp)

dev_stats = summarize_statistic(dev)
dev_stats

{'Avg node degree': 0.9262414800389484,
 'Density': 0.00025029111804726606,
 'Avg degree centrality': 0.00047511444462819543,
 'Avg betweenness centrality': 1.2686971651065607e-07,
 'Avg closeness centrality': 0.0002968602953885731}

In [4]:
# Load the test split and summarize its graph statistics.
# JSON text is UTF-8, so pin the encoding instead of relying on the platform
# locale default used by open().
with open("./proc/test.json", 'r', encoding="utf-8") as fp:
    test = json.load(fp)

test_stats = summarize_statistic(test)
test_stats

{'Avg node degree': 0.0,
 'Density': 0,
 'Avg degree centrality': 0.0,
 'Avg betweenness centrality': 0.0,
 'Avg closeness centrality': 0.0}