In [22]:
import json
import networkx as nx

def summarize_statistic(graph_data):
    """Compute summary statistics for a graph described by a list of triples.

    Parameters
    ----------
    graph_data : dict
        Must provide two keys:

        * ``"coo"`` -- a sized iterable of 3-item entries, unpacked as
          ``(s, o, r)``. Each entry becomes the directed edge ``s -> o`` with
          ``r`` stored in the edge attribute ``relation``.
        * ``"num_entities"`` -- the entity count used as the denominator of
          every average reported below.

    Returns
    -------
    dict
        Keys ``"Avg node degree"``, ``"Density"``, ``"Avg degree centrality"``,
        ``"Avg betweenness centrality"`` and ``"Avg closeness centrality"``.
        When ``"coo"`` is empty every value is zero.

    Raises
    ------
    KeyError
        If ``"coo"`` or ``"num_entities"`` is missing.
    ZeroDivisionError
        If ``"num_entities"`` is 0 while ``"coo"`` is non-empty (inconsistent
        input).

    Notes
    -----
    * ``nx.DiGraph`` keeps at most one edge per ordered ``(s, o)`` pair, so
      triples that share a pair but differ in ``r`` collapse into one edge
      (the last ``r`` wins). "Avg node degree" counts every triple, whereas
      the networkx-based measures only see the collapsed graph.
    * The graph only contains nodes that occur in at least one triple, yet the
      centrality sums are divided by ``"num_entities"``; entities without any
      edge therefore contribute zero to the averages.
    """
    triples = graph_data["coo"]
    num_entities = graph_data["num_entities"]

    # Without triples the graph is empty and every statistic is zero. Returning
    # here also avoids dividing by zero when num_entities is 0.
    if len(triples) == 0:
        return {
            "Avg node degree": 0.0,
            "Density": 0,
            "Avg degree centrality": 0.0,
            "Avg betweenness centrality": 0.0,
            "Avg closeness centrality": 0.0,
        }

    G = nx.DiGraph()
    for s, o, r in triples:
        G.add_edge(s, o, relation=r)

    def _mean_over_entities(centrality):
        # Average a node -> score mapping over the declared entity count.
        return sum(centrality.values()) / num_entities

    stats = {
        "Avg node degree": len(triples) / num_entities,
        "Density": nx.density(G),
        "Avg degree centrality": _mean_over_entities(nx.degree_centrality(G)),
        "Avg betweenness centrality": _mean_over_entities(nx.betweenness_centrality(G)),
        "Avg closeness centrality": _mean_over_entities(nx.closeness_centrality(G)),
    }

    return stats

In [26]:
# Load the graph stored in ./proc/train.json and summarise it.
# JSON text is UTF-8, so the encoding is passed explicitly rather than relying
# on the platform's default text encoding.
with open("./proc/train.json", 'r', encoding="utf-8") as fp:
    train = json.load(fp)

train_stats = summarize_statistic(train)
train_stats

{'Avg node degree': 1.6116094986807388,
 'Density': 0.0007890385413044767,
 'Avg degree centrality': 0.0015755788075441672,
 'Avg betweenness centrality': 6.496659856764448e-09,
 'Avg closeness centrality': 0.0007923029749861181}

In [27]:
# Load the graph stored in ./proc/dev.json and summarise it.
# JSON text is UTF-8, so the encoding is passed explicitly rather than relying
# on the platform's default text encoding.
with open("./proc/dev.json", 'r', encoding="utf-8") as fp:
    dev = json.load(fp)

dev_stats = summarize_statistic(dev)
dev_stats

{'Avg node degree': 1.36750651607298,
 'Density': 0.0011114042509317297,
 'Avg degree centrality': 0.002218946106551809,
 'Avg betweenness centrality': 0.0,
 'Avg closeness centrality': 0.0011094730532758986}

In [28]:
# Load the graph stored in ./proc/test.json and summarise it.
# JSON text is UTF-8, so the encoding is passed explicitly rather than relying
# on the platform's default text encoding.
with open("./proc/test.json", 'r', encoding="utf-8") as fp:
    test = json.load(fp)

test_stats = summarize_statistic(test)
test_stats

{'Avg node degree': 0.0,
 'Density': 0,
 'Avg degree centrality': 0.0,
 'Avg betweenness centrality': 0.0,
 'Avg closeness centrality': 0.0}