In [1]:
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDFS
from typing import List, Dict, Set, Tuple
import networkx as nx
from collections import defaultdict
from utils import *
import random
from ipysigma import Sigma

In [2]:
def my_rdflib_to_networkx(kg: Graph,
                      custom_node_labels: Dict[URIRef, str] = dict(),
                      custom_edge_labels: Dict[Tuple[URIRef, URIRef], str] = dict(),
                      custom_node_sizes: Dict[URIRef, float] = dict()
                      ) -> Tuple[nx.MultiDiGraph, Dict, Dict, Dict, Dict, Dict]:
    """Convert an rdflib graph into a networkx graph plus ipysigma styling dicts.

    Triples whose subject or object is a ``Literal`` are skipped. All triples
    that share the same (subject, object) pair are collapsed into a single
    edge whose ``"label"`` attribute joins the labels of their predicates.

    Args:
        kg: The rdflib graph to convert.
        custom_node_labels: Labels that take priority over the ``rdfs:label``
            values found in ``kg``. Because predicate labels are looked up in
            the same mapping, an entry for a predicate also changes the edge
            label.
        custom_edge_labels: Text prepended (without a separator) to the
            predicate label of every triple between the given
            (subject, object) pair.
        custom_node_sizes: Amounts added to the degree-based size of a node.
            Entries for nodes that are not in the resulting graph are ignored.

    Returns:
        A 6-tuple ``(g, node_sizes, edge_sizes, node_labels, node_colors,
        edge_colors)``. ``g`` is an ``nx.MultiDiGraph``; the edge dicts are
        keyed by the tuples produced by iterating ``g.edges``, the node dicts
        by node.

    Note:
        The ``dict()`` defaults are shared between calls; this function only
        reads them and never mutates them.
    """
    node_labels = {}
    for s, o in kg.subject_objects(predicate=RDFS.label):
        # Only Literals carry ``.value``; fall back to the term's text otherwise.
        node_labels[s] = o.value if isinstance(o, Literal) else str(o)
    # Any label defined in custom_node_labels has priority.
    # (dict.update instead of ``|=`` so this also works on older Pythons.)
    node_labels.update(custom_node_labels)

    g = nx.MultiDiGraph()

    # (subject, object) -> set of labels of all predicates linking the pair.
    edge_labels = defaultdict(set)
    for s, p, o in kg:
        # Including literals in the graph seems to cause issues.
        if isinstance(s, Literal) or isinstance(o, Literal):
            continue
        edge_lab = node_labels.get(p, "")
        if (s, o) in custom_edge_labels:
            edge_lab = custom_edge_labels[(s, o)] + edge_lab
        edge_labels[(s, o)].add(edge_lab)

    # Workaround to handle cases where multiple edges exist between two nodes
    # (parallel edges cause problems with ipysigma): one edge per pair, with
    # the predicate labels merged. Empty labels are dropped and the rest are
    # sorted so the joined text is clean and reproducible across runs.
    g.add_edges_from(
        (s, o, {"label": ', '.join(sorted(lab for lab in labs if lab))})
        for (s, o), labs in edge_labels.items()
    )

    # Every node needs an entry, even if no label is known for it.
    for n in g.nodes:
        node_labels.setdefault(n, "")

    # Make nodes with more edges larger.
    node_sizes = dict(g.degree)
    for node, extra in custom_node_sizes.items():
        # Skip nodes that never made it into the graph instead of raising KeyError.
        if node in node_sizes:
            node_sizes[node] += extra

    edge_colors = {k: "property" for k in g.edges}
    node_colors = {n: "entity" for n in g.nodes}
    edge_sizes = {k: 1 for k in g.edges}

    # NOTE(review): nothing in this function populates connecting_edges, so
    # the highlighting below is currently a no-op; kept as a hook for marking
    # "connection" edges and their endpoints.
    connecting_edges = []
    for e in connecting_edges:
        edge_colors[e] = "connection"
        edge_sizes[e] = 2
        node_colors[e[0]] = "cause-effect"
        node_colors[e[1]] = "cause-effect"
    return g, node_sizes, edge_sizes, node_labels, node_colors, edge_colors

def setup_sigma_graph(kg: Graph,
                      custom_node_labels: Dict[URIRef, str] = dict(),
                      custom_edge_labels: Dict[Tuple[URIRef, URIRef], str] = dict(),
                      custom_node_sizes: Dict[URIRef, float] = dict()) -> Sigma:
    """Build a Sigma widget for an rdflib graph.

    The graph is first converted with ``my_rdflib_to_networkx`` (the custom
    label/size mappings are forwarded unchanged); the styling dictionaries it
    returns are then handed to ``Sigma``.
    """
    converted = my_rdflib_to_networkx(
        kg, custom_node_labels, custom_edge_labels, custom_node_sizes
    )
    (nx_graph, sizes_by_node, sizes_by_edge,
     labels_by_node, colors_by_node, colors_by_edge) = converted

    # show_all_labels=True is deliberately left out; label_density is used instead.
    widget_options = {
        "node_size": sizes_by_node,
        "edge_size": sizes_by_edge,
        "label_density": 2,
        "default_edge_type": "arrow",
        "node_border_color_from": "node",
        "node_label": labels_by_node,
        "node_color": colors_by_node,
        "edge_color": colors_by_edge,
    }
    return Sigma(nx_graph, **widget_options)


In [5]:
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
# Baseline for comparison: rdflib's stock rdflib_to_networkx_multidigraph converter.
g = Graph()
g.parse(source=(DATA_DIR / "pred_path.ttl").resolve())
# Keeping Literal nodes causes issues for sigma, so collect them first and
# then drop every triple in which one of them is the subject or the object.
removeset = {node for node in g.all_nodes() if isinstance(node, Literal)}
for literal_node in removeset:
    for triple_pattern in ((None, None, literal_node), (literal_node, None, None)):
        g.remove(triple_pattern)
base_nxg = rdflib_to_networkx_multidigraph(g)
sig = Sigma(base_nxg, default_edge_type="arrow", label_density=2)
sig

Sigma(nx.MultiDiGraph with 9 nodes and 13 edges)

In [24]:
# Same pred_path.ttl data, rendered through the custom setup_sigma_graph helper.
g = Graph()
g.parse(source=(DATA_DIR / "pred_path.ttl").resolve())
sig = setup_sigma_graph(kg=g)
sig

Sigma(nx.MultiDiGraph with 9 nodes and 13 edges)

In [25]:
# Render the fc_mini.ttl graph through the custom setup_sigma_graph helper.
g = Graph()
g.parse(source=(DATA_DIR / "fc_mini.ttl").resolve())
sig = setup_sigma_graph(kg=g)
sig

Sigma(nx.MultiDiGraph with 234 nodes and 248 edges)

In [26]:
# Render the fc_three.ttl graph through the custom setup_sigma_graph helper.
g = Graph()
g.parse(source=(DATA_DIR / "fc_three.ttl").resolve())
sig = setup_sigma_graph(kg=g)
sig

Sigma(nx.MultiDiGraph with 139,131 nodes and 255,473 edges)