In [1]:
# pip install SPARQLWrapper networkx pyvis
import os
import re
import pickle
from collections import deque

from SPARQLWrapper import SPARQLWrapper, JSON
import networkx as nx
from pyvis.network import Network

# SPARQL endpoint URL handed to SPARQLWrapper by get_person_name and get_coauthor_info.
SPARQL_ENDPOINT = "https://sparql.dblp.org/sparql"

In [2]:
# Person URI used as the starting point of the crawl in the final cell.
root_person_uri = "https://dblp.org/pid/82/7468"
# Passed as max_depth to build_coauthor_network in the final cell.
max_depth = 2

In [3]:
def sanitize_uri(uri: str) -> str:
    """Convert a URI into an identifier of ASCII letters, digits and underscores.

    Each run of one or more characters outside ``[a-zA-Z0-9]`` is collapsed
    into a single ``_``, and the result is prefixed with ``n_``.

    Args:
        uri: The string to sanitize.

    Returns:
        The sanitized identifier, always starting with ``n_``.
    """
    return "n_" + re.sub(r"[^a-zA-Z0-9]+", "_", uri)


def sanitize_nodes_and_edges(nodes, edges):
    """Re-key a node/edge collection by sanitized identifiers.

    Args:
        nodes: Mapping of URI -> info dict. Every info dict must contain a
            ``"name"`` key.
        edges: Mapping of ``(uri1, uri2)`` -> info dict with an optional
            ``"weight"`` entry (treated as 1 when absent). Both URIs must be
            keys of ``nodes``.

    Returns:
        A ``(safe_nodes, safe_edges, uri_mapping)`` tuple where
        ``safe_nodes`` maps sanitized id -> ``{"name", "original_uri"}``,
        ``safe_edges`` maps an ordered ``(id_a, id_b)`` pair ->
        ``{"weight"}``, and ``uri_mapping`` maps original URI -> sanitized id.
    """
    uri_mapping = {}
    safe_nodes = {}
    for original, details in nodes.items():
        identifier = sanitize_uri(original)
        uri_mapping[original] = identifier
        safe_nodes[identifier] = {"name": details["name"], "original_uri": original}

    safe_edges = {}
    for (first, second), details in edges.items():
        weight = details.get("weight", 1)
        left, right = uri_mapping[first], uri_mapping[second]

        # Store each undirected edge under a single, ordered key.
        edge_key = (left, right) if left < right else (right, left)

        if edge_key in safe_edges:
            # Two input edges ended up on the same key: add their weights.
            safe_edges[edge_key]["weight"] += weight
        else:
            safe_edges[edge_key] = {"weight": weight}

    return safe_nodes, safe_edges, uri_mapping

In [4]:
def get_person_name(person_uri: str) -> str:
    """Fetch the ``dblp:creatorName`` of a person from the SPARQL endpoint.

    Args:
        person_uri: URI of the person; it is inserted verbatim into the query.

    Returns:
        The name from the first result row, or ``"Unknown"`` when there is no
        row, the name is unbound or empty, or the query raises (in which case
        an ``[ERROR]`` line is printed).
    """
    client = SPARQLWrapper(SPARQL_ENDPOINT)
    client.setReturnFormat(JSON)

    query = f"""
    PREFIX dblp: <https://dblp.org/rdf/schema#>
    SELECT ?name
    WHERE {{
        OPTIONAL {{ <{person_uri}> dblp:creatorName ?name . }}
    }}
    LIMIT 1
    """
    client.setQuery(query)

    try:
        rows = client.query().convert()["results"]["bindings"]
        # The pattern is OPTIONAL, so a row may come back without ?name bound.
        name = rows[0].get("name", {}).get("value") if rows else None
    except Exception as exc:
        print(f"[ERROR] SPARQL query failed when fetching name for {person_uri}: {exc}")
        return "Unknown"

    return name if name else "Unknown"

In [5]:
# [Debug] Look up the name for one hard-coded person URI and display the result.
get_person_name("https://dblp.org/pid/82/7468")

'Zied Bouyahia'

In [6]:
def get_coauthor_info(person_uri):
    """Query the SPARQL endpoint for everyone who shares a publication with a person.

    The query groups by co-author and counts the distinct publications that
    are ``dblp:authoredBy`` both ``person_uri`` and that co-author.

    Args:
        person_uri: URI of the person; it is inserted verbatim into the query.

    Returns:
        A list of ``(coauthor_uri, name, pub_count)`` tuples. ``name`` is
        ``""`` when the row has no name binding and ``pub_count`` is ``0``
        when the row has no count binding. An empty list is returned (after
        printing an ``[ERROR]`` line) when the query raises.
    """
    client = SPARQLWrapper(SPARQL_ENDPOINT)
    client.setReturnFormat(JSON)

    query = f"""
    PREFIX dblp: <https://dblp.org/rdf/schema#>

    SELECT ?coauthor (SAMPLE(?coauthorName) AS ?name) (COUNT(DISTINCT ?pub) AS ?pubCount)
    WHERE {{
        ?pub dblp:authoredBy <{person_uri}> ;
             dblp:authoredBy ?coauthor .
        FILTER (?coauthor != <{person_uri}>)

        OPTIONAL {{ ?coauthor dblp:creatorName ?coauthorName . }}
    }}
    GROUP BY ?coauthor
    """

    client.setQuery(query)

    try:
        response = client.query().convert()
    except Exception as exc:
        print(f"[ERROR] SPARQL query failed for {person_uri}: {exc}")
        return []

    # One tuple per result row; optional bindings fall back to "" / 0.
    return [
        (
            binding["coauthor"]["value"],
            binding["name"]["value"] if "name" in binding else "",
            int(binding["pubCount"]["value"]) if "pubCount" in binding else 0,
        )
        for binding in response["results"]["bindings"]
    ]

In [7]:
# [Debug] Fetch the co-author tuples for one hard-coded person URI and display them.
get_coauthor_info("https://dblp.org/pid/82/7468")

[('https://dblp.org/pid/03/6572', 'Hedi Haddad', 12),
 ('https://dblp.org/pid/05/5769', 'Ahmed Nait-Sidi-Moh', 1),
 ('https://dblp.org/pid/08/565', 'Khaled Ghédira', 4),
 ('https://dblp.org/pid/17/848', 'Nafaâ Jabeur', 9),
 ('https://dblp.org/pid/184/9350', 'Leila Horchani', 2),
 ('https://dblp.org/pid/206/1503', 'Hana Gharrad', 1),
 ('https://dblp.org/pid/246/1303', 'Shafique A. Chaudhry', 1),
 ('https://dblp.org/pid/349/4118', 'Mahmoud Mastouri', 1),
 ('https://dblp.org/pid/70/7470', 'Monia Bellalouna', 4),
 ('https://dblp.org/pid/76/6334', 'Stéphane Derrode', 5),
 ('https://dblp.org/pid/81/3284', 'Wojciech Pieczynski', 4),
 ('https://dblp.org/pid/82/2493', 'Ansar Yasar', 2),
 ('https://dblp.org/pid/88/7260', 'Patrick Jaillet', 2),
 ('https://dblp.org/pid/88/7651', 'Fatma Outay', 1)]

In [8]:
def build_coauthor_network(root_person_uri, max_depth=1):
    """Crawl the co-author graph breadth-first, starting from one person.

    Args:
        root_person_uri: URI of the person the crawl starts from.
        max_depth: People dequeued at this depth (or deeper) are kept as
            nodes but their co-authors are not fetched.

    Returns:
        A ``(nodes, edges)`` pair as produced by ``sanitize_nodes_and_edges``:
        both are keyed by sanitized ids rather than by the original URIs.
    """
    nodes = {root_person_uri: {"name": get_person_name(root_person_uri)}}
    edges = {}

    seen = {root_person_uri}
    pending = deque([(root_person_uri, 0)])

    while pending:
        person, level = pending.popleft()

        # People at the depth limit stay in the graph but are not expanded.
        if level >= max_depth:
            continue

        for co_uri, co_name, pub_count in get_coauthor_info(person):
            if co_uri not in nodes:
                nodes[co_uri] = {"name": co_name or "Unknown"}

            # Undirected edge: the sorted pair is the key, first weight seen wins.
            pair = tuple(sorted([person, co_uri]))
            edges.setdefault(pair, {"weight": pub_count})

            if co_uri not in seen:
                seen.add(co_uri)
                pending.append((co_uri, level + 1))

    safe_nodes, safe_edges, _ = sanitize_nodes_and_edges(nodes, edges)
    return safe_nodes, safe_edges

In [9]:
def build_adjacency_list(nodes, edges):
    """Expand an undirected edge mapping into a per-node neighbour list.

    Args:
        nodes: Iterable of node ids (iterating a dict yields its keys). Every
            id gets an entry, even when it has no edges.
        edges: Mapping of ``(id1, id2)`` -> info dict with an optional
            ``"weight"`` entry (treated as 1 when absent). Both ids must be
            present in ``nodes``.

    Returns:
        A dict mapping each node id to a list of ``(neighbour_id, weight)``
        tuples; every edge appears once in each endpoint's list.
    """
    adjacency = {node_id: [] for node_id in nodes}

    for (left, right), attrs in edges.items():
        weight = attrs.get("weight", 1)
        # Record the edge in both directions.
        for source, target in ((left, right), (right, left)):
            adjacency[source].append((target, weight))

    return adjacency

In [10]:
# [Debug] Build a max_depth=1 network for one hard-coded person URI and
# display its adjacency list. Note: this binds the notebook-level names
# `nodes` and `edges`.
nodes, edges = build_coauthor_network("https://dblp.org/pid/82/7468", max_depth=1)

build_adjacency_list(nodes, edges)

{'n_https_dblp_org_pid_82_7468': [('n_https_dblp_org_pid_03_6572', 12),
  ('n_https_dblp_org_pid_05_5769', 1),
  ('n_https_dblp_org_pid_08_565', 4),
  ('n_https_dblp_org_pid_17_848', 9),
  ('n_https_dblp_org_pid_184_9350', 2),
  ('n_https_dblp_org_pid_206_1503', 1),
  ('n_https_dblp_org_pid_246_1303', 1),
  ('n_https_dblp_org_pid_349_4118', 1),
  ('n_https_dblp_org_pid_70_7470', 4),
  ('n_https_dblp_org_pid_76_6334', 5),
  ('n_https_dblp_org_pid_81_3284', 4),
  ('n_https_dblp_org_pid_82_2493', 2),
  ('n_https_dblp_org_pid_88_7260', 2),
  ('n_https_dblp_org_pid_88_7651', 1)],
 'n_https_dblp_org_pid_03_6572': [('n_https_dblp_org_pid_82_7468', 12)],
 'n_https_dblp_org_pid_05_5769': [('n_https_dblp_org_pid_82_7468', 1)],
 'n_https_dblp_org_pid_08_565': [('n_https_dblp_org_pid_82_7468', 4)],
 'n_https_dblp_org_pid_17_848': [('n_https_dblp_org_pid_82_7468', 9)],
 'n_https_dblp_org_pid_184_9350': [('n_https_dblp_org_pid_82_7468', 2)],
 'n_https_dblp_org_pid_206_1503': [('n_https_dblp_org_pid_

In [11]:
def visualize_interactive_pyvis(
    nodes, edges, root_id, output_filename="coauthor_network_interactive"
):
    """Write the network to disk as a pyvis HTML page and a pickled networkx graph.

    Args:
        nodes: Mapping of node id -> info dict with a ``"name"`` entry, used
            as the node label.
        edges: Mapping of ``(id1, id2)`` -> info dict with a ``"weight"``
            entry, used both as the edge weight and as the edge label.
        root_id: Node id that gets a distinct colour and a larger font.
        output_filename: Name of the directory created under ``output/``;
            ``graph.html`` and ``graph.pkl`` are written inside it.

    Returns:
        None.
    """
    graph = nx.Graph()

    for node_id, info in nodes.items():
        if node_id == root_id:
            # Highlight the root node.
            attrs = {"label": info["name"], "color": "#ED3B3E", "font": {"size": 24}}
        else:
            attrs = {"label": info["name"], "font": {"size": 18}}
        graph.add_node(node_id, **attrs)

    for (u, v), edge_info in edges.items():
        weight = edge_info["weight"]
        graph.add_edge(u, v, weight=weight, label=f"{weight}", font={"size": 12})

    out_dir = f"output/{output_filename}"
    os.makedirs(out_dir, exist_ok=True)

    net = Network(height="100vh", width="100vw")
    net.from_nx(graph)
    net.save_graph(f"{out_dir}/graph.html")

    # Also save the networkx graph itself next to the HTML page.
    with open(f"{out_dir}/graph.pkl", "wb") as handle:
        pickle.dump(graph, handle)

In [12]:
# [Debug] Render the notebook-level `nodes` / `edges` (bound by the earlier
# debug cell that calls build_coauthor_network) into
# output/coauthor_network_interactive_test/.
visualize_interactive_pyvis(
    nodes,
    edges,
    root_id=sanitize_uri("https://dblp.org/pid/82/7468"),
    output_filename="coauthor_network_interactive_test",
)

In [13]:
if __name__ == "__main__":
    # Full run: crawl from root_person_uri down to max_depth, report the
    # graph size, then write the visualisation files.
    print(f"=== Building co-author network for {root_person_uri}, up to depth={max_depth} ===")

    nodes, edges = build_coauthor_network(root_person_uri, max_depth=max_depth)

    print(f"Total authors found: {len(nodes)}")
    print(f"Total edges found: {len(edges)}")

    # The output directory name embeds the root person's name, which is
    # fetched from the endpoint again here.
    visualize_interactive_pyvis(
        nodes=nodes,
        edges=edges,
        root_id=sanitize_uri(root_person_uri),
        output_filename=f"Coauthor Network of {get_person_name(root_person_uri)} with Depth {max_depth}",
    )

    print("[INFO] Done.")

=== Building co-author network for https://dblp.org/pid/82/7468, up to depth=2 ===
Total authors found: 759
Total edges found: 878
[INFO] Done.
