In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from SPARQLWrapper import SPARQLWrapper
import warnings

# Silence every warning category for the rest of the session (no category or
# module filter is given, so nothing is exempt).
warnings.filterwarnings('ignore')

### Test graph

In [24]:
def build_graph(node: str):
    """Fetch the neighbourhood of a Wikidata item and return it as a graph.

    A SPARQL ``CONSTRUCT`` query is sent to the public Wikidata endpoint. The
    constructed triples cover statements leaving the item, statements pointing
    at it, one further hop on either side, and (optionally) other items that
    share the item's ``wdtn:P227`` value.

    Parameters
    ----------
    node : str
        Item to centre the graph on, either as a bare id (``"Q171076"``) or as
        a full entity URI (``"http://www.wikidata.org/entity/Q171076"``).
        Any value that is not a well-formed item id falls back to ``Q171076``
        and a warning is issued, so older calls that passed a placeholder
        keep producing the same graph.

    Returns
    -------
    networkx.Graph
        Undirected graph whose nodes are plain URI strings. Each edge stores
        the predicate URI under the ``"Property"`` attribute. Because the
        graph is a simple ``Graph``, several triples between the same pair of
        nodes collapse into one edge.
    """
    default_entity = "Q171076"

    # Keep only the last path segment so both "Q123" and ".../entity/Q123" work.
    candidate = str(node).rsplit("/", 1)[-1]
    digits = candidate[1:]
    if candidate.startswith("Q") and digits.isascii() and digits.isdigit():
        entity = candidate
    else:
        warnings.warn(
            f"{node!r} is not a Wikidata item id; using {default_entity} instead.",
            stacklevel=2,
        )
        entity = default_entity

    # `entity` is validated above (letter Q followed by ASCII digits only), so
    # substituting it into the query text cannot alter the query structure.
    query = """
        CONSTRUCT {
            wd:__ENTITY__ ?p ?object.
            ?subject3 ?p4 ?object.
            ?object ?p1 ?subject1.
            ?subject ?p2 wd:__ENTITY__.
            ?subject ?p5 ?subject4.
            ?subject2 ?p3 ?subject.
            wd:__ENTITY__ wdtn:P227 ?val.
            ?node wdtn:P227 ?val. }
        WHERE {
        {
            wd:__ENTITY__ ?p ?object.
            ?subject3 ?p4 ?object.
            ?object ?p1 ?subject1.
            OPTIONAL {
            wd:__ENTITY__ wdtn:P227 ?val.
            ?node wdtn:P227 ?val. }
            FILTER(ISURI(?subject3))
            FILTER(ISURI(?object))
            FILTER(ISURI(?subject1))
        }
        UNION
        {
            ?subject ?p2 wd:__ENTITY__;
            ?p5 ?subject4.
            ?subject2 ?p3 ?subject.
            FILTER(ISURI(?subject2))
            FILTER(ISURI(?subject4))
        }
        }
    """.replace("__ENTITY__", entity)

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery(query)
    query_result = sparql.queryAndConvert()  # Produces an RDFlib Graph object

    # Store plain `str` values instead of rdflib term objects so that nodes
    # and edge attributes can be compared against ordinary strings.
    triples = [(str(s), str(p), str(o)) for s, p, o in query_result]
    query_df = pd.DataFrame(triples, columns=["Subject", "Property", "Value"])

    # Drop unwanted predicates by comparing the exact property id (last URI
    # segment); a substring test on "P921" would also discard P9210, P9211, ...
    discard_properties = ["P921", "P2860"]
    property_ids = query_df["Property"].str.rsplit("/", n=1).str[-1]
    query_df = query_df[~property_ids.isin(discard_properties)]

    return nx.from_pandas_edgelist(query_df, "Subject", "Value", edge_attr="Property")

# Build the test graph for Wikidata item Q171076, the entity inspected below.
test2 = build_graph('Q171076')

### Identifier heuristic

In [52]:
def identifier(graph: object, node: str):
    """Find another node that shares a P227 value with ``node``.

    Starting at ``node``, every neighbour reached over an edge whose
    ``"Property"`` attribute is the direct-normalized P227 URI is treated as
    an identifier value. Any other node attached to that value by the same
    property is reported as a match.

    Parameters
    ----------
    graph : object
        Graph-like object offering ``neighbors(n)`` and ``graph[a][b]``
        lookups that return a mapping with a ``"Property"`` key.
    node : str
        Node to start from.

    Returns
    -------
    dict
        Empty when nothing matches; otherwise a single entry mapping the P227
        property URI to the matching node. When several nodes match, the one
        visited last is kept.
    """
    p227_uri = "http://www.wikidata.org/prop/direct-normalized/P227"
    matches = {}

    for id_value in graph.neighbors(node):
        # Only edges labelled with P227 lead to an identifier value.
        if graph[node][id_value]["Property"] != p227_uri:
            continue

        for other in graph.neighbors(id_value):
            # Skip the way back to the start node; the edge label is looked up
            # only for the remaining candidates.
            if other != node and graph[id_value][other]["Property"] == p227_uri:
                matches[p227_uri] = other

    return matches

# Look in the test graph for another node linked to the same P227 value as Q171076.
identifier(test2, "http://www.wikidata.org/entity/Q171076")

{'http://www.wikidata.org/prop/direct-normalized/P227': 'http://www.wikidata.org/entity/Q338712'}