# Initial load of program

### Imports

In [2]:
import numpy as np
import pandas as pd
import regex as re
import networkx as nx
import matplotlib.pyplot as plt
from SPARQLWrapper import SPARQLWrapper
import warnings

warnings.filterwarnings('ignore')

### Build graph around item

In [78]:
def build_graph(node: str):
    """Build a NetworkX graph of the Wikidata neighbourhood around ``node``.

    Sends a SPARQL CONSTRUCT query to the public Wikidata endpoint, loads the
    returned triples into a DataFrame, drops unwanted rows and converts the
    remaining (Subject, Value) pairs into graph edges.

    Args:
        node: The item as a SPARQL term, e.g. ``"wd:Q171076"``. It is
            interpolated verbatim into the query text, so it must come from
            a trusted source.

    Returns:
        The graph produced by ``nx.from_pandas_edgelist`` with its default
        graph type. Each edge stores the predicate URI in its ``Property``
        attribute.
    """
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery(f"""
                    CONSTRUCT {{ 
                    {node} ?p ?object.
                    ?subject3 ?p4 ?object.
                    ?object ?p1 ?subject1.
                    ?subject ?p2 {node}.
                    ?subject ?p5 ?subject4.
                    ?subject2 ?p3 ?subject. 
                    {node} wdtn:P227 ?object2.
                    ?subject5 wdtn:P227 ?object2. 
                    }} 
                    WHERE {{
                        {{ 
                        {node} ?p ?object.
                        ?subject3 ?p4 ?object.
                        ?object ?p1 ?subject1.
                        OPTIONAL {{
                            {node} wdtn:P227 ?object2.
                            ?subject5 wdtn:P227 ?object2.}}
                        FILTER(isURI(?subject1)) 
                        }}
                    UNION
                        {{ 
                        ?subject ?p2 {node}.
                        ?subject2 ?p3 ?subject.
                        ?subject ?p5 ?subject4.
                        FILTER(isURI(?subject4))
                        }}
                    }}
                """)
    query_result = sparql.queryAndConvert()  # Produces a RDFlib Graph object
    query_df = pd.DataFrame(query_result, columns=["Subject", "Property", "Value"])

    # Strip any "rdflib.term.URIRef" wrapper text from the three columns.
    # regex=False makes the literal match explicit, so the "." characters are
    # never treated as wildcards regardless of the installed pandas version.
    for column in ("Subject", "Value", "Property"):
        query_df[column] = query_df[column].str.replace(
            "rdflib.term.URIRef", "", regex=False
        )

    # Rows whose predicate contains one of these property IDs are dropped
    # (presumably Wikidata "main subject" / "cites work" -- confirm).
    discard_properties = ["P921", "P2860"]
    # Rows that touch a statement node on either end are dropped as well.
    discard_subjects = ["/statement/"]
    property_pattern = "|".join(discard_properties)
    statement_pattern = "|".join(discard_subjects)

    query_df = query_df[~query_df.Property.str.contains(property_pattern)]
    query_df = query_df[~query_df.Subject.str.contains(statement_pattern)]
    query_df = query_df[~query_df.Value.str.contains(statement_pattern)]

    # NOTE(review): the graph type is from_pandas_edgelist's default. An
    # earlier, immediately overwritten `nx.MultiDiGraph()` assignment was
    # removed because it had no effect. Passing `create_using=nx.MultiDiGraph`
    # here would keep direction and parallel edges, but it would also change
    # what `graph.neighbors()` yields for existing callers.
    return nx.from_pandas_edgelist(query_df, "Subject", "Value", edge_attr="Property")
    
# Build the example graph around item wd:Q171076. This runs a live query
# against the Wikidata SPARQL endpoint.
test = build_graph("wd:Q171076")

### Neighborhood heuristic

In [79]:
def common_neighbors_2(graph: object, node: str) -> list:
    """Rank the nodes two hops away from ``node`` by shared neighbours.

    Every neighbour-of-a-neighbour of ``node`` is a candidate. Its score is
    the number of direct neighbours of ``node`` that are also direct
    neighbours of the candidate.

    Args:
        graph: Any object exposing ``neighbors(n)`` that returns an iterable
            of hashable nodes (e.g. a NetworkX graph).
        node: The node to rank candidates for. It is never a candidate itself.

    Returns:
        A list of ``(candidate, score)`` tuples sorted by score, highest
        first. Candidates with equal scores keep the order in which they
        were first reached.
    """
    # Wikibase rank marker nodes are skipped as candidates.
    excluded = {
        node,
        'http://wikiba.se/ontology#BestRank',
        'http://wikiba.se/ontology#NormalRank',
    }

    # Materialise the first hop once: the list keeps the traversal order,
    # the set gives constant-time membership for the intersection below.
    first_hop = list(graph.neighbors(node))
    first_hop_set = set(first_hop)

    scores = {}
    for item in first_hop:
        for candidate in graph.neighbors(item):
            # A candidate's score does not depend on the path used to reach
            # it, so it is computed only the first time it is seen.
            if candidate in excluded or candidate in scores:
                continue
            scores[candidate] = len(
                first_hop_set.intersection(graph.neighbors(candidate))
            )

    return sorted(scores.items(), key=lambda pair: pair[1], reverse=True)


# Rank candidates for the same item the example graph was built around.
# Graph nodes are full URIs, so the entity URI is used here, not "wd:".
common_neighbors_2(test, "http://www.wikidata.org/entity/Q171076")            

[('http://www.w3.org/ns/lemon/ontolex#LexicalSense', 4),
 ('http://www.wikidata.org/entity/Q900279', 2),
 ('http://www.wikidata.org/entity/Q35', 2),
 ('http://www.wikidata.org/entity/Q65464996', 2),
 ('http://www.wikidata.org/entity/Q183', 2),
 ('https://cs.wikipedia.org/wiki/Kategorie:Bioplyn', 1),
 ('https://commons.wikimedia.org/wiki/Category:Biogas', 1),
 ('http://www.wikidata.org/entity/Q4167836', 1),
 ('https://uk.wikipedia.org/wiki/%D0%9A%D0%B0%D1%82%D0%B5%D0%B3%D0%BE%D1%80%D1%96%D1%8F:%D0%91%D1%96%D0%BE%D0%B3%D0%B0%D0%B7',
  1),
 ('https://fr.wikipedia.org/wiki/Cat%C3%A9gorie:Biogaz', 1),
 ('http://www.wikidata.org/entity/L30165', 1),
 ('http://www.wikidata.org/entity/Q6821562', 1),
 ('https://et.wikipedia.org/wiki/Metaanitank', 1),
 ('http://www.wikidata.org/entity/Q557573', 1),
 ('http://www.wikidata.org/entity/Q7783222', 1),
 ('http://www.wikidata.org/entity/Q987767', 1),
 ('http://www.wikidata.org/entity/Q10858711', 1),
 ('https://commons.wikimedia.org/wiki/Category:Biogas_