In [8]:
import time
from collections import namedtuple
from typing import List, Optional, Tuple
from urllib.error import HTTPError

from SPARQLWrapper import SPARQLWrapper2
from SPARQLWrapper.SPARQLExceptions import EndPointInternalError

In [9]:
# Lightweight record describing one knowledge-base predicate.
Predicate = namedtuple(
    "Predicate",
    ["uri", "kb_name", "relation"],
)
# Predicate queried in this notebook; kb_name and relation are left unset.
pred = Predicate(uri="http://schema.org/author", kb_name=None, relation=None)
# Shared SPARQL client pointed at the YAGO query endpoint.
sparql = SPARQLWrapper2("https://yago-knowledge.org/sparql/query")

#######################################################################
# 2. Extract all subject-object pairs for each pred in relevant preds #
#######################################################################
def _lookup_display_name(uri: str, role: str) -> str:
    """Resolve a human-readable name for ``uri`` with the module-level ``sparql`` client.

    The query prefers the entity's English ``rdfs:label``. When that label is
    the placeholder "Generic instance", the English label of the entity's
    ``rdf:type`` is used instead. If neither expression yields a value, the
    URI itself is returned.

    Args:
        uri: URI of the entity to name.
        role: "subject" or "object"; only used to make the error message precise.

    Returns:
        The value bound to ``?output_obj`` in the single result row.

    Raises:
        ValueError: if the endpoint returns anything other than exactly one row.
    """
    name_query = """
SELECT DISTINCT ?output_obj WHERE {{
    OPTIONAL {{
      <{0}> rdfs:label ?obj_label .
      FILTER (LANG(?obj_label) = 'en')
    }} # Get the label (name) for the object if it exists
    OPTIONAL {{
      <{0}> rdf:type ?obj_type .
      ?obj_type rdfs:label ?obj_type_name .
      FILTER (LANG(?obj_type_name) = 'en')
    }} # get the name of the type for the object if the type exists
    BIND(COALESCE(IF(STR(?obj_label) != "Generic instance", ?obj_label, ?obj_type_name), <{0}>) AS ?output_obj)
}}
""".format(uri)

    sparql.setQuery(name_query)
    rows = sparql.query().bindings
    if len(rows) != 1:
        raise ValueError(
            f"Expected exactly 1 label for {role} {uri}, got {len(rows)}: {rows}"
        )
    return rows[0]["output_obj"].value


def get_so_pairs_for_pred_chained(pred: Predicate) -> Optional[List[Tuple[str, str]]]:
    """Fetch up to 1000 (subject name, object name) pairs for one predicate.

    A first query selects distinct ``?s ?o`` pairs linked by ``pred.uri``,
    ordered by the MD5 of their concatenated string forms and limited to 1000
    rows. Each subject and each object is then resolved to a display name with
    one follow-up query apiece, so a full run issues up to 2001 requests.

    Args:
        pred: Predicate whose ``uri`` is substituted into the SPARQL query.

    Returns:
        A list of ``(subject_name, object_name)`` tuples. ``None`` if the
        endpoint raised ``HTTPError`` or ``EndPointInternalError``. If any
        other ``Exception`` occurs it is printed and the pairs collected so
        far are returned (best effort).
    """
    start_time = time.time()
    print(f"Querying {pred.uri}.")
    so_pairs: Optional[List[Tuple[str, str]]] = []

    query = """
SELECT DISTINCT ?s ?o WHERE {{
    ?s <{}> ?o
    BIND(MD5(CONCAT(STR(?s), STR(?o))) AS ?sortkey) .
}}
ORDER BY ?sortkey
LIMIT 1000
""".format(
        pred.uri
    )

    try:
        sparql.setQuery(query)
        for result in sparql.query().bindings:
            s_name = _lookup_display_name(result["s"].value, "subject")
            obj_name = _lookup_display_name(result["o"].value, "object")
            so_pairs.append((s_name, obj_name))
    except (HTTPError, EndPointInternalError):
        print(f"HTTPerror for uri {pred.uri}. Skipping.")
        so_pairs = None
    except Exception as e:
        # Best effort: report the problem and keep whatever was collected.
        print(e)
    finally:
        # Only report timing here. A `return` inside `finally` would silently
        # discard any in-flight exception, including KeyboardInterrupt.
        time_elapsed = time.time() - start_time
        print(f"Time elapsed: {time_elapsed}")

    return so_pairs

# Run the extraction for `pred`; the recorded run of this cell took roughly 235 s.
so_pairs = get_so_pairs_for_pred_chained(pred)

Querying http://schema.org/author.
Time elapsed: 235.34771943092346


In [12]:
# Display the extracted (subject name, object name) pairs.
so_pairs

[('Godless', 'Pete Hautman'),
 ('Goddess of Anarchy', 'Jacqueline Jones'),
 ('Refugees from an Imaginary Country', 'Darrell Schweitzer'),
 ('Pseudomonarchia Daemonum', 'Johann Weyer'),
 ('Malebolgia', 'Todd McFarlane'),
 ('Cellini Salt Cellar', 'Benvenuto Cellini'),
 ("The No. 1 Ladies' Detective Agency", 'Alexander McCall Smith'),
 ('Frankie', 'Lucy Gannon'),
 ('A Breath of Life', 'Clarice Lispector'),
 ('Quicken Forbidden', 'Dave Roman'),
 ('Shikamaru Nara', 'Masashi Kishimoto'),
 ('Life Extension: A Practical Scientific Approach', 'Durk Pearson'),
 ("Lady Audley's Secret", 'Mary Elizabeth Braddon'),
 ('Lyla Lerrol', 'Jerry Siegel'),
 ('The Traitor Queen', 'Trudi Canavan'),
 ('Plessy v. Ferguson', 'John Marshall Harlan'),
 ('Icebreaker', 'John Gardner'),
 ('Rocinante', 'Miguel de Cervantes'),
 ('The Mismeasure of Man', 'Stephen Jay Gould'),
 ("Ruh al-Ma'ani", 'Mahmud al-Alusi'),
 ('Woman in the Mists', 'Farley Mowat'),
 ('The Smiling Dogs', 'Paul Ernst'),
 ('Ma Gastronomie', 'Fernand

In [11]:

# Show the pairs ordered by the second tuple element (the object name).
sorted(so_pairs, key=lambda x: x[1])

[('Krishna, the Supreme Personality of Godhead',
  'A. C. Bhaktivedanta Swami Prabhupada'),
 ('Dawn of the Emperors: Thyatis and Alphatia', 'Aaron Allston'),
 ("The Complete Priest's Handbook", 'Aaron Allston'),
 ('The Bad Guys', 'Aaron Blabey'),
 ('Robert Ritchie', 'Aaron Sorkin'),
 ('Guerilla Open Access Manifesto', 'Aaron Swartz'),
 ('Hikayat Abdullah', 'Abdullah bin Abdul Kadir'),
 ('The Fathers', 'Abel Azcona'),
 ('Mediocre But Arrogant', 'Abhijit Bhaduri'),
 ('Equestrian statue of Redvers Buller', 'Adrian Jones'),
 ('Value Migration: How to Think Several Moves Ahead of the Competition',
  'Adrian Slywotzky'),
 ('Agatha Christie bibliography', 'Agatha Christie'),
 ('One, Two, Buckle My Shoe', 'Agatha Christie'),
 ('Natives: Race and Class in the Ruins of Empire', 'Akala'),
 ('S.T.A.K.E.', 'Al Ewing'),
 ("Kitab al-'Ayn", 'Al-Khalil ibn Ahmad al-Farahidi'),
 ('Star Wars: From the Adventures of Luke Skywalker', 'Alan Dean Foster'),
 ('The Owl Service', 'Alan Garner'),
 ('Toxic!', 'Al