# SPARQL: Fortgeschrittene Themen

In dieser Übung schauen wir uns einige fortgeschrittene Themen zu SPARQL etwas genauer an. Wie bisher verwenden wir RDF, welches unsere Pink-Floyd-Diskographie beschreibt.

In [None]:
import pandas as pd
from io import BytesIO
from io import StringIO
from rdflib import Graph
from rdflib.plugins.sparql.results.csvresults import CSVResultSerializer
from IPython.display import display

# Sample data in Turtle syntax: two ex:Album resources and one ex:Single.
# Each resource is a blank node with a title and a nested ex:released blank
# node; only the single carries an ex:author and only a release year.
rdf = """
@prefix ex: <http://example.org#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a ex:Album ;
   ex:title "The Dark Side of the Moon"^^xsd:string ;
   ex:label "Harvest, EMI"@en ;
   ex:released [ 
     ex:day "16"^^xsd:int ;
     ex:month "03"^^xsd:int ;
     ex:year "1973"^^xsd:int 
   ] .
   
[] a ex:Album ;
   ex:title "The Wall"^^xsd:string ;
   ex:label "Harvest, EMI"@en ;
   ex:released [ 
     ex:day "30"^^xsd:int ;
     ex:month "11"^^xsd:int ;
     ex:year "1979"^^xsd:int 
   ] .

[] a ex:Single ;
   ex:title "What God Wants, Part 1"^^xsd:string ;
   ex:author [
     ex:firstname "Roger" ;
     ex:lastname "Waters"
   ] ;
   ex:released [ 
     ex:year "1992"^^xsd:int 
   ] .
"""

# In-memory graph that the local queries in this notebook run against.
g = Graph()

# Load the Turtle text into g; the return value is kept in r but not used in this cell.
r = g.parse(data=rdf, format='turtle')

def query(q):
    """Run SPARQL query ``q`` on the module-level graph ``g`` and display it.

    The result is serialised to CSV in memory, decoded to text, read back
    with pandas and rendered with ``display``. Nothing is returned.
    """
    buffer = BytesIO()
    CSVResultSerializer(g.query(q)).serialize(buffer)
    # The serializer writes bytes; pandas is fed the decoded text.
    csv_text = buffer.getvalue().decode()
    frame = pd.read_csv(StringIO(csv_text))
    display(frame)

In [None]:
# CONSTRUCT query run against the local graph g; its result triples are
# copied into a fresh graph and printed as Turtle.
q = """
PREFIX ex: <http://example.org#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 

CONSTRUCT {
  ?album ex:band "Pink Floyd" .
  ?album ex:title ?title .
} WHERE {
  ?album rdf:type ex:Album .
  ?album ex:title ?title .
  FILTER (?title = "The Dark Side of the Moon"^^xsd:string || ?title = "The Wall")
}
"""

qr = g.query(q)

# Separate graph for the constructed triples, with the ex: prefix bound so
# the Turtle output can use it.
gr = Graph()
gr.bind('ex', 'http://example.org#')

# Each item of the result is passed to Graph.add() unchanged.
for row in qr:
    gr.add(row)

# Graph.serialize() returns str on rdflib >= 6 but bytes on older releases;
# decode only when bytes come back so the cell runs on both.
serialized = gr.serialize(format='turtle')
if isinstance(serialized, bytes):
    serialized = serialized.decode('utf-8')
print(serialized)

# Erklären Sie die CONSTRUCT Abfrage: 
# Erklären Sie das Resultat: 

In [None]:
# ASK query run against the local graph g; the loop prints every item the
# result yields.
q = """
PREFIX ex: <http://example.org#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

ASK {
  ?single rdf:type ex:Single .
}
"""

qr = g.query(q)

for row in qr:
    print(row)
    
# Erklären Sie die ASK Abfrage: 
# Warum ist das Resultat 'True': 

In [None]:
# ASK query with a nested blank-node pattern on ex:author / ex:lastname,
# run against the local graph g; the loop prints every item the result yields.
q = """
PREFIX ex: <http://example.org#> 

ASK {
  [] ex:author [
     ex:lastname "Water"
  ]
}
"""

qr = g.query(q)

for row in qr:
    print(row)
    
# Erklären Sie die ASK Abfrage: 
# Warum ist das Resultat 'False': 

In [None]:
# SELECT of ?title with an ORDER BY on ?year; ?year is matched in the WHERE
# clause but not projected. The table is rendered by the query() helper.
q = """
PREFIX ex: <http://example.org#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?title 
WHERE { 
  [] ex:title ?title ;
     ex:released [ ex:year ?year ]
}
ORDER BY (?year)
"""

query(q)

# Erklären Sie die Abfrage: 
# In welcher Reihenfolge werden die Resultate angezeigt? Antwort:  

In [None]:
# Same SELECT as in the previous cell, but the sort key is wrapped in DESC().
q = """
PREFIX ex: <http://example.org#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?title 
WHERE { 
  [] ex:title ?title ;
     ex:released [ ex:year ?year ]
}
ORDER BY DESC(?year)
"""

query(q)

# Inwiefern ist dieses Resultat anders als das letzte? Antwort:  

In [None]:
# The same SELECT is issued repeatedly with LIMIT 1; only the OFFSET value
# differs between the query strings (0, 1, 2). Each result is rendered
# separately by the query() helper.

# OFFSET 0
q = """
PREFIX ex: <http://example.org#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?title ?year
WHERE { 
  [] ex:title ?title ;
     ex:released [ ex:year ?year ]
}
ORDER BY DESC(?year) LIMIT 1 OFFSET 0
"""

query(q)

# OFFSET 1
q = """
PREFIX ex: <http://example.org#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?title ?year
WHERE { 
  [] ex:title ?title ;
     ex:released [ ex:year ?year ]
}
ORDER BY DESC(?year) LIMIT 1 OFFSET 1
"""

query(q)

# OFFSET 2
q = """
PREFIX ex: <http://example.org#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?title ?year
WHERE { 
  [] ex:title ?title ;
     ex:released [ ex:year ?year ]
}
ORDER BY DESC(?year) LIMIT 1 OFFSET 2
"""

query(q)

# Wie viele Abfragen gibt es hier? Antwort: 
# Erklären Sie das Resultat, insbesondere auch die jeweilige Anzahl Resultate: 

Ähnlich wie in der Vorlesung für DBpedia gezeigt, stellen wir als Nächstes einige Abfragen an den SPARQL-Endpoint [http://dblp.l3s.de/d2r/snorql/](http://dblp.l3s.de/d2r/snorql/). Dieser liefert bibliographische Informationen der [dblp](https://dblp.uni-trier.de/) (Digital Bibliography & Library Project) und wird vom [Forschungszentrum L3S](https://www.l3s.de/de/home) betrieben.

In [None]:
# !pip install SPARQLWrapper

import pandas as pd
from io import StringIO
from SPARQLWrapper import SPARQLWrapper, JSON

# Raise pandas' display.max_colwidth option to 200 for the tables shown below.
pd.set_option('display.max_colwidth', 200)

# PREFIX declarations that query() prepends to every query it sends, so the
# individual query strings can use these prefixes without declaring them.
prefixes = """
PREFIX swrc: <http://swrc.ontoware.org/ontology#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX d2r: <http://sites.wiwiss.fu-berlin.de/suhl/bizer/d2r-server/config.rdf#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX map: <file:///home/diederich/d2r-server-0.3.2/dblp-mapping.n3#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
"""

def query(endpoint, sparql):
    """Send a SPARQL SELECT query to a remote endpoint and display the result.

    The module-level ``prefixes`` string is prepended to ``sparql``, so the
    query text does not need to repeat the PREFIX declarations.

    Args:
        endpoint: URL of the SPARQL endpoint.
        sparql: Query text without PREFIX declarations.

    Returns:
        None. The result is rendered as a DataFrame via ``display``, with one
        column per variable listed in the response head. A variable that is
        unbound in a solution is shown as ``None``.
    """
    sw = SPARQLWrapper(endpoint)
    sw.setQuery('{}{}'.format(prefixes, sparql))
    sw.setReturnFormat(JSON)
    # Named ``result`` so the local does not shadow the stdlib ``json`` module name.
    result = sw.query().convert()
    variables = result['head']['vars']
    bindings = result['results']['bindings']
    # SPARQL JSON results omit a variable from a binding when it is unbound
    # (e.g. an unmatched OPTIONAL); fall back to None instead of raising
    # KeyError so every column keeps one entry per solution.
    data = {
        variable: [
            binding[variable]['value'] if variable in binding else None
            for binding in bindings
        ]
        for variable in variables
    }
    display(pd.DataFrame(data=data))

In [None]:
# Remote query against the D2R endpoint: the 10 rows with the highest ?issued
# value among resources whose dc:creator has the rdfs:label "Bijan Parsia",
# projecting title, issue date and homepage URL. The prefixes used here come
# from the ``prefixes`` string that query() prepends.
query('http://dblp.l3s.de/d2r/sparql', """
SELECT ?title ?issued ?url
WHERE {
  [] dc:creator [ rdfs:label "Bijan Parsia" ] ;
     dc:title ?title ;
     dcterms:issued ?issued ;
     foaf:homepage ?url
}
ORDER BY DESC(?issued)
LIMIT 10
""")

# Erklären Sie, was hier geschieht: 