# Tasks

Please first read assignment 2 instruction file, import necessary libararies, load the dataset and then write SPARQL query for following questions:

1. List the top four oldest people in the graph from oldest to youngest
2. Identify the shortest student who has at least four classmates 
3. List the teachers who have the highest salaries (in increasing order) and have at least five colleagues?
4. Give the mean (average) age of professors, both male and female, group by gender.
5. For each employee, calculate their "net salary"\
Net Salary Calculation = Base salary - income tax (%4 of the base salary) - pension (%6 of the base salary)

6. List all people with their first name or family name starting with the letter F, E, or A

7. List all `dbe:Researcher` using inference

Requirements: your query should also returns subclasses of `dbe:Researcher`, such as PhD Candidates and AssistantResearcher, without directly using their URI (use the `rdfs:subClassOf` properties)

8. Create missing relations when `:isSupervisedBy` is defined in one direction but not `:supervises` in the other direction. For example, if john `:isSupervisedBy` mary, then we know that mary `:supervises` john

Requirements: you should use a CONSTRUCT query to build the missing triples

9. Count the number of men and women per `rdf:type` in the graph (aka. the `a` relation)

10. Write a federated query from wikidata for retrieving inception date of Arya Parker’s college 

**Important**

In the following cell you must provide code for loading dataset in a graph.

In [221]:
from rdflib import URIRef, BNode, Literal, Namespace
from rdflib.namespace import FOAF, DCTERMS, XSD, RDF, SDO
from rdflib import Graph
import validators


ex= Namespace("http://exampleorg/")
schema= Namespace("http://schemaorg/")
xsd= Namespace("http://wwww3org/2001/XMLSchema#")
rdfs= Namespace("http://wwww3org/2000/01/rdf-schema#")
wde= Namespace("https://wwwwikidataorg/wiki/")
wdp= Namespace("http://wwwwikidataorg/prop/statement/")
dbp= Namespace("http://dbpediaorg/property/")
dbe= Namespace("https://dbpediaorg/page/")
dbo= Namespace("https://dbpediaorg/ontology/")
geo= Namespace("http://wwwgeonamesorg/ontology#")
wiki= Namespace("http://wwwwikidataorg/entity/")
eg = Namespace("http://exampleorg/voc#")
rdf= Namespace("http://wwww3org/1999/02/22-rdf-syntax-ns#")
foaf= Namespace("http://xmlnscom/foaf/01/")
owl= Namespace("http://wwww3org/2002/07/owl#")

#load dataset in a graph

g = Graph()
g.parse('assignment2_dataset.ttl', format='turtle')

g.bind('ex',ex )
g.bind('schema',schema )
g.bind('xsd',xsd )
g.bind('rdfs',rdfs )
g.bind('wde',wde )
g.bind('dbp',dbp )
g.bind('dbe',dbe )
g.bind('dbo',dbo )
g.bind('geo',geo )
g.bind('wiki',wiki )
g.bind('eg',eg )
g.bind('rdf',rdf )
g.bind('foaf',foaf )
g.bind('owl',owl )


#write SPARQL queries

In [222]:
#SPARQL Query 1?
# List the top four oldest people in the graph from oldest to youngest

query_1 = """
SELECT ?person
WHERE {
    ?a rdfs:label ?person .
    ?a dbp:age ?age .

}
ORDER BY DESC(?age)
LIMIT 4
"""

qres_1 = g.query(query_1)
for row in qres_1:
    print(f"{row.person}")

Frank Jones
Katrine Phill
Arya Parker
John Brown


In [223]:
#SPARQL Query 2
# Identify the shortest student who has at least four classmates
# (PhD candidates are also included)

query_2 = """
SELECT ?name ?height
WHERE {
    ?student a/rdfs:subClassOf* dbe:Student .
    ?student rdfs:label ?name .
    ?student schema:height ?height .
    ?student ex:classmate ?classmate .
}
GROUP BY ?name ?height
HAVING (COUNT(?classmate)>= 4)
ORDER BY ASC(?height)

LIMIT 1
"""

qres_2 = g.query(query_2)
for row in qres_2:
    print(f"{row.name} with height {row.height}")

Harry Lee with height 177


In [224]:
#SPARQL Query 3
# List the teachers who have the highest salaries (in increasing order) and have at least five colleagues?
# fillip 9,500 / 6 colo
# John  5300 /6 colea
#

query_3 = """
SELECT ?name ?salary
WHERE {
    ?a schema:employee ?teacher;
     rdfs:label ?name .
    ?a schema:baseSalary ?salary .
    ?a schema:colleague ?colleague .
}

GROUP BY ?name ?salary
HAVING(COUNT(?colleague)>=5)
ORDER BY ASC(?salary)
"""

qres_3 = g.query(query_3)
for row in qres_3:
    print(f"{row.name} {row.salary}")

Arya Parker 5000
John Brown 5300
Phillip Miller 9500


In [225]:
#SPARQL Query 4
# Give the mean (average) age of professors, both male and female, group by gender.
# John Brown 53
# Phillip Miller 38


query_4 = """
SELECT (AVG(?age) as ?avgAge) ?gender
WHERE {
    ?a a wiki:Q121594 .
    ?a rdfs:label ?name .
    ?a dbp:age ?age.
    ?a schema:gender ?gender .
}
GROUP BY ?gender
"""

qres_4 = g.query(query_4)
for row in qres_4:
    print(f"{row.avgAge} {row.gender}")

45.5 http://schema.org/Male


In [226]:
#SPARQL Query 5
# For each employee, calculate their "net salary" Important notes: Net Salary Calculation = Base salary - income tax (%4 of the base salary) - pension (%6 of the base salary)

query_5 = """
SELECT ?name ?net
WHERE {
   ?x schema:employee ?employee ;
        rdfs:label ?name .
   ?x schema:baseSalary ?baseSalary .

   BIND((?baseSalary - (4/100 * ?baseSalary) - (6/100 * ?baseSalary)) AS ?net).
}
"""

qres_5 = g.query(query_5)
for row in qres_5:
    print(f"{row.name} {row.net}")

John Brown 4770.00
Arya Parker 4500.00
Frank Jones 3870.00
Phillip Miller 8550.00
Phil Johnson 4680.00
Patrick Wilson 4230.00


In [227]:
#SPARQL Query 6
 # List all people with their first name or family name starting with the letter F, E, or A

query_6 = """
SELECT ?firstName ?familyName
WHERE {
   ?x schema:givenName ?firstName .
   ?x schema:familyName ?familyName .

   FILTER(strStarts(?firstName,"F") || strStarts(?firstName,"E") || strStarts(?firstName,"A") || strStarts(?familyName,"F") || strStarts(?familyName,"E") || strStarts(?familyName,"A"))
}
"""


qres_6 = g.query(query_6)
for row in qres_6:
    print(f"{row.firstName} {row.familyName}")

Alice Doe
Frank Jones
George Anderson
Eric Johnson
Elsa Taylor


In [228]:
#SPARQL Query 7
# List all `dbe:Researcher` using inference

query_7 = """
SELECT ?name
WHERE {
    ?researcher a/rdfs:subClassOf* dbe:Researcher .
    ?researcher rdfs:label ?name .
}
"""


qres_7 = g.query(query_7)
for row in qres_7:
    print(f"{row.name}")

Katrine Phill
Alice Doe
John Brown
Phillip Miller
Arya Parker
Frank Jones
Phil Johnson
Patrick Wilson


In [229]:
#SPARQL Query 8
# Create missing relations when `:isSupervisedBy` is defined in one direction but not
# `:supervises` in the other direction. For example, if john `:isSupervisedBy` mary, then
# we know that mary `:supervises` john

query_8 = """
CONSTRUCT{
?x :supervises ?y
 } WHERE {
    ?y :isSupervisedBy ?x .
}
"""
qres_8 = g.query(query_8)
for row in qres_8:
    print(f"{row}")


(rdflib.term.URIRef('http://example.org/JohnBrown'), rdflib.term.URIRef('http://example.org/voc#supervises'), rdflib.term.URIRef('http://example.org/AliceDoe'))


In [230]:
#SPARQL Query 9
# Count the number of men and women per `rdf:type` in the graph (aka. the `a` relation)


query_9 = """
SELECT ?people (count(?people) as ?pcount) ?gender
WHERE {
    ?people schema:gender ?gender .
}

GROUP BY ?gender
"""

qres_9 = g.query(query_9)
for row in qres_9:
    print(f" {row.gender} count: {row.pcount} ,")

 http://schema.org/Female count: 7 ,
 http://schema.org/Male count: 8 ,


In [231]:
#SPARQL Query 10

wdt= Namespace('http://www.wikidata.org/prop/direct/')
wd= Namespace('http://www.wikidata.org/entity/')

g.bind('wdt',wdt)
g.bind('wd',wd)

query_10 = """
SELECT ?arya ?iDate
WHERE
{
   ?arya rdfs:label "Arya Parker" .
  SERVICE <https://query.wikidata.org/sparql> {
    wd:Q1137652 wdt:P571 ?iDate . }
}
"""

qres_10 = g.query(query_10)
for row in qres_10:
    print(f"{row.arya} has inception query {row.iDate}")

http://example.org/AryaParker has inception query 1976-01-01T00:00:00+00:00
