# Padova Grand Tour - Queries

This notebook provides ten insightful queries over our data. Before running it, start GraphDB with `docker compose up --force-recreate`.


SPARQL wrapper (copied and pasted from the Individual Project's notebooks):

In [36]:

from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL PREFIX header: `run_query` prepends it to every query, and the
# `mappedPrefixes` lookup is parsed from it.
# Taken from `data/ttlData/ontology.ttl`
# NOTE(review): "entitites" in the pgt namespace looks like a typo of "entities", but the
# IRI must match the one used in the loaded data - confirm before changing it.
prefixString = """
PREFIX owl: <http://www.w3.org/2002/07/owl#> 
PREFIX pgt: <https://padovagrandtour.github.io/entitites#> 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
PREFIX xml: <http://www.w3.org/XML/1998/namespace> 
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 
PREFIX sdo: <https://schema.org/>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
"""

# select and construct queries
def run_query(queryString, endpoint="http://localhost:7210/repositories/pgt", timeout=300):
    """Run a SPARQL query and flatten its JSON bindings into lists of tuples.

    The module-level ``prefixString`` is prepended to ``queryString`` so queries can use
    the project's prefixes without redeclaring them.

    Args:
        queryString: SPARQL query text, without PREFIX declarations.
        endpoint: URL of the SPARQL endpoint. Defaults to our local endpoint; the repo
            name is defined in `data/graphdb-repo.ttl`.
        timeout: Value handed to ``SPARQLWrapper.setTimeout``.

    Returns:
        A list with one entry per solution, each entry being a list of
        ``(variable, value)`` tuples. An empty list is returned both when the query has
        no bindings and when the request fails; a failure is printed rather than raised,
        so callers can always iterate over the result.
    """
    to_run = prefixString + "\n" + queryString

    sparql = SPARQLWrapper(endpoint)
    sparql.setTimeout(timeout)
    sparql.setReturnFormat(JSON)
    sparql.setQuery(to_run)

    try:
        json_results = sparql.query().convert()
        bindings = json_results['results']['bindings']
        if len(bindings) == 0:
            print("Empty")
            return []
        # Keep only the lexical value of each binding; datatype/language info is dropped.
        rows = [
            [(var, value['value']) for var, value in binding.items()]
            for binding in bindings
        ]
        print(len(rows))  # Print the number of solutions
        return rows

    except Exception as e:
        # Best effort: report the problem but still hand back an iterable, otherwise
        # callers that loop over the result fail with a TypeError on None.
        print("The operation failed", e)
        return []
    

In [37]:
# Improved version that also handles prefixes
# Lookup table {prefix name -> namespace IRI}, parsed out of the PREFIX lines of `prefixString`.
mappedPrefixes = {}
for declaration in prefixString.split('\n'):
    if 'PREFIX' not in declaration:
        continue
    # "PREFIX name: <iri>" -> ["name", "<iri>"]; split only on the first colon, since the IRI has its own.
    pieces = declaration.replace('PREFIX', '', 1).split(':', 1)
    prefixLabel = pieces[0].strip()
    namespaceIri = pieces[1].strip().replace('<', '').replace('>', '')
    mappedPrefixes[prefixLabel] = namespaceIri

def _compact_iri(value, prefixes):
    """Return ``value`` with its leading namespace IRI swapped for ``prefix:``, if any matches."""
    best = None
    for name, namespace in prefixes.items():
        # Only a namespace at the very start counts; prefer the longest (most specific) one.
        if namespace and value.startswith(namespace):
            if best is None or len(namespace) > len(best[1]):
                best = (name, namespace)
    if best is None:
        return value
    return best[0] + ':' + value[len(best[1]):]


def query(queryString, replacePrefixes=True):
    """Run ``queryString`` through ``run_query`` and optionally shorten IRIs to prefixed names.

    Args:
        queryString: SPARQL query text, without PREFIX declarations.
        replacePrefixes: When true, every value that starts with a namespace listed in
            ``mappedPrefixes`` is rewritten as ``prefix:localName``. Values that merely
            contain a namespace IRI somewhere inside (e.g. free text) are left untouched.

    Returns:
        A list of solutions, each a list of ``(variable, value)`` tuples. A ``None``
        result from ``run_query`` is treated as an empty list.
    """
    queryResults = run_query(queryString) or []
    if not replacePrefixes:
        return queryResults

    return [
        [(var, _compact_iri(value, mappedPrefixes)) for var, value in row]
        for row in queryResults
    ]


Check if everything works:

In [38]:
# Smoke test: fetch three arbitrary triples to confirm that the endpoint answers
# and that namespace IRIs in the results are shortened to their prefixes.
query("""
select distinct * {
    ?s ?p ?o
}
LIMIT 3
""", replacePrefixes=True)


3


[[('s', 'rdf:type'), ('p', 'rdf:type'), ('o', 'rdf:Property')],
 [('s', 'rdf:type'), ('p', 'rdf:type'), ('o', 'rdfs:Resource')],
 [('s', 'rdf:Property'), ('p', 'rdf:type'), ('o', 'rdfs:Class')]]

## 1 - Get tour sites in order

We would like to retrieve every site touched by a specific tour, with name and coordinates.

If you just follow a naive approach, this is what you get:

```
[[('stepIndex', 'rdf:_1'), ('site', 'pgt:SITE1')],
 [('stepIndex', 'rdf:_10'), ('site', 'pgt:SITE10')],
 [('stepIndex', 'rdf:_11'), ('site', 'pgt:SITE11')],
 [('stepIndex', 'rdf:_12'), ('site', 'pgt:SITE12')],
 [('stepIndex', 'rdf:_13'), ('site', 'pgt:SITE13')],
 [('stepIndex', 'rdf:_14'), ('site', 'pgt:SITE14')],
 [('stepIndex', 'rdf:_15'), ('site', 'pgt:SITE15')],
 [('stepIndex', 'rdf:_2'), ('site', 'pgt:SITE2')],
 [('stepIndex', 'rdf:_3'), ('site', 'pgt:SITE3')],
 [('stepIndex', 'rdf:_4'), ('site', 'pgt:SITE4')],
 [('stepIndex', 'rdf:_5'), ('site', 'pgt:SITE5')],
 [('stepIndex', 'rdf:_6'), ('site', 'pgt:SITE6')],
 [('stepIndex', 'rdf:_7'), ('site', 'pgt:SITE7')],
 [('stepIndex', 'rdf:_8'), ('site', 'pgt:SITE8')],
 [('stepIndex', 'rdf:_9'), ('site', 'pgt:SITE9')]]
 ```

This happens because `rdf:_15` sorts before `rdf:_2` in lexicographical order. To get the right order, we need to strip the `rdf:_` prefix and cast the remainder to an integer.

In [39]:
# The steps of a tour hang off the container membership properties rdf:_1, rdf:_2, ...
# Only those properties are matched (any other predicate on the step node, such as
# rdf:type, is filtered out), and the number after "rdf:_" is cast to an integer so that
# the ordering is numeric rather than lexicographic.
query("""
SELECT ?stepIndexNumber ?siteName ?siteLat ?siteLong WHERE {
    pgt:TOUR0 pgt:steps ?stepNode .
    ?stepNode ?stepIndex ?site .

    BIND ("http://www.w3.org/1999/02/22-rdf-syntax-ns#_" AS ?membershipPrefix)
    BIND (xsd:integer(STRAFTER(STR(?stepIndex), ?membershipPrefix)) AS ?stepIndexNumber)
    FILTER (STRSTARTS(STR(?stepIndex), ?membershipPrefix))

    OPTIONAL{ ?site sdo:name ?siteName. }
    OPTIONAL{ ?site geo:lat  ?siteLat.  }
    OPTIONAL{ ?site geo:long ?siteLong. }
} ORDER BY ?stepIndexNumber
""")


15


[[('stepIndexNumber', '1'),
  ('siteName', 'Museo archeologico'),
  ('siteLat', '45.41096111111111'),
  ('siteLong', '11.880011111111111')],
 [('stepIndexNumber', '2'),
  ('siteName', "Museo d'Arte Medievale e Moderna"),
  ('siteLat', '45.41096111111111'),
  ('siteLong', '11.880011111111111')],
 [('stepIndexNumber', '3'),
  ('siteName', 'Museo Bottacin'),
  ('siteLat', '45.411569444444446'),
  ('siteLong', '11.878119444444446')],
 [('stepIndexNumber', '4'),
  ('siteName', "Museo del Risorgimento e dell'Età Contemporanea"),
  ('siteLat', '45.40784722222222'),
  ('siteLong', '11.876925')],
 [('stepIndexNumber', '5'),
  ('siteName', 'Museo di anatomia patologica (Padova)'),
  ('siteLat', '45.405027777777775'),
  ('siteLong', '11.8859')],
 [('stepIndexNumber', '6'),
  ('siteName', 'Centro di Ateneo per i Musei - CAM')],
 [('stepIndexNumber', '7'),
  ('siteName', 'Collezione privata Safilo'),
  ('siteLat', '45.414833333333334'),
  ('siteLong', '11.928894444444444')],
 [('stepIndexNumber', '

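## 2 - Cultural sites without coordinates

We would like to know which cultural sites do not have both a latitude and a longitude, since those cannot be placed on a map.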

In [40]:
# List every pgt:CulturalSite that lacks a complete coordinate pair: a site is excluded
# only when it has both a geo:lat and a geo:long value.
query("""
SELECT DISTINCT ?site WHERE {
    ?site a pgt:CulturalSite .
    FILTER NOT EXISTS {
        ?site geo:lat ?lat;
              geo:long ?long.
    }
}
""")



8


[[('site', 'pgt:SITE22')],
 [('site', 'pgt:SITE23')],
 [('site', 'pgt:SITE25')],
 [('site', 'pgt:SITE26')],
 [('site', 'pgt:SITE27')],
 [('site', 'pgt:SITE31')],
 [('site', 'pgt:SITE34')],
 [('site', 'pgt:SITE37')]]

## 3 -

## 4 -

## 5 -

## 6 -

## 7 -

## 8 -

## 9 -

## 10 -