# Padova Grand Tour - Sparql Ingester

This notebook generates a `sparql.ttl` Turtle file from the data retrieved by the `sparql/SparqlRetriever.ipynb` notebook.

Set up the graph:

In [26]:
import os
from pathlib import Path

from rdflib import Graph, Literal, RDF, URIRef, Namespace
# rdflib knows about some namespaces, like FOAF, XSD, schema.org
from rdflib.namespace import FOAF, XSD, SDO
from rdflib.collection import Collection

# Project namespace: every entity and class created by this notebook lives here.
# NOTE(review): the IRI is spelled "entitites" (sic). It is left untouched because
# it is part of every generated identifier — confirm against the published
# namespace before correcting it.
PGT = Namespace("https://padovagrandtour.github.io/entitites#")
# WGS84 position vocabulary ("simple GEO"). Careful: this is not the same
# namespace as the "advanced GEO" one exported by rdflib.
GEO = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")

# Folder that receives the generated Turtle file (created if it does not exist).
# NOTE(review): `path` is not defined anywhere in this notebook as shown; it
# presumably has to point at the repository root before this cell runs — confirm
# and define it explicitly so that "Restart & Run All" works.
savePath = path + "/data/ttlData/"
os.makedirs(savePath, exist_ok=True)

# Graph shared by all the following cells, with its prefixes registered.
g = Graph()
for prefix, namespace in (
    ("foaf", FOAF),
    ("xsd", XSD),
    ("pgt", PGT),
    ("sdo", SDO),
    ("geo", GEO),
):
    g.bind(prefix, namespace)


In [27]:
import json
# Read the JSON files this notebook ingests.
def _loadJson(relativePath):
    """Parse the JSON file located at `path + relativePath`.

    The file is opened explicitly as UTF-8 so that decoding does not depend on
    the platform's default locale encoding.

    Parameters:
        relativePath: location of the file relative to `path`, starting with "/".

    Returns:
        The decoded JSON document.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path + relativePath, encoding="utf-8") as f:
        return json.load(f)


categoriesJSON = _loadJson('/sparql/categories.json')
museumPropertiesJSON = _loadJson('/sparql/museumProperties.json')
museumThingsJSON = _loadJson('/sparql/museumThings.json')
propertiesJSON = _loadJson('/sparql/properties.json')
thingsJSON = _loadJson('/sparql/things.json')


In [28]:
# Add to graph while doing standard normalizations
def addToG(subject, predicate, obj, key, datatype):
    """Add `obj[key]` to the global graph `g` as a typed literal, if it is present.

    Nothing is added when:
      * `key` is missing from `obj`;
      * the value is `None` (e.g. a JSON null) or NaN;
      * `datatype` is `XSD.string` but the value is not a `str`.

    String values are stripped of surrounding whitespace before being stored.

    Parameters:
        subject:   subject node of the triple.
        predicate: predicate node of the triple.
        obj:       mapping holding the raw property values.
        key:       name of the property to read from `obj`.
        datatype:  datatype IRI attached to the resulting literal.

    Only the missing-key case is swallowed; any other error (for instance an
    invalid node passed to `g.add`) propagates instead of being silently ignored.
    """
    try:
        value = obj[key]
    except KeyError:
        return  # property not available for this entity

    # NaN is the only value that differs from itself; None would otherwise be
    # stored as a meaningless literal.
    if value is None or value != value:
        return

    if datatype == XSD.string:
        if not isinstance(value, str):
            # Kept from the previous behaviour: non-text values were never
            # stored as strings (the failing .strip() used to be swallowed).
            return
        value = value.strip()

    g.add((subject, predicate, Literal(value, datatype=datatype)))

In [29]:
# Lookup tables filled while the museums are inserted:
#   museumURLDict : source URL  -> site identifier ("SITEsq<n>")
#   museumNameDict: museum name -> site identifier (used to spot duplicates)
museumURLDict = {}
museumNameDict = {}

# Counter used to mint the next "SITEsq<n>" identifier
SITEindex = 0

for museumURL, siteData in museumPropertiesJSON.items():
    # The first element of siteData is consumed as [key, value] pairs
    museumData = {akey: aval for [akey, aval] in siteData[0]}
    museumName = museumData['name']

    if museumName in museumNameDict:
        # A museum with this name already exists: map this URL to the same node
        print("skipping (already inserted)", museumName)
        museumURLDict[museumURL] = museumNameDict[museumName]
        continue

    # Register the new museum under a freshly minted identifier
    siteId = "SITEsq" + str(SITEindex)
    museumURLDict[museumURL] = siteId
    museumNameDict[museumName] = siteId
    print("inserting", museumURL, museumName)
    Museum = URIRef(PGT[siteId])
    SITEindex += 1
    g.add((Museum, RDF.type, PGT.Museum))

    # Museum properties: the URL is always stored, the others only when
    # addToG finds a usable value for them
    g.add((Museum, SDO.url, Literal(museumURL, datatype=SDO.URL)))
    addToG(Museum, SDO.name, museumData, 'name', datatype=XSD.string)
    addToG(Museum, SDO.description, museumData, 'desc', datatype=RDF.HTML)
    addToG(Museum, SDO.image, museumData, 'img', datatype=SDO.URL)
    addToG(Museum, GEO['lat'], museumData, 'lat', datatype=XSD.float)
    addToG(Museum, GEO['long'], museumData, 'long', datatype=XSD.float)



inserting http://dati.beniculturali.it/iccd/cf/resource/CulturalInstituteOrSite/1469799782221 Palazzo Folco
inserting https://w3id.org/arco/resource/Site/7cd721378d4eed24c5285df08594b4fc Palazzo Dolco
inserting http://dati.beniculturali.it/iccd/cf/resource/CulturalInstituteOrSite/1469744914369 Convento degli Eremitani
inserting https://w3id.org/arco/resource/Site/62a37ce270999c1ee4b1ba1cb472939b Università di Padova
inserting http://dati.beniculturali.it/iccd/cf/resource/CulturalInstituteOrSite/ICCD_CF_8076015143451 Palazzina del prefetto (ex)
inserting http://dati.beniculturali.it/iccd/cf/resource/CulturalInstituteOrSite/ICCD_CF_6156344978451 Complesso Ingegneria
inserting http://dati.beniculturali.it/iccd/cf/resource/CulturalInstituteOrSite/ICCD_CF_4223302714451 Palazzo ECA (ex)
inserting http://dati.beniculturali.it/iccd/cf/resource/CulturalInstituteOrSite/ICCD_CF_0160885714451 Complesso Cavalli
asdsfgdsa Complesso Cavalli
inserting http://dati.beniculturali.it/iccd/cf/resource/Cult

In [30]:


# Write the populated graph into the output folder as "sparql.ttl" (Turtle syntax).
# The call is the last expression of the cell, so the notebook echoes its
# return value (the graph object) below the cell.
outputFile = savePath + "sparql.ttl"
g.serialize(destination=outputFile, format='turtle')


<Graph identifier=Nbe47ed4119394b48bd5e764154819f5c (<class 'rdflib.graph.Graph'>)>