In [11]:
import json, csv, re

In [30]:
# Execute organ_utils.py inside this notebook's own namespace (-i), so that the
# names it defines are usable from the cells below.
# NOTE(review): presumably this is where loadOrganData() comes from — confirm.
%run -i organ_utils.py

In [13]:
# Read the organ dataset and expose every top-level entry of the returned
# mapping as a notebook-level variable of the same name; the cells below use
# those names (organids, base, tech, ...) directly.
organdata = loadOrganData()
globals().update(organdata)

print(organdata.keys())


dict_keys(['organids', 'base', 'history_base', 'history_projects', 'dispositions', 'compoundstops', 'tech', 'texts_hist', 'texts_fulltexts', 'texts_tech', 'texts_bijzonderheden', 'texts_offsets'])


In [14]:
# Build a lookup from place name to a stable identifier ("plaatsNNNN").
places = [base[organid]['place'] for organid in organids]

# Number the distinct names in sorted order. Iterating a bare set of strings
# yields a different order in every Python process (string hashing is
# randomised), which would hand the same place a different identifier on each
# run of the notebook.
placesid = {
    place: f'plaats{ix:04}'
    for ix, place in enumerate(sorted(set(places) - {''}))
}
# Organs with an empty place name all share one "unknown place" identifier.
placesid[''] = 'plaatsOnbekend'

In [15]:
# Matches the first run of three consecutive digits in the pitch description.
# NOTE(review): this also matches the first three digits of a longer number —
# confirm that the pitch field never contains e.g. a four-digit year.
re_pitch = re.compile(r'[0-9]{3}')

# organid -> the three-digit string found in its pitch field, '' if none found
extracted_pitch = {}

for organid in organids:
    match_pitch = re_pitch.search(tech[organid]['pitch'])
    extracted_pitch[organid] = match_pitch.group() if match_pitch else ''

# One entry per organ, in the order of organids.
pitches = [extracted_pitch[organid] for organid in organids]

# Build a lookup from pitch string to a stable identifier ("toonhoogteNNNN").
# The distinct values are sorted before numbering: iterating a bare set of
# strings gives a different order in every Python process (string hashing is
# randomised), so identifiers would otherwise change from run to run.
pitchesid = {
    pitch: f'toonhoogte{ix:04}'
    for ix, pitch in enumerate(sorted(set(pitches) - {''}))
}
# Organs without an extractable pitch share one "unknown pitch" identifier.
pitchesid[''] = 'toonhoogteOnbekend'

In [16]:
# Build a lookup from year string to a stable identifier ("jaarNNNN").
# Collected from each organ's base history record and from the date of every
# project listed for that organ.
years = []
for organid in organids:
    years.append(history_base[organid]['year'])
    years.extend(proj['date'] for proj in history_projects[organid])

# Number the distinct values in sorted order. Iterating a bare set of strings
# yields a different order in every Python process (string hashing is
# randomised), which would give the same year a different identifier per run.
yearsid = {
    year: f'jaar{ix:04}'
    for ix, year in enumerate(sorted(set(years) - {''}))
}
# Empty year/date values all share one "unknown year" identifier.
yearsid[''] = 'jaarOnbekend'

In [17]:
# Build a lookup from builder name to a stable identifier ("bouwerNNNN").
# Collected from each organ's base history record and from the name attached
# to every project listed for that organ.
builders = []
for organid in organids:
    builders.append(history_base[organid]['builder'])
    builders.extend(changes['name'] for changes in history_projects[organid])

# Number the distinct names in sorted order. Iterating a bare set of strings
# yields a different order in every Python process (string hashing is
# randomised), which would give the same builder a different identifier per run.
buildersid = {
    builder: f'bouwer{ix:04}'
    for ix, builder in enumerate(sorted(set(builders) - {''}))
}
# Empty builder names all share one "unknown builder" identifier.
buildersid[''] = 'bouwerOnbekend'

In [18]:
# Turtle preamble for the LOD dump: namespace prefixes, the OWL classes
# (Orgel, Aanpassing, Orgelbouwer, Plaats, Jaar, Toonhoogte) and the properties
# that link them. The export cell writes this text first and appends the
# instance data after the trailing "# Data" marker.
lodlive_ontology = """PREFIX : <http://example.com/orgels/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

# Classes

:Orgel a owl:Class ;
   rdfs:label "Orgel" ;
   rdfs:comment "Een orgel." .

:Aanpassing a owl:Class ;
   rdfs:label "Aanpassing" ;
   rdfs:comment "Een aanpassing aan een orgel." .

:Orgelbouwer a owl:Class ;
   rdfs:label "Orgelbouwer" ;
   rdfs:comment "Een orgelbouwer." .

:Plaats a owl:Class ;
   rdfs:label "Plaatsnaam" ;
   rdfs:comment "Een Plaatsnaam" .

:Jaar a owl:Class ;
   rdfs:label "Jaar" ;
   rdfs:comment "Een jaar" .

:Toonhoogte a owl:Class ;
   rdfs:label "Toonhoogte" ;
   rdfs:comment "Een toonhoogte" .

# Properties

:aangepast a owl:ObjectProperty ;
   rdfs:label "aangepast" ;
   rdfs:domain :Orgel ;
   rdfs:range :Aanpassing .

:toonhoogte a owl:DatatypeProperty ;
   rdfs:label "toonhoogte" ;
   rdfs:domain :Orgel ;
   rdfs:range rdfs:Literal .

:encyclopedietekst a owl:DatatypeProperty ;
   rdfs:label "Historie" ;
   rdfs:domain :Orgel ;
   rdfs:range rdfs:Literal .

:bouwer a owl:ObjectProperty ;
   rdfs:label "Bouwer" ;
   rdfs:domain :Orgel ;
   rdfs:range :Orgelbouwer .

:gebouwdIn a owl:ObjectProperty ;
   rdfs:label "Jaar" ;
   rdfs:domain :Orgel ;
   rdfs:range :Jaar .

:heeftToonhoogte a owl:ObjectProperty ;
   rdfs:label "Toonhoogte" ;
   rdfs:domain :Orgel ;
   rdfs:range :Toonhoogte .

:staatIn a owl:ObjectProperty ;
   rdfs:label "staatin" ;
   rdfs:domain :Orgel ;
   rdfs:range :Plaats .

:werkzaamheden a owl:DatatypeProperty ;
   rdfs:label "werkzaamheden" ;
   rdfs:domain :Aanpassing ;
   rdfs:range rdfs:Literal .

:uitgevoerdIn a owl:ObjectProperty ;
   rdfs:label "uitgevoerdIn" ;
   rdfs:domain :Aanpassing ;
   rdfs:range :Jaar .

:uitgevoerdDoor a owl:ObjectProperty ;
   rdfs:label "uitgevoerdDoor" ;
   rdfs:domain :Aanpassing ;
   rdfs:range :Orgelbouwer .

# Data

"""

In [29]:
#dump LOD for Lodlive

def text2html(tekst):
    """Convert plain text to an HTML fragment that is safe inside a Turtle "..." literal.

    Line breaks become ``<br />`` and double quotes become ``&quot;``.
    Backslashes are doubled and carriage returns are treated as line breaks,
    because a raw backslash or CR inside a double-quoted Turtle string is
    either invalid or read as the start of an escape sequence.

    Args:
        tekst: the plain-text string to convert.

    Returns:
        The converted string, containing no raw newline, carriage return,
        double quote or unescaped backslash.
    """
    return (
        tekst.replace('\\', '\\\\')  # must come first so later output is not re-escaped
        .replace('\r\n', '\n')
        .replace('\r', '\n')
        .replace('\n', '<br />')
        .replace('"', '&quot;')
    )
    
def _turtle_string(text):
    """Return text as a double-quoted Turtle string literal with special characters escaped."""
    escaped = (
        text.replace('\\', '\\\\')  # first, so the escapes added below stay intact
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )
    return '"' + escaped + '"'


def _write_named_resource(out, resource_id, class_name, label):
    """Write one resource of the given class whose label and title are both `label`."""
    literal = _turtle_string(label)
    out.write(':' + resource_id + ' rdf:type :' + class_name + ' ;\n')
    # rdfs:label, as in the ontology header; the RDF namespace defines no "label" term.
    out.write('    rdfs:label ' + literal + ' ;\n')
    out.write('    dct:title ' + literal + ' .\n')
    out.write('\n')


# Turtle documents are UTF-8, so do not rely on the platform's default encoding.
with open('../output/lod/organ_lodlive_base_data.ttl', 'w', encoding='utf-8') as f:
    #write ontology
    f.write(lodlive_ontology)

    #write builders
    for builder, builderid in sorted(buildersid.items()):
        label = 'Bouwer Onbekend' if builderid == 'bouwerOnbekend' else builder
        _write_named_resource(f, builderid, 'Orgelbouwer', label)

    #write places
    for place, placeid in sorted(placesid.items()):
        label = 'Plaats Onbekend' if placeid == 'plaatsOnbekend' else place
        _write_named_resource(f, placeid, 'Plaats', label)

    #write years
    for year, yearid in sorted(yearsid.items()):
        label = 'Jaar Onbekend' if yearid == 'jaarOnbekend' else year
        _write_named_resource(f, yearid, 'Jaar', label)

    #write pitches
    for pitch, pitchid in sorted(pitchesid.items()):
        if pitchid == 'toonhoogteOnbekend':
            # No frequency to show, so do not wrap the text in "a = ... Hz".
            label = 'Toonhoogte Onbekend'
        else:
            label = 'a = ' + pitch + ' Hz'
        _write_named_resource(f, pitchid, 'Toonhoogte', label)

    #write projects (aanpassingen)
    aanpassing_ID = 0
    for organid in organids:
        for proj in history_projects[organid]:
            # The generated id is stored on the project record itself so the
            # organ loop below can link each organ to its projects.
            proj['aanpassing_ID'] = f'Aanpassing_{aanpassing_ID:05d}'
            aanpassing_ID += 1
            bouwer = proj['name'] or "Onbekend"
            datum = proj['date'] or "Onbekend"
            titel = _turtle_string(datum + ' ' + bouwer + ' ' + base[organid]['name'])
            f.write(':' + proj['aanpassing_ID'] + ' rdf:type :Aanpassing ;\n')
            f.write('    :uitgevoerdIn :' + yearsid[proj['date']] + ' ;\n')
            f.write('    :uitgevoerdDoor :' + buildersid[proj['name']] + ' ;\n')
            # text2html output is already free of raw quotes and newlines.
            f.write('    :werkzaamheden "' + text2html('\n'.join(proj['changes'])) + '" ;\n')
            f.write('    rdfs:label ' + titel + ' ;\n')
            f.write('    dct:title ' + titel + ' .\n')
            f.write('\n')

    #write organs
    for organid in organids:
        titel = _turtle_string(base[organid]['name'] + ', ' + history_base[organid]['year'])
        f.write(':' + organid + ' rdf:type :Orgel ;\n')
        f.write('    rdfs:label ' + titel + ' ;\n')
        f.write('    dct:title ' + titel + ' ;\n')
        f.write('    :bouwer :' + buildersid[history_base[organid]['builder']] + ' ;\n')
        f.write('    :heeftToonhoogte :' + pitchesid[extracted_pitch[organid]] + ' ;\n')
        f.write('    :gebouwdIn :' + yearsid[history_base[organid]['year']] + ' ;\n')
        f.write('    :staatIn :' + placesid[base[organid]['place']] + ' ;\n')
        for proj in history_projects[organid]:
            f.write('    :aangepast :' + proj['aanpassing_ID'] + ' ;\n')
        f.write('    :encyclopedietekst "' + text2html(texts_fulltexts[organid]) + '" .\n')
        f.write('\n')

    #Disambiguate (placeholder: no disambiguation step is implemented here)

In [28]:
#write lists of objects and object names

def formaturi_html(uri, text):
    """Return one HTML table row for `uri`, terminated by a newline.

    The first cell shows the URI as a link that opens it in the LodLive
    viewer in a new tab; the second cell holds `text` verbatim (no escaping).
    """
    viewer_url = f'https://syrinx.knorrie.org/~pvk/lodlive/app_en.html?{uri}'
    link_cell = f'<td><a href="{viewer_url}" target="_blank">{uri}</a></td>'
    text_cell = f'<td>{text}</td>'
    return '<tr>' + link_cell + text_cell + '</tr>\n'

prefix = 'http://example.com/orgels/'

# One entry per section of the index: (anchor name, heading, column width of
# the dotted URI field in the text listing, [(identifier, display text), ...]).
# Organs are ordered by identifier; the other sections by display text.
index_sections = [
    ('orgels', 'Orgels', 63,
     [(organid, f"{base[organid]['name']}, {history_base[organid]['year']}")
      for organid in sorted(organids)]),
    ('bouwers', 'Bouwers', 45,
     [(builderid, builder) for builder, builderid in sorted(buildersid.items())]),
    ('plaatsnamen', 'Plaatsnamen', 45,
     [(placeid, place) for place, placeid in sorted(placesid.items())]),
    ('jaren', 'Jaren', 45,
     [(yearid, year) for year, yearid in sorted(yearsid.items())]),
    ('toonhoogtes', 'Toonhoogtes', 45,
     [(pitchid, pitch) for pitch, pitchid in sorted(pitchesid.items())]),
]

# The HTML page declares charset UTF-8, so both files are written as UTF-8
# explicitly rather than in the platform's default encoding.
with open('../output/lod/objects.txt', 'w', encoding='utf-8') as f, \
        open('../output/lod/objects.html', 'w', encoding='utf-8') as h:
    h.write("""<html><head><meta charset="UTF-8"></head>
<body>
<a href="#orgels">Orgels</a><br />
<a href="#bouwers">Bouwers</a><br />
<a href="#plaatsnamen">Plaatsnamen</a><br />
<a href="#jaren">Jaren</a><br />
<a href="#toonhoogtes">Toonhoogtes</a><br />

<p>Sparql Endpoint: <a href="https://195.240.117.253:17200/repositories/orgeltest">https://195.240.117.253:17200/repositories/orgeltest</a>. Visit this url and accept the SSL certificate.</p>

""")

    for anchor, heading, width, rows in index_sections:
        h.write(f'<a name="{anchor}" />\n<h2>{heading}</h2>\n'
                '<table><tr><td><b>URI</b></td><td><b>Object</b></td></tr>')
        for object_id, label in rows:
            uri = prefix + object_id
            # Text listing: URI padded with dots to a fixed width, then the label.
            f.write(f"{uri + '  ':.<{width}} {label} \n")
            # formaturi_html closes the cell and the row itself, so only the
            # bare label is passed in.
            h.write(formaturi_html(uri, label))
        h.write("</table>")

    #end (every table was already closed inside the loop)
    h.write('</body></html>')
        
        
