## Transform Heidelberg Inscription Data to Linked Art 



https://github.com/epigraphic-database-heidelberg/data



In [1]:
try:
    import IPython
except:
    %pip install IPython
    import IPython 
    
from IPython.display import display, IFrame, HTML, Javascript
from IPython.core.display import HTML


try:    
    import json
except:
    !pip install json
    import json
    
try:
    import IPython
except:
    !pip install IPython
    import IPython   
    
ns = {'tei': "http://www.tei-c.org/ns/1.0" }

import os
import xml.etree.ElementTree as ET


HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")

#  Transforming Archaeological Data to Linked Art

This notebook provided a step through the process of transforming archaeological data to Linked Art.




In [2]:
# linked art stub

doc_stub = {  
    "@context": "https://linked.art/ns/v1/linked-art.json",
    "id": "",
    "type": "HumanMadeObject"  , 
    "label": ""
      }



## Load Data Files



In [6]:
input_folder = './heidelberg/data/input/inscriptions/1/1/'
output_filepath = "./heidelberg/data/output/"

file_list=os.listdir(input_folder)

doc_api = {"object": []}
cnt = 0

for file in file_list:
    with open(input_folder + file) as filepath:
        cnt = cnt + 1
        tree = ET.parse(filepath)
        root = tree.getroot()
       # print(root)
        
        uri =  root.find(".//tei:idno[@type='URI']", ns)
        if uri == None:
            continue
            
        title = root.find("./tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title",ns)
        identifiers = root.findall(".//tei:publicationStmt/tei:idno", ns)
        objtype = root.find(".//tei:objectType", ns)
        if objtype != None:
            object_type_id = ""
            object_type = objtype.text
            if "ref" in objtype.attrib:
                object_type_id = objtype.attrib["ref"]
            
        filename = root.find(".//tei:idno[@type='localID']",ns )
        print(filename.text)
        dimensions = root.find(".//tei:dimensions", ns)
        
        last_country_text = last_region_text = last_settlement_text = ""
        
        last_country = root.find(".//tei:msIdentifier/tei:country", ns)
        if last_country != None:
            last_country_text = last_country.text
        last_region = root.find(".//tei:msIdentifier/tei:region", ns)
        if last_region != None:
            last_region_text = last_region.text
        last_settlement = root.find(".//tei:msIdentifier/tei:settlement", ns)
        if last_settlement != None:
            last_settlement_text = last_settlement.text
        last_repository = root.find(".//tei:msIdentifier/tei:repository", ns)

        
        doc = doc_stub
        
        
        doc["id"] = uri.text
        doc["label"] = title.text
        doc["identified_by"] = []
        doc["identified_by"].append(
            {
              "type": "Name",
                  "classified_as": [{
                      "id": "http://vocab.getty.edu/aat/300404670",
                      "type": "Type",
                      "_label": title.text
                    } ]
              })
        for id in identifiers:
            if id.text != "None" :
                doc["identified_by"].append({
                      "type": "Identifier",
                      "classified_as": [{
                              "id": "http://vocab.getty.edu/page/aat/300404626",
                              "type": "Type",
                              "_label": id.attrib["type"] 
                            }],
                      "content": id.text })
                
    doc["classified_as"] = []

    if objtype != None:
        doc["classified_as"].append({
          "id": object_type_id,
          "type": "Type",
          "_label": object_type
        })

    # Dimensions are described in epidoc .//dimensions
    doc["dimension"] = []

    for dimension in dimensions:
        
        unit = ""
        if "unit" in dimension.attrib:
            unit = dimension.attrib["unit"]
            
        doc["dimension"].append({
              "type": "Dimension",
              "classified_as": [{
                      "id": "http://vocab.getty.edu/aat/300055647",
                      "type": "Type",
                      "_label": dimension.tag.split('}', 1)[1].capitalize() }],
              "value": dimension.text,
              "unit": {
                    "id": "http://vocab.getty.edu/aat/300379100",
                    "type": "MeasurementUnit",
                    "_label": unit}
            })
        



    doc["part"] = [{
      "type": "Encounter",
      "_label": "Last Recorded Location",
      "took_place_at": [{
            "type": "Place",
            "_label": last_settlement_text,
            "classified_as" : [{
                  "id": "http://vocab.getty.edu/page/aat/300008346",
                  "type": "Type",
                  "_label": "Settlement" }]
        },
        {
        "type": "Place",
        "_label": last_country_text,
        "classified_as": [{
              "id": "http://vocab.getty.edu/page/aat/300387506",
              "type": "Type",
              "_label": "Country"}]
        },   
        {
        "type": "Place",
        "_label": last_region_text,
        "classified_as": [{
              "id": "http://vocab.getty.edu/page/aat/300182722",
              "type": "Type",
              "_label": "Region"
            }]
        }]
    }]

    doc["encountered"] = [{
          "type": "HumanMadeObject",
          "_label": title.text,
        "classified_as" : ""
        }]
    
    if objtype != None:
        doc["encountered"] = [{       
                      "id": object_type_id,
                      "type": "Type",
                      "_label": object_type,
                      "classified_as": [{
                          "id": "http://vocab.getty.edu/aat/300435443",
                          "type": "Type",
                          "_label": "Type of Work"}]
            }]
   
    text_file = open("output_filepath" + filename.text + ".json", "wt")
    text_file.write(json.dumps(doc,indent=2))
    text_file.close()
      
    api_filename = "./linkedart/api/heidelberg/object/" + filename.text + "/index.json"
    os.makedirs(os.path.dirname(api_filename), exist_ok=True)
    text_file = open(api_filename, "wt")
    text_file.write(json.dumps(doc,indent=2))
    text_file.close()
    
    
        
    doc_api_doc = doc
    doc_api_doc["id"] = filename.text
    doc_api["object"].append(doc_api_doc)
        
text_file = open("./heidelberg/db.json", "wt")
text_file.write(json.dumps(doc_api,indent=2))
text_file.close()   
        
        
        

  

HD000943
HD000957
HD000758
HD000980
HD000994
HD000764
HD000002
HD000016
HD000770
HD000599
HD000228
HD000200
HD000566
HD000572
HD000214
HD000348
HD000374
HD000412
HD000406
HD000360
HD000837
HD000189
HD000823
HD000638
HD000610
HD000176
HD000162
HD000604
HD000163
HD000605
HD000611
HD000177
HD000639
HD000822
HD000836
HD000188
HD000407
HD000361
HD000375
HD000413
HD000349
HD000573
HD000215
HD000201
HD000567
HD000229
HD000598
HD000017
HD000771
HD000765
HD000003
HD000995
HD000981
HD000759
HD000956
HD000942
HD000954
HD000940
HD000798
HD000968
HD000997
HD000029
HD000983
HD000773
HD000015
HD000001
HD000767
HD000559
HD000217
HD000571
HD000565
HD000203
HD000388
HD000439
HD000363
HD000405
HD000411
HD000377
HD000820
HD000834
HD000808
HD000149
HD000607
HD000161
HD000175
HD000613
HD000174
HD000612
HD000606
HD000160
HD000148
HD000809
HD000835
HD000821
HD000410
HD000376
HD000362
HD000404
HD000438
HD000389
HD000564
HD000202
HD000216
HD000570
HD000558
HD000766
HD000772
HD000014
HD000982
HD000996
HD000028
H

HD000026
HD000740
HD000998
HD000754
HD000032
HD000768
HD000581
HD000595
HD000542
HD000224
HD000230
HD000556
HD000218
HD000393
HD000387
HD000436
HD000350
HD000344
HD000422
HD000378
HD000191
HD000185
HD000813
HD000807
HD000152
HD000634
HD000620
HD000146
HD000608
HD000609
HD000621
HD000147
HD000153
HD000635
HD000806
HD000812
HD000184
HD000190
HD000379
HD000345
HD000423
HD000437
HD000351
HD000386
HD000392
HD000219
HD000231
HD000557
HD000543
HD000225
HD000594
HD000580
HD000769
HD000755
HD000033
HD000027
HD000999
HD000741
HD000972
HD000966
HD000796
HD000782
