## Transform Heidelberg Inscription Data to Linked Art 



https://github.com/epigraphic-database-heidelberg/data



In [1]:
try:
    import IPython
except:
    %pip install IPython
    import IPython 
    
from IPython.display import display, IFrame, HTML, Javascript
from IPython.core.display import HTML


try:    
    import json
except:
    !pip install json
    import json
    
try:
    import IPython
except:
    !pip install IPython
    import IPython   
    
ns = {'tei': "http://www.tei-c.org/ns/1.0" }

import os
import xml.etree.ElementTree as ET


HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")

#  Transforming Archaeological Data to Linked Art

This notebook provided a step through the process of transforming archaeological data to Linked Art.




In [2]:
# linked art stub

doc_stub = {  
    "@context": "https://linked.art/ns/v1/linked-art.json",
    "id": "",
    "type": "HumanMadeObject"  , 
    "label": ""
      }



## Load Data Files



In [16]:
input_folder = './heidelberg/data/input/inscriptions/1/1/'
output_filepath = "./heidelberg/data/output/"

file_list=os.listdir(input_folder)

doc_api = {"object": []}
cnt = 0

for file in file_list:
    with open(input_folder + file) as filepath:
        cnt = cnt + 1
        tree = ET.parse(filepath)
        root = tree.getroot()
       # print(root)
        
        uri =  root.find(".//tei:idno[@type='URI']", ns)
        if uri == None:
            continue
            
        title = root.find("./tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title",ns)
        identifiers = root.findall(".//tei:publicationStmt/tei:idno", ns)
        objtype = root.find(".//tei:objectType", ns)
        if objtype != None:
            object_type_id = ""
            object_type = objtype.text
            if "ref" in objtype.attrib:
                object_type_id = objtype.attrib["ref"]
            
        filename = root.find(".//tei:idno[@type='localID']",ns )
        
        
        
        

        
        doc = doc_stub
        
        
        doc["id"] = uri.text
        doc["label"] = title.text
        doc["identified_by"] = []
        doc["identified_by"].append(
            {
              "type": "Name",
                  "classified_as": [{
                      "id": "http://vocab.getty.edu/aat/300404670",
                      "type": "Type",
                      "_label": title.text
                    } ]
              })
        for id in identifiers:
            if id.text != "None" :
                doc["identified_by"].append({
                      "type": "Identifier",
                      "classified_as": [{
                              "id": "http://vocab.getty.edu/page/aat/300404626",
                              "type": "Type",
                              "_label": id.attrib["type"] 
                            }],
                      "content": id.text })
                
    doc["classified_as"] = []

    if objtype != None:
        doc["classified_as"].append({
          "id": object_type_id,
          "type": "Type",
          "_label": object_type
        })

    # DIMENSIONS
    # Dimensions are described in epidoc .//dimensions
    dimensions = root.find(".//tei:dimensions", ns)
    doc["dimension"] = []
    
    unit = ""
    if "unit" in dimensions.attrib:
        unit = dimensions.attrib["unit"]

    for dimension in dimensions:

        doc["dimension"].append({
              "type": "Dimension",
              "classified_as": [{
                      "id": "http://vocab.getty.edu/aat/300055647",
                      "type": "Type",
                      "_label": dimension.tag.split('}', 1)[1].capitalize() }],
              "value": dimension.text,
              "unit": {
                    "id": "http://vocab.getty.edu/aat/300379100",
                    "type": "MeasurementUnit",
                    "_label": unit}
            })
        
    """
    <provenance type="found">  
                            
                        <placeName ref="http://www.geonames.org/">Cuma, bei</placeName>
                        <placeName type="modern_region" ref="http://www.geonames.org/">Campania</placeName>
                        <placeName type="modernCountry" ref="http://www.geonames.org/countries/it/">Italien</placeName>
                        </provenance>
                    </history>
                </msDesc>
            </sourceDesc>
        </fileDesc>
    """

    # LOCATION
    last_country_text = last_region_text = last_settlement_text = ""
    
    places = root.findall(".//tei:provenance/tei:placeName", ns)
    
    # COUNTRY
    for place in places:
        if "type" in place.attrib and place.attrib["type"] == 'modernCountry':
            last_country_text = place.text
    # REGION
        if "type" in place.attrib and place.attrib["type"] == 'modern_region':
                last_region_text = place.text
                print(last_region_text)
    # SETTLEMENT      
        if "type" not in place.attrib:
            last_settlement_text = place.text

    doc["part"] = [{
      "type": "Encounter",
      "_label": "Last Recorded Location",
      "took_place_at": [{
            "type": "Place",
            "_label": last_settlement_text,
            "classified_as" : [{
                  "id": "http://vocab.getty.edu/page/aat/300008346",
                  "type": "Type",
                  "_label": "Settlement" }]
        },
        {
        "type": "Place",
        "_label": last_country_text,
        "classified_as": [{
              "id": "http://vocab.getty.edu/page/aat/300387506",
              "type": "Type",
              "_label": "Country"}]
        },   
        {
        "type": "Place",
        "_label": last_region_text,
        "classified_as": [{
              "id": "http://vocab.getty.edu/page/aat/300182722",
              "type": "Type",
              "_label": "Region"
            }]
        }]
    }]

    doc["encountered"] = [{
          "type": "HumanMadeObject",
          "_label": title.text,
        "classified_as" : ""
        }]
    
    if objtype != None:
        doc["encountered"] = [{       
                      "id": object_type_id,
                      "type": "Type",
                      "_label": object_type,
                      "classified_as": [{
                          "id": "http://vocab.getty.edu/aat/300435443",
                          "type": "Type",
                          "_label": "Type of Work"}]
            }]
   
    text_file = open("output_filepath" + filename.text + ".json", "wt")
    text_file.write(json.dumps(doc,indent=2, ensure_ascii=False))
    text_file.close()
      
    api_filename = "./linkedart/api/heidelberg/object/" + filename.text + "/index.json"
    os.makedirs(os.path.dirname(api_filename), exist_ok=True)
    text_file = open(api_filename, "wt")
    text_file.write(json.dumps(doc,indent=2, ensure_ascii=False))
    text_file.close()
    
    doc_api_doc = doc
    doc_api_doc["id"] = filename.text
    doc_api["object"].append(doc_api_doc)
        
text_file = open("./heidelberg/db.json", "wt")
text_file.write(json.dumps(doc_api,indent=2, ensure_ascii=False))
text_file.close()   
        
        
        

  

Skopski region
Cádiz
Venezia
Roma
Caraș-Severin
Venezia
Lazio
Lazio
Massa Carrara
Roma
Puglia
Campania
Salamanca
Salamanca
Campania
Lazio
Abruzzo
None
None
Umbria
Sevilla
Lazio?
Hunedoara
Pays de la Loire
Caraș-Severin
Campania
Lazio?
Caraș-Severin
Lazio
Umbria
République et Canton de Genève
Braga
None
Sevilla
Umbria
Campania
Badajoz
Rheinland-Pfalz
Roma
Viseu
Limburg
Tipasa
Puglia
Braga
Auvergne-Rhône-Alpes
Puglia
Caraș-Severin
Comunidad Autónoma de la Región de Murcia
None
None
Sevilla
Roma
Cádiz
Tipasa
Roma
Cádiz
Cádiz
Cluj
Sevilla
Lazio
Lazio
Lazio
Roma
Massa Carrara
Viana do Castelo
Campania
Venezia
Lazio
Lazio
Lazio
Lazio
Lazio
None
None
Lazio
Tunis
None
Umbria
Hunedoara
Sevilla
Izmir
Puglia
Caraș-Severin
Lazio
Puglia
Viterbo
Braga
Lisboa
Lisboa
Lazio
Lazio
Niederösterreich
Al Marj
Modena
Lisboa
Siliana
Extremadura
Viseu
Auvergne-Rhône-Alpes
Cáceres
Auvergne-Rhône-Alpes
Napoli
León
Auvergne-Rhône-Alpes
Auvergne-Rhône-Alpes
Hunedoara
Hunedoara
Logroño
Sălaj
Jaén
Lazio?
Cádiz
Rabat

Lazio
Niederösterreich
Ípeiros
Sevilla
Sălaj
Irak
Roma
Hunedoara
Istarska županija
Roma
Lazio
Umbria
Tipasa
Auvergne-Rhône-Alpes
Lazio
Lazio
Roma
Salamanca
Centre-Val de Loire
Umbria
None
Pesaro
Provence-Alpes-Côte d’Azur
Auvergne-Rhône-Alpes
None
Lazio?
Shaḩḩat
Roma
Sevilla
Sevilla
Extremadura
Provence-Alpes-Côte d’Azur
Lazio
Campania
Campania
Puglia
None
Lazio?
Roma
Lucca
Libanon
Izmir
Foggia
Puglia
Marche
Siliana
Limburg
Abruzzo
Le Kef
Zeeland
None
Siliana
Roma
Foggia
Auvergne-Rhône-Alpes
Salamanca
Puglia
Roma
Tipasa
Viana do Castelo
Alba
None
None
Tipasa
Lazio
Lazio
Hunedoara
Roma
Sevilla
Sevilla
Niederösterreich
Cluj
Batna
Lazio
Istarska županija
Lazio
Alba
Andalucía
Batna
Viterbo
Caraș-Severin
Jaén
Extremadura
Lazio
Roma
Lazio
Umbria
Orense
Siliana
Provence-Alpes-Côte d’Azur
Castelo Branco
Braga
Extremadura
Campania
Campania
Sevilla
Sevilla
Puglia
Napoli
Provence-Alpes-Côte d’Azur
Umbria
Umbria
Lisboa
Lisboa
Lazio
Lazio?
Grand Est
Niederösterreich
Niederösterreich
Puglia
Puglia
N