# Transform I.Sicily Inscription Data to Linked Art - Directory

In [2]:
# Notebook setup: imports, the TEI namespace map, and the notebook stylesheet.
#
# The imports are plain statements: IPython is always importable inside a
# running notebook kernel, and json is part of the Python standard library,
# so neither needs an install-on-failure fallback.
import json
import os
import xml.etree.ElementTree as ET

import IPython
from IPython.display import display, IFrame, HTML, Javascript

# Prefix -> namespace URI map handed to ElementTree's find()/findall() so the
# XPath expressions in this notebook can use the short "tei:" prefix.
ns = {'tei': "http://www.tei-c.org/ns/1.0"}

# Kept as the last expression of the cell so the stylesheet link is rendered
# as the cell's output.
HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")

#  Transforming Archaeological Data to Linked Art

This notebook provides a step-by-step walkthrough of the process of transforming archaeological data to Linked Art.




In [5]:
# Linked Art skeleton used as the starting point for each generated record.
# "id" and "label" are placeholders (empty strings) at this point.
doc_stub = {
    "@context": "https://linked.art/ns/v1/linked-art.json",
    "id": "",
    "type": "HumanMadeObject",
    "label": "",
}

## Load Data File 

https://github.com/ISicily/ISicily/blob/master/inscriptions/ISic000001.xml

In [38]:
# Convert every EpiDoc/TEI inscription file in `input_folder` to a Linked Art
# JSON document. One <filename>.json is written per inscription into
# `output_filepath`, and all documents are also collected under the "object"
# key of `doc_api`, which is written to db.json at the end.
input_folder = './data/input/isicily/inscriptions/'
output_filepath = "./data/output/isicily/"

# os.listdir() returns every directory entry, including sub-folders and
# non-XML files (e.g. checkpoint folders), which cannot be parsed. Keep only
# the XML files and sort them so the order of db.json is reproducible.
file_list = sorted(
    name for name in os.listdir(input_folder) if name.lower().endswith(".xml")
)

# The per-inscription files are written here, so make sure the folder exists.
os.makedirs(output_filepath, exist_ok=True)

doc_api = {"object": []}


def _text_or_default(element, default=""):
    """Return ``element.text``, or ``default`` when the element was not found."""
    return default if element is None else element.text


for file in file_list:
    # Pass the path (not a text-mode file object) so ElementTree reads the
    # bytes itself and applies the encoding declared in the XML, independent
    # of the platform's default text encoding.
    tree = ET.parse(os.path.join(input_folder, file))
    root = tree.getroot()

    # Records without a URI identifier are skipped entirely.
    # Elements are tested with "is None": find() returns None when nothing
    # matches, and an Element's truth value depends on its children.
    uri = root.find(".//tei:idno[@type='URI']", ns)
    if uri is None:
        continue

    title = root.find("./tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title", ns)
    title_text = _text_or_default(title)
    identifiers = root.findall(".//tei:publicationStmt/tei:idno", ns)

    objtype = root.find(".//tei:objectType", ns)
    if objtype is not None:
        object_type = objtype.text
        object_type_id = objtype.attrib.get("ref", "")

    # Name of the output file; fall back to the input file's base name when
    # the record carries no (or an empty) filename identifier.
    filename = root.find(".//tei:idno[@type='filename']", ns)
    output_name = _text_or_default(filename) or os.path.splitext(file)[0]

    dimensions = root.find(".//tei:dimensions", ns)

    # Last recorded location, taken from the msIdentifier element.
    last_country_text = _text_or_default(
        root.find(".//tei:msIdentifier/tei:country", ns))
    last_region_text = _text_or_default(
        root.find(".//tei:msIdentifier/tei:region", ns))
    last_settlement_text = _text_or_default(
        root.find(".//tei:msIdentifier/tei:settlement", ns))

    # Start from a *copy* of the stub. Assigning doc_stub directly would make
    # every entry of doc_api["object"] the same dict object, so db.json would
    # contain the last inscription repeated once per file.
    doc = dict(doc_stub)

    doc["id"] = uri.text
    # NOTE(review): the Linked Art model names this property "_label" (as used
    # for the nested entries below); the top-level key is left as "label" here
    # to keep the existing output format - confirm which one consumers expect.
    doc["label"] = title_text
    doc["identified_by"] = [{
        "type": "Name",
        "classified_as": [{
            "id": "http://vocab.getty.edu/aat/300404670",
            "type": "Type",
            "_label": title_text
        }]
    }]
    for identifier in identifiers:
        content = identifier.text
        # Skip identifiers that carry no value: empty elements have text None
        # (or only whitespace); the literal string "None" is skipped as before.
        if content is None or not content.strip() or content == "None":
            continue
        doc["identified_by"].append({
            "type": "Identifier",
            "classified_as": [{
                "id": "http://vocab.getty.edu/page/aat/300404626",
                "type": "Type",
                "_label": identifier.attrib.get("type", "")
            }],
            "content": content
        })

    doc["classified_as"] = []
    if objtype is not None:
        doc["classified_as"].append({
            "id": object_type_id,
            "type": "Type",
            "_label": object_type
        })

    # Dimensions are described in epidoc .//dimensions; each child element
    # (named after the measured dimension) becomes one Dimension entry.
    # A record without a <dimensions> element simply gets an empty list.
    doc["dimension"] = []
    for dimension in (dimensions if dimensions is not None else ()):
        doc["dimension"].append({
            "type": "Dimension",
            "classified_as": [{
                "id": "http://vocab.getty.edu/aat/300055647",
                "type": "Type",
                # Strip the "{namespace}" prefix from the tag, if present.
                "_label": dimension.tag.rsplit('}', 1)[-1].capitalize()
            }],
            "value": dimension.text,
            "unit": {
                "id": "http://vocab.getty.edu/aat/300379100",
                "type": "MeasurementUnit",
                "_label": dimension.attrib.get("unit", "")
            }
        })

    doc["part"] = [{
        "type": "Encounter",
        "_label": "Last Recorded Location",
        "took_place_at": [
            {
                "type": "Place",
                "_label": last_settlement_text,
                "classified_as": [{
                    "id": "http://vocab.getty.edu/page/aat/300008346",
                    "type": "Type",
                    "_label": "Settlement"
                }]
            },
            {
                "type": "Place",
                "_label": last_country_text,
                "classified_as": [{
                    "id": "http://vocab.getty.edu/page/aat/300387506",
                    "type": "Type",
                    "_label": "Country"
                }]
            },
            {
                "type": "Place",
                "_label": last_region_text,
                "classified_as": [{
                    "id": "http://vocab.getty.edu/page/aat/300182722",
                    "type": "Type",
                    "_label": "Region"
                }]
            }
        ]
    }]

    # NOTE(review): when an object type is present the "encountered" entry is
    # the object type itself rather than the HumanMadeObject; this mirrors the
    # existing output - confirm whether the type was meant to be nested under
    # the object's "classified_as" instead.
    if objtype is not None:
        doc["encountered"] = [{
            "id": object_type_id,
            "type": "Type",
            "_label": object_type,
            "classified_as": [{
                "id": "http://vocab.getty.edu/aat/300435443",
                "type": "Type",
                "_label": "Type of Work"
            }]
        }]
    else:
        doc["encountered"] = [{
            "type": "HumanMadeObject",
            "_label": title_text,
            "classified_as": ""
        }]

    doc_api["object"].append(doc)

    # "with" closes the file even if serialisation or writing fails.
    with open(os.path.join(output_filepath, output_name + ".json"), "wt") as text_file:
        text_file.write(json.dumps(doc, indent=2))

# Combined file holding every converted inscription.
with open("db.json", "wt") as text_file:
    text_file.write(json.dumps(doc_api, indent=2))
        
        
        

  