In [1]:
try:
    import IPython
except:
    %pip install IPython
    import IPython 
    
from IPython.display import display, IFrame, HTML, Javascript
from IPython.core.display import HTML


try:    
    import json
except:
    !pip install json
    import json
    
try:
    import IPython
except:
    !pip install IPython
    import IPython   
    


HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")

#  Transforming Archaeological Data to Linked Art

This notebook provided a step through the process of transforming archaeological data to Linked Art.

## Load Data File 

https://github.com/ISicily/ISicily/blob/master/inscriptions/ISic000001.xml


In [2]:
ns = {'tei': "http://www.tei-c.org/ns/1.0" }

import os
import xml.etree.ElementTree as ET

input_folder = './data/input/isicily/inscriptions/'

file_list=os.listdir(input_folder)

for file in file_list:
    with open(input_folder + file) as filepath:
        
        tree = ET.parse(filepath)
        root = tree.getroot()
        
        

# Physical Object

# I.Sicily



## Linked.Art
https://linked.art/model/object/aboutness/#physical-object-and-visual-work

"Physical things are the carriers of intellectual works, be they visual works or textual works. "

Minimum representation for Physical Object in Linked Art

<pre>
{
  "@context": "https://linked.art/ns/v1/linked-art.json",
  "id": "http://sicily.classics.ox.ac.uk/inscription/ISic000001",
  "type": "HumanMadeObject",
  "_label": "Inscription",
  
  }
</pre>


In [3]:
    doc = {  
    "@context": "https://linked.art/ns/v1/linked-art.json",
    "id": "",
    "type": "HumanMadeObject"  , 
    "label": ""
      }

    uri =  root.find(".//tei:idno[@type='URI']", ns)
    doc["id"] = uri.text

    doc

{'@context': 'https://linked.art/ns/v1/linked-art.json',
 'id': 'http://sicily.classics.ox.ac.uk/inscription/ISic003944',
 'type': 'HumanMadeObject',
 'label': ''}


https://isicily.org/data-in-isicily/

## Identifiers

### I.Sicily

The I.Sicily number is a unique identifier, of the form ISic000298, which in turn forms the tail of the http URI that is assigned to the full record of each text (e.g. sicily.classics.ox.ac.uk/inscription/ISic000298). 

The number is marked up with <idno type=”filename”> in the TEI. 
    
This number does not change if the record is updated, and will persist as an identifier even when, e.g., two previously unconnected fragments are joined. 
    
The numbers are assigned sequentially to inscriptions as they are added to the dataset, and carry no other significance. Ordinarily, a number is assigned to an individual inscription. On rare occasions where two clearly independent texts are inscribed on the same object they will normally be assigned separate identifiers, but particularly with fragmentary texts complete consistency is impossible. 

All inscriptions in I.Sicily are also recorded in Trismegistos (www.trismegistos.org), which also provides a unique identifier (I.Sicily and Trismegistos collaborate, so new texts will eventually be assigned numbers in both systems). Decisions as to what constitutes a single document for these purposes may on occcasion differ between the two databases, but the records will be aligned (cf. http://www.trismegistos.org/about_identifiers.php).
    


### Linked Art
https://linked.art/model/base/#names-and-identifiers-for-a-resource
    
#### Names
https://linked.art/model/base/#names
"As the _label property is intended as internal documentation for the data, it is strongly recommended that every resource that should be rendered to an end user also have at least one specific name. The name could be for an object, a person, a group, an event or anything else. This pattern uses the identified_by property, with a Name resource. The value of the name is given in the content property of the Name."
    
<pre>
{
  "@context": "https://linked.art/ns/v1/linked-art.json",
  "id": "https://linked.art/example/object/22",
  "type": "HumanMadeObject",
  "_label": "Painting: Pasture and Sheep",
  "identified_by": [
    {
      "type": "Name",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/aat/300404670",
          "type": "Type",
          "_label": "Primary Name"
        }
      ]
      }
</pre>
    


<div id='vis3' style='height:100%;width:6000px'></div>

In [4]:
#path to title
# "./tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title"
path_title = "./tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title"

title = root.find(path_title,ns)
doc["label"] = title.text

doc["identified_by"] = []

doc["identified_by"].append(
    {
      "type": "Name",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/aat/300404670",
          "type": "Type",
          "_label": title.text
        }
      ]
      })

doc

{'@context': 'https://linked.art/ns/v1/linked-art.json',
 'id': 'http://sicily.classics.ox.ac.uk/inscription/ISic003944',
 'type': 'HumanMadeObject',
 'label': 'I.Sicily inscription 003944',
 'identified_by': [{'type': 'Name',
   'classified_as': [{'id': 'http://vocab.getty.edu/aat/300404670',
     'type': 'Type',
     '_label': 'I.Sicily inscription 003944'}]}]}

#### Identifiers
https://linked.art/model/base/#identifiers
    
"Many resources of interest are also given external identifiers, such as accession numbers for objects, ORCIDs for people or groups, lot numbers for auctions, and so forth. Identifiers are represented in a very similar way to names, but instead use the Identifier class. Identifiers will normally have a classification determining which sort of identifier it is, to distinguish between internal repository system assigned numbers from museum assigned accession numbers, for example.

As Identifiers and Names use the same identified_by property, the JSON will frequently have mixed classes in the array. Unlike Names, Identifiers are not part of human language and thus cannot have translations or a language associated with them.
"
    


In [5]:
""" path to identifiers
/TEI/teiHeader/fileDesc/publicationStmt/idno[@type=]          
"""

identifiers = root.findall(".//tei:publicationStmt/tei:idno", ns)

for id in identifiers:
    
    id_value = id.text
    id_type = id.attrib["type"] 
    
    if id_value != "None":
        doc["identified_by"].append({
      "type": "Identifier",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/page/aat/300404626",
          "type": "Type",
          "_label": id_type 
        }
      ],
      "content": id_value
    })

doc

{'@context': 'https://linked.art/ns/v1/linked-art.json',
 'id': 'http://sicily.classics.ox.ac.uk/inscription/ISic003944',
 'type': 'HumanMadeObject',
 'label': 'I.Sicily inscription 003944',
 'identified_by': [{'type': 'Name',
   'classified_as': [{'id': 'http://vocab.getty.edu/aat/300404670',
     'type': 'Type',
     '_label': 'I.Sicily inscription 003944'}]},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'filename'}],
   'content': 'ISic003944'},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'TM'}],
   'content': None},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'EDR'}],
   'content': None},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label

# Type of Object

## Linked Art
https://linked.art/model/base/#types-and-classifications

<pre>
"classified_as": [
    {
      "id": "http://vocab.getty.edu/aat/300033618",
      "type": "Type",
      "_label": "Painting"
    },
    {
      "id": "http://vocab.getty.edu/aat/300133025",
      "type": "Type",
      "_label": "Work of Art"
    }
  ]
</pre>

## I.Sicily EpiDoc

example:
objectType ana="#object.plaque" ref="https://www.eagle-network.eu/voc/objtyp/lod/259.html">plaque
                                    
path:
./physDesc/objectDesc/supportDesc/support/objectType
                                

In [6]:
doc["classified_as"] = []

objtype = root.find(".//tei:objectType", ns)

# objtype.attrib["ref"]
# objtype.text

doc["classified_as"].append({
          "id": "",
          "type": "Type",
          "_label": ""
        })

doc

{'@context': 'https://linked.art/ns/v1/linked-art.json',
 'id': 'http://sicily.classics.ox.ac.uk/inscription/ISic003944',
 'type': 'HumanMadeObject',
 'label': 'I.Sicily inscription 003944',
 'identified_by': [{'type': 'Name',
   'classified_as': [{'id': 'http://vocab.getty.edu/aat/300404670',
     'type': 'Type',
     '_label': 'I.Sicily inscription 003944'}]},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'filename'}],
   'content': 'ISic003944'},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'TM'}],
   'content': None},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'EDR'}],
   'content': None},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label

## Dimensions

### Linked Art 

https://linked.art/model/object/physical/#dimensions

"The physical dimensions of an object, such as height, width, diameter or weight, are included in the dimension property, and consist of three primary pieces of information:

The numeric value in value.
The type of dimension (e.g. height vs width) in classified_as, referencing an external vocabulary of dimension types.
The unit used to align the value with the real world such as inches, pounds or seconds. The unit should also be given from a controlled vocabulary.
"

<pre>
 "dimension": [
    {
      "type": "Dimension",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/aat/300055647",
          "type": "Type",
          "_label": "Width"
        }
      ],
      "value": 16,
      "unit": {
        "id": "http://vocab.getty.edu/aat/300379100",
        "type": "MeasurementUnit",
        "_label": "inches"
      }
    },]

</pre>


## I.Sicily EpiDoc

Dimensions are provided in epidoc .//dimensions

./physDesc/objectDesc/supportDesc/support/dimensions

><p>Marble plaque, employed as cover of a small sarcophagus</p>
                                    <material ana="#material.stone.marble" ref="http://www.eagle-network.eu/voc/material/lod/48.html">marble</material>
                                    <objectType ana="#object.plaque" ref="https://www.eagle-network.eu/voc/objtyp/lod/259.html">plaque</objectType>
                                   
                               

In [7]:
# Dimensions are described in epidoc .//dimensions
doc["dimension"] = []

dimensions = root.find(".//tei:dimensions", ns)



for dimension in dimensions:
    

    doc["dimension"].append({
      "type": "Dimension",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/aat/300055647",
          "type": "Type",
          "_label": dimension.tag.split('}', 1)[1].capitalize()
        }
      ],
      "value": dimension.text,
      "unit": {
        "id": "http://vocab.getty.edu/aat/300379100",
        "type": "MeasurementUnit",
        "_label": dimension.attrib["unit"]
      }})

""" <sourceDesc>
                <msDesc>
                    <msIdentifier>
                        <country>Italy</country>
                        <region>Sicily</region>
                        <settlement>Palermo</settlement>
                        <repository role="museum" ref="http://sicily.classics.ox.ac.uk/museum/064">Museo Archeologico Regionale Antonino Salinas</repository>
                        <idno type="inventory">3501</idno>
                        <altIdentifier>
                            <settlement/>
                            <repository/>
                            <idno type="old"/>
                        </altIdentifier>
                    </msIdentifier>
                    <msContents>
                        <textLang mainLang="la">Latin</textLang>
                    </msContents>
                    <physDesc>
                        <objectDesc>
                            <supportDesc>
                                <support><p>Marble plaque, employed as cover of a small sarcophagus</p>
                                    <material ana="#material.stone.marble" ref="http://www.eagle-network.eu/voc/material/lod/48.html">marble</material>
                                    <objectType ana="#object.plaque" ref="https://www.eagle-network.eu/voc/objtyp/lod/259.html">plaque</objectType>
                                    <dimensions><!--from ILPalermo-->
                                        <height unit="cm">17.5</height>
                                        <width unit="cm">29.3</width>
                                        <depth unit="cm">1.5-2</depth>
                                    </dimensions>
                                </support>
                                <condition ana="#condition.complete"/>
		    </supportDesc>"""

doc

{'@context': 'https://linked.art/ns/v1/linked-art.json',
 'id': 'http://sicily.classics.ox.ac.uk/inscription/ISic003944',
 'type': 'HumanMadeObject',
 'label': 'I.Sicily inscription 003944',
 'identified_by': [{'type': 'Name',
   'classified_as': [{'id': 'http://vocab.getty.edu/aat/300404670',
     'type': 'Type',
     '_label': 'I.Sicily inscription 003944'}]},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'filename'}],
   'content': 'ISic003944'},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'TM'}],
   'content': None},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label': 'EDR'}],
   'content': None},
  {'type': 'Identifier',
   'classified_as': [{'id': 'http://vocab.getty.edu/page/aat/300404626',
     'type': 'Type',
     '_label

# Encounters with Objects

## Linked Art 

https://linked.art/model/provenance/encounters/


"Objects can be encountered a long time after their production, especially in the realm of archaeology. These encounters are often called "finds" and the locations they take place at "find spots", but the object may have been never lost according to another culture that always knew about it. Encounters are frequently of objects from a long-vanished civilization, such as the Greek or Roman empires, but not necessarily. The object might be buried under the ash of a volcanic eruption, lost at sea, or otherwise pass out of human memory and record as far as the documenting institution is aware.

There might be multiple encounters recorded in a provenance chain, such as when an object is known to have been encountered and subsequently lost by a previous culture, and then rediscovered."

example:

<pre>

 "part": [
    {
      "type": "Encounter",
      "_label": "Encounter of Sculpture",
      "took_place_at": [
        {
          "type": "Place",
          "_label": "At Sea"
        }
      ],
      "carried_out_by": [
        {
          "type": "Person",
          "_label": "Fisher"
        }
      ],
      
      "encountered": [
        {
          "type": "HumanMadeObject",
          "_label": "Lost Sculpture",
          "classified_as": [
            {
              "id": "http://vocab.getty.edu/aat/300047090",
              "type": "Type",
              "_label": "Sculpture",
              "classified_as": [
                {
                  "id": "http://vocab.getty.edu/aat/300435443",
                  "type": "Type",
                  "_label": "Type of Work"
                }
              ]
            }
          ]
        }
      ]
</pre>


## I.Sicily Epidoc

.//sourceDesc/msDesc/msIdentifier

./country
./region
./settlement
./repository @role @ref
./idno @type



In [8]:
""" <sourceDesc>
                <msDesc>
                    <msIdentifier>
                        <country>Italy</country>
                        <region>Sicily</region>
                        <settlement>Palermo</settlement>
                        <repository role="museum" ref="http://sicily.classics.ox.ac.uk/museum/064">Museo Archeologico Regionale Antonino Salinas</repository>
                        <idno type="inventory">3501</idno>
                        <altIdentifier>
                            <settlement/>
                            <repository/>
                            <idno type="old"/>
                        </altIdentifier>
                    </msIdentifier>"""


# Palermo, Italy, Museo Archeologico Regionale Antonino Salinas , inventory number 3501

last_country = root.find(".//tei:msIdentifier/tei:country", ns)
last_region = root.find(".//tei:msIdentifier/tei:region", ns)
last_settlement = root.find(".//tei:msIdentifier/tei:settlement", ns)
last_repository = root.find(".//tei:msIdentifier/tei:repository", ns)

objlabel = root.find("./tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title",ns)
objtype = root.find(".//tei:objectType",ns)


doc["part"] = [
    {
      "type": "Encounter",
      "_label": "Last Recorded Location",
      "took_place_at": [
        {
        "type": "Place",
        "_label": last_settlement.text,
        "classified_as" : [{
              "id": "http://vocab.getty.edu/page/aat/300008346",
              "type": "Type",
              "_label": "Settlement"
            }]
        },
        
        {
        "type": "Place",
        "_label": last_country.text,
        "classified_as": [{
              "id": "http://vocab.getty.edu/page/aat/300387506",
              "type": "Type",
              "_label": "Country"
            }]
        },
          
        {
        "type": "Place",
        "_label": last_region.text,
        "classified_as": [{
              "id": "http://vocab.getty.edu/page/aat/300182722",
              "type": "Type",
              "_label": "Region"
            }]
        }]},
    # objtype.attrib["ref"]
    # objtype.text
    {"encountered" : [
        {
          "type": "HumanMadeObject",
          "_label": objlabel.text,
          "classified_as": [
            {
              "id": "",
              "type": "Type",
              "_label": "",
              "classified_as": [
                {
                  "id": "http://vocab.getty.edu/aat/300435443",
                  "type": "Type",
                  "_label": "Type of Work"
                }
              ]
            }
          ]
        }
      ]}
          
          
      
    
     

]

print(json.dumps(doc,indent=2))

{
  "@context": "https://linked.art/ns/v1/linked-art.json",
  "id": "http://sicily.classics.ox.ac.uk/inscription/ISic003944",
  "type": "HumanMadeObject",
  "label": "I.Sicily inscription 003944",
  "identified_by": [
    {
      "type": "Name",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/aat/300404670",
          "type": "Type",
          "_label": "I.Sicily inscription 003944"
        }
      ]
    },
    {
      "type": "Identifier",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/page/aat/300404626",
          "type": "Type",
          "_label": "filename"
        }
      ],
      "content": "ISic003944"
    },
    {
      "type": "Identifier",
      "classified_as": [
        {
          "id": "http://vocab.getty.edu/page/aat/300404626",
          "type": "Type",
          "_label": "TM"
        }
      ],
      "content": null
    },
    {
      "type": "Identifier",
      "classified_as": [
        {
          "id": "http

In [9]:
# write to file 

output_filepath = "./data/output/isicily/"

xpath = ".//tei:idno[@type='filename']"
id = root.find(xpath,ns )

text_file = open(output_filepath + id.text + ".json", "wt")
text_file.write(json.dumps(doc,indent=2))
text_file.close()

IndentationError: unexpected indent (<ipython-input-9-d4c817e1e4f5>, line 3)