In [1]:
from lxml import etree



In [11]:
from lxml import etree

# Convert the custom apparatus XML into a TEI-style <app>/<lem>/<rdg> document.
#
# Reads 'Apparatus III Encoding.xml', builds a new tree with a placeholder
# teiHeader, emits one <app> per source <entry>, and writes the result to
# 'Converted_Apparatus_TEI.xml'. Missing optional pieces of an entry
# (LemmaInfo, Lemmas, DecodedEntry, Details, Rdg, the Number attribute) are
# skipped instead of aborting the whole conversion.

# Parse the input XML file
input_file = 'Apparatus III Encoding.xml'
tree = etree.parse(input_file)
root = tree.getroot()

# Create a new TEI XML tree with header
# NOTE(review): `xmlns` is passed here as an ordinary keyword attribute, so the
# elements built below are created without a namespace in memory; the TEI
# namespace presumably only applies once the written file is parsed again.
tei_root = etree.Element("TEI", xmlns="http://www.tei-c.org/ns/1.0")
tei_tree = etree.ElementTree(tei_root)

# Create TEI header (all values below are placeholder metadata)
tei_header = etree.SubElement(tei_root, "teiHeader")
file_desc = etree.SubElement(tei_header, "fileDesc")
title_stmt = etree.SubElement(file_desc, "titleStmt")
title = etree.SubElement(title_stmt, "title")
title.text = "Converted Apparatus"
author = etree.SubElement(title_stmt, "author")
author.text = "Original Author"
publication_stmt = etree.SubElement(file_desc, "publicationStmt")
publisher = etree.SubElement(publication_stmt, "publisher")
publisher.text = "Publisher Name"
pub_place = etree.SubElement(publication_stmt, "pubPlace")
pub_place.text = "Publication Place"
date = etree.SubElement(publication_stmt, "date")
date.text = "2024"
source_desc = etree.SubElement(file_desc, "sourceDesc")
bibl = etree.SubElement(source_desc, "bibl")
bibl.text = "Source Bibliographic Information"

# Define the conversion logic for the apparatus
for entry in root.findall('entry'):
    # lxml rejects None as an attribute value, so only set n when the source
    # entry actually carries a Number attribute.
    app = etree.SubElement(tei_root, "app")
    entry_number = entry.get("Number")
    if entry_number is not None:
        app.set("n", entry_number)

    # Process LemmaInfo: one <lem> per Lemma; tolerate a missing
    # LemmaInfo/Lemmas wrapper by emitting no <lem> at all.
    lemma_info = entry.find('LemmaInfo')
    lemmas = lemma_info.find('Lemmas') if lemma_info is not None else None
    for lemma in (lemmas.findall('Lemma') if lemmas is not None else []):
        lem_text = lemma.find('Text')
        lem = etree.SubElement(app, "lem")
        lem.text = lem_text.text if lem_text is not None else ""

    # Process DecodedEntry: the <rdg> is always created, even when there is
    # nothing to put inside it.
    decoded_entry = entry.find('DecodedEntry')
    rdg = etree.SubElement(app, "rdg")

    details = decoded_entry.find('Details') if decoded_entry is not None else None
    if details is None:
        continue

    # Witnesses: a <wit> per manuscript, each optionally followed by a <note>.
    witnesses = details.find('Witnesses')
    if witnesses is not None:
        for witness in witnesses.findall('Witness'):
            wit_text = witness.find('Manuscript')
            wit = etree.SubElement(rdg, "wit")
            wit.text = wit_text.text if wit_text is not None else ""

            comment_text = witness.find('Comment')
            if comment_text is not None and comment_text.text:
                note = etree.SubElement(rdg, "note")
                note.text = comment_text.text

    # Rdg: look it up once and skip the reading/sigla/comment trio when absent.
    source_rdg = details.find('Rdg')
    if source_rdg is not None:
        reading = source_rdg.find('Reading')
        if reading is not None and reading.text:
            # Becomes the leading text of <rdg>, ahead of the child elements.
            rdg.text = reading.text

        sigla = source_rdg.find('Sigla')
        if sigla is not None and sigla.text:
            sig = etree.SubElement(rdg, "seg", type="sigla")
            sig.text = sigla.text

        comment = source_rdg.find('Comment')
        if comment is not None and comment.text:
            note = etree.SubElement(rdg, "note")
            note.text = comment.text

    # Cross references: one <ref> per Reference.
    cross_refs = details.find('CrossReferences')
    if cross_refs is not None:
        for ref in cross_refs.findall('Reference'):
            cross_ref = etree.SubElement(rdg, "ref")
            cross_ref.text = ref.text

# Save the new TEI XML tree to a file
output_file = 'Converted_Apparatus_TEI.xml'
tei_tree.write(output_file, pretty_print=True, xml_declaration=True, encoding="UTF-8")

print(f"Converted XML saved to {output_file}")


Converted XML saved to Converted_Apparatus_TEI.xml


In [10]:
from lxml import etree

_TEI_NS = '{http://www.tei-c.org/ns/1.0}'

# Source tags that become <seg type="..."> in the output, keyed by qualified tag.
_SEG_TYPES = {
    _TEI_NS + 'taam': 'taam',
    _TEI_NS + 'specialSign': 'specialSign',
}


def convert_element(element):
    """Convert one element of the source main-text encoding to its output form.

    Mapping (source tags are in the TEI namespace, results are un-namespaced):

    * ``lg`` with ``type="verse"`` -> ``ab``; ``n`` is copied when present and
      every child is converted recursively.
    * ``w`` -> ``w``; the word's own text comes first, followed by the text of
      any ``nestedWord`` children, whose own children are converted and
      attached directly to the new ``w``. Other children are converted
      recursively.
    * ``taam`` / ``specialSign`` -> ``seg`` with the matching ``type``.
    * ``divider`` -> ``pb`` with ``n`` set to the divider text, when it has any.
    * anything else is returned unchanged (the same object, not a copy).

    Attributes of the source ``w`` and tail text of replaced elements are not
    carried over to the newly built elements.

    Args:
        element: the source element to convert.

    Returns:
        A newly created element, or ``element`` itself for unhandled tags.
    """
    tag = element.tag

    if tag == _TEI_NS + 'lg' and element.attrib.get('type') == 'verse':
        ab = etree.Element("ab")
        # A None attribute value is rejected by lxml, so set n only if present.
        verse_n = element.get("n")
        if verse_n is not None:
            ab.set("n", verse_n)
        for child in element:
            ab.append(convert_element(child))
        return ab

    if tag == _TEI_NS + 'w':
        w = etree.Element("w")
        # The word's own leading text precedes its children in document order,
        # so it must come before any text pulled up from nestedWord children.
        w.text = element.text or ''
        for child in element:
            if child.tag == _TEI_NS + 'nestedWord':
                # Flatten nestedWord: its text joins the word text and its
                # children are attached directly to the new <w>.
                if child.text:
                    w.text += child.text
                for grandchild in child:
                    w.append(convert_element(grandchild))
            else:
                # taam / specialSign / anything else: same handling as at top level.
                w.append(convert_element(child))
        return w

    if tag in _SEG_TYPES:
        seg = etree.Element("seg", type=_SEG_TYPES[tag])
        seg.text = element.text
        return seg

    if tag == _TEI_NS + 'divider':
        pb = etree.Element("pb")
        # An empty divider has text None; skip the attribute rather than fail.
        if element.text is not None:
            pb.set("n", element.text)
        return pb

    return element

# Convert the main text in 'output.tei.xml' into a new TEI document.
#
# The source header's children are moved under a fresh <teiHeader>, and every
# <div> in text/body is rebuilt with its children passed through
# convert_element(). A missing header, text, body, or div attribute is
# tolerated rather than aborting the run.

# Parse the input XML file
input_file = 'output.tei.xml'
tree = etree.parse(input_file)
root = tree.getroot()

tei_ns = '{http://www.tei-c.org/ns/1.0}'

# Create a new TEI XML tree with header
tei_root = etree.Element("TEI", xmlns="http://www.tei-c.org/ns/1.0")
tei_tree = etree.ElementTree(tei_root)

# Copy the header: only the children of the source header are transferred;
# attributes on the source <teiHeader> itself are not.
tei_header = etree.SubElement(tei_root, "teiHeader")
source_header = root.find(tei_ns + 'teiHeader')
if source_header is not None:
    tei_header.extend(source_header)

# Process the text
text = etree.SubElement(tei_root, "text")
body = etree.SubElement(text, "body")

# Walk text/body/div step by step so a missing level yields no divs
# instead of an AttributeError on None.
source_text = root.find(tei_ns + 'text')
source_body = source_text.find(tei_ns + 'body') if source_text is not None else None
source_divs = source_body.findall(tei_ns + 'div') if source_body is not None else []

for div in source_divs:
    new_div = etree.SubElement(body, "div")
    # lxml rejects None attribute values; copy type/n only when present.
    for attr_name in ("type", "n"):
        attr_value = div.get(attr_name)
        if attr_value is not None:
            new_div.set(attr_name, attr_value)
    for child in div:
        new_div.append(convert_element(child))

# Save the new TEI XML tree to a file
output_file = 'Converted_Main_Text_TEI.xml'
tei_tree.write(output_file, pretty_print=True, xml_declaration=True, encoding="UTF-8")

print(f"Converted XML saved to {output_file}")


Converted XML saved to Converted_Main_Text_TEI.xml


In [12]:
from lxml import etree

# Render the converted apparatus to HTML by running it through the
# 'tei_to_html.xsl' stylesheet.

# File locations: TEI input, stylesheet, and HTML destination
xml_file = 'Converted_Apparatus_TEI.xml'
xslt_file = 'tei_to_html.xsl'
html_output_file = 'Converted_Apparatus_TEI.html'

# Parse both documents (input first, then stylesheet) and compile the stylesheet
xml_tree, xslt_tree = (etree.parse(path) for path in (xml_file, xslt_file))
transform = etree.XSLT(xslt_tree)

# Run the transformation and write the resulting tree straight to disk
result_tree = transform(xml_tree)
result_tree.write(html_output_file, encoding='UTF-8', pretty_print=True)

print(f"HTML file created: {html_output_file}")


HTML file created: Converted_Apparatus_TEI.html
