In [9]:
"""Convert Ben Sira XML to HTML for proof-reading.
"""
from html import escape
from pathlib import Path
from xml.etree import ElementTree


# Inline spacer emitted for every <stich> element.
STICH_SPACER = '<span style="display:inline-block; width: 30px;"></span>'


def reconstructed_word_text(word):
    """Return the text of a reconstructed <w> plus the text of its direct <g> children."""
    pieces = [word.text or ""]
    pieces.extend(g.text for g in word.findall("g") if g.text)
    return "".join(pieces)


def _close_reconstructed_run(run, trailing_markers):
    """Return the bracketed run followed by the markers seen after its last word.

    Both lists are emptied so the caller can start a new run. Returns an
    empty string when no run is open.
    """
    if not run:
        return ""
    closed = "[" + "".join(run).rstrip() + "] " + "".join(trailing_markers)
    run.clear()
    trailing_markers.clear()
    return closed


def render_verse(verse):
    """Return the HTML paragraph for one <div type="verse"> element.

    Only the direct children of *verse* are rendered:
      * <line>  -> superscript line number (preceded by a right-to-left mark),
      * <stich> -> fixed-width spacer,
      * <w>     -> the word followed by a space.

    Consecutive words carrying reconstructed="1" are wrapped in a single pair
    of square brackets. Line and stich markers keep their document position:
    a marker between two reconstructed words stays inside the brackets, and a
    marker after the last reconstructed word is emitted after the closing
    bracket.

    Raises KeyError if the verse or one of its <line> children has no "n"
    attribute.
    """
    # NOTE(review): id="verse" / id="line" repeat on every verse and line, so
    # they are not unique ids; kept as-is because styles.css may select on them.
    verse_number = escape(verse.attrib["n"], quote=False)
    html_parts = [f'<p dir="rtl"><b id="verse">{verse_number}</b>\n']

    run = []  # rendered tokens of the open reconstructed run, ending with a word
    trailing_markers = []  # markers seen since the last reconstructed word

    for child in verse:
        if child.tag == "line":
            line_number = escape(child.attrib["n"], quote=False)
            marker = f'<sup id="line">&#x200F;{line_number}</sup>'
            # While a run is open, hold the marker back until we know whether
            # it falls inside the run or after it.
            (trailing_markers if run else html_parts).append(marker)
        elif child.tag == "stich":
            (trailing_markers if run else html_parts).append(STICH_SPACER)
        elif child.tag == "w":
            if child.attrib.get("reconstructed", "0") == "1":
                # Another reconstructed word follows, so any held-back markers
                # belong inside the brackets.
                run.extend(trailing_markers)
                trailing_markers.clear()
                run.append(escape(reconstructed_word_text(child), quote=False) + " ")
            else:
                # A regular word ends the reconstructed run, if one is open.
                html_parts.append(_close_reconstructed_run(run, trailing_markers))
                # NOTE(review): <g> children are only rendered for
                # reconstructed words; confirm regular words never contain <g>.
                if child.text:
                    html_parts.append(escape(child.text, quote=False) + " ")

    # Close a run that reaches the end of the verse.
    html_parts.append(_close_reconstructed_run(run, trailing_markers))
    html_parts.append("</p>\n")
    return "".join(html_parts)


def manuscript_to_html(root):
    """Return the proof-reading HTML page for a parsed manuscript tree.

    Walks every element under *root* in document order: an <ms> element
    produces the page header (title and <h1> from its "name" attribute), a
    <div type="chap"> produces a chapter heading and a <div type="verse">
    produces a paragraph (see render_verse). <div> elements with any other
    type, or without a type, are skipped. The document is closed with
    </body></html> when a header was written.

    Raises KeyError if a required "name" or "n" attribute is missing.
    """
    parts = []
    header_written = False

    for element in root.iter():
        if element.tag == "ms":
            name = escape(element.attrib["name"], quote=False)
            # The charset declaration matches the UTF-8 encoding used when
            # the page is written to disk.
            parts.append(
                "<!DOCTYPE html>\n<html>\n<head>\n"
                '<meta charset="utf-8">\n'
                '<link rel="stylesheet" href="styles.css">\n'
                f"<title>{name}</title>\n</head>\n"
                f"<body><h1>{name}</h1>\n"
            )
            header_written = True
        elif element.tag == "div":
            div_type = element.attrib.get("type")
            if div_type == "chap":
                chapter_number = escape(element.attrib["n"], quote=False)
                parts.append(f"<h2>Chapter {chapter_number}</h2>")
            elif div_type == "verse":
                parts.append(render_verse(element))

    if header_written:
        parts.append("</body>\n</html>\n")
    return "".join(parts)


for file_path in Path("../xml_tei_revised").glob("*.xml"):
    print(f"Converting {file_path} to HTML...")
    parsed_manuscript = ElementTree.parse(file_path).getroot()

    # Build the whole page first so a failed conversion does not leave a
    # truncated file behind, then write it as UTF-8 (the text is Hebrew and
    # the platform default encoding may not be able to represent it).
    page = manuscript_to_html(parsed_manuscript)
    output_dir = Path("htmls")
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / f"{file_path.stem}.html").write_text(page, encoding="utf-8")
                            



Converting ../xml_tei_revised/ms_b.xml to HTML...
Converting ../xml_tei_revised/ms_c.xml to HTML...
Converting ../xml_tei_revised/ms_a.xml to HTML...
Converting ../xml_tei_revised/ms_d.xml to HTML...
Converting ../xml_tei_revised/ms_e.xml to HTML...
Converting ../xml_tei_revised/ms_f.xml to HTML...
Converting ../xml_tei_revised/ms_11QPsa.xml to HTML...
Converting ../xml_tei_revised/ms_massada.xml to HTML...
Converting ../xml_tei_revised/ms_2Q18.xml to HTML...
