<h1>Convertisseur pour Graph-East XML->PDF</h1>

<p>Ce fichier vous permettra de convertir vos données XML en PDF
Cette transformation à lieu en 2 étapes. D'abord vous devrez transformer votre fichier en HTML puis en LateX</p>

<h4>Convertisseur XML->HTML</h4>

In [1]:
import xml.etree.ElementTree as ET

# Chemin vers le fichier XML
file_path = '#le_nom_de_votre_fichier.xml'

# Parse le fichier XML
try:
    tree = ET.parse(file_path)
    root = tree.getroot()
    print("XML file parsed successfully.")
except Exception as e:
    print(f"Failed to parse XML file: {e}")

# Namespace utilisé dans le fichier XML
namespace = {'hml': 'https://heuristnetwork.org'}

# Dictionnaire pour les titres de détails
detail_titles = {
    "2-1": "Title",
    "2-39": "Image",
    "9-78": "Current location",
    "1201-514": "Date recorded",
    "0-1052": "Status",
    "0-1055": "Context",
    "0-1057": "Dating by century",
    "0-1058": "Dating criteria",
    "0-1059": "State of preservation",
    "0-1061": "Precise location",
    "0-1062": "Position",
    "0-1063": "Orientation",
    "0-1064": "Height (cm)",
    "0-1065": "Orientation support",
    "0-1066": "Conservation context",
    "0-1067": "Graphic environment",
    "0-1070": "Architectural support",
    "0-1073": "General description",
    "0-1074": "Material",
    "0-1075": "Execution technique",
    "0-1076": "Type of lettering",
    "0-1077": "Paleographical description",
    "0-1078": "General layout of the text",
    "0-1082": "Height of long letters",
    "0-1086": "Details of measured letters",
    "0-1116": "Commentaire technique",
    "0-1123": "Original location",
    "0-1126": "Heurist ID",
    "0-1128": "Other remarks",
    "0-1129": "Archaeological context",
    "0-1140": "Bibliography",
    "0-1152": "Edition unit",
    "0-1164": "Coat of arms",
    "0-1167": "Graph-east ID",
    # etc.
}

# Liste des conceptID à exclure
excluded_concept_ids = ["2-1", "0-1142", "2-39", "9-78", "1201-514", "0-1067", "0-1077", "0-1078", "0-1082", "0-1086", "0-1116", "0-1120", "0-1122", "0-1123", "0-1140", "0-1152", "0-1164", "0-1167"]

# Liste d'ordre des ConceptID
order_of_concept_ids = [
    "2-1", "2-39", "9-78", "1201-514", "0-1052", "0-1055", "0-1057", "0-1058", "0-1059",
    "0-1061", "0-1062", "0-1063", "0-1064", "0-1065", "0-1066", "0-1067", "0-1070",
    "0-1073", "0-1074", "0-1075", "0-1076", "0-1077", "0-1078", "0-1082", "0-1086",
    "0-1116", "0-1123", "0-1126", "0-1128", "0-1129", "0-1140", "0-1152", "0-1164", "0-1167"
]

# Fonction pour extraire toutes les données des enregistrements
def extract_records(root):
    records_data = []

    for record in root.findall('hml:records/hml:record', namespace):
        record_data = {}
        record_data['id'] = record.find('hml:id', namespace).text
        record_data['type'] = record.find('hml:type', namespace).text
        record_data['title'] = record.find('hml:title', namespace).text

        details = record.findall('hml:detail', namespace)
        record_data['details'] = {}
        heurist_id = None

        for detail in details:
            detail_text = ' '.join(detail.text.split()).strip() if detail.text else ""
            concept_id = detail.get('conceptID')
            term_concept_id = detail.get('termConceptID')

            # Vérifier si le concept_id est dans la liste d'exclusion
            if concept_id in excluded_concept_ids:
                continue

            # Trouver le titre approprié pour le détail
            title = detail_titles.get(concept_id) or detail_titles.get(term_concept_id) or "Détail Inconnu"

            if title not in record_data['details']:
                record_data['details'][title] = []
            record_data['details'][title].append(detail_text)

            # Trouver l'heurist_id
            if concept_id == "0-1126":
                heurist_id = detail_text

        record_data['heurist_id'] = int(heurist_id) if heurist_id else float('inf')
        records_data.append(record_data)

    return records_data

# Fonction pour générer le HTML à partir des enregistrements
def generate_html(records):
    html_content = "<html>\n<head>\n<title>Records Data</title>\n</head>\n<body>\n"

    for record in records:
        html_content += f"<p>Type: {record['type']}</p>\n"
        html_content += f"<p>Title: {record['title']}</p>\n"

        html_content += "<p>\n"
        for concept_id in order_of_concept_ids:
            title = detail_titles.get(concept_id, "Détail Inconnu")
            if title in record['details']:
                concatenated_details = " ".join(record['details'][title])
                html_content += f"<p>{title}: {concatenated_details}</p>\n"
        html_content += "</p>\n<hr>\n"

    html_content += "</body>\n</html>"

    return html_content

# Extraire les données
try:
    records = extract_records(root)
    print("Records extracted successfully.")
except Exception as e:
    print(f"Failed to extract records: {e}")

# Trier les enregistrements par Heurist ID
try:
    sorted_records = sorted(records, key=lambda x: x['heurist_id'])
    print("Records sorted successfully.")
except Exception as e:
    print(f"Failed to sort records: {e}")

# Générer le HTML
try:
    html_content = generate_html(sorted_records)
    print("HTML content generated successfully.")
except Exception as e:
    print(f"Failed to generate HTML content: {e}")

# Créer un fichier HTML
output_path = 'records_data.html'

try:
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(html_content)
    print(f"Document HTML créé avec succès : {output_path}")
except Exception as e:
    print(f"Failed to write HTML file: {e}")

Failed to parse XML file: [Errno 2] No such file or directory: 'heurist_clem.xml'
Failed to extract records: name 'root' is not defined
Failed to sort records: name 'records' is not defined
Failed to generate HTML content: name 'sorted_records' is not defined
Failed to write HTML file: [Errno 13] Permission denied: 'records_data.html'
