<a href="https://colab.research.google.com/github/michael-wettach/pythonsamples/blob/main/xml_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Jetzt wollen wir ein XML-Dokument gegen ein XML-Schema (XSD) validieren.

In [14]:
from io import BytesIO
from io import StringIO

from lxml import etree

# Validate an XML document against an XSD schema in three steps:
#   1. load and compile the schema,
#   2. load the XML document and check that it is well formed,
#   3. validate the parsed document against the schema.
# Every failure prints a short message and re-raises the original exception;
# syntax and schema errors additionally dump lxml's error log to a file.

# Read the schema as raw bytes so the XML parser can detect the document's
# encoding itself, instead of relying on the platform's default text encoding.
with open('element_sample_ext.xsd', 'rb') as schema_file:
    schema_to_check = schema_file.read()

# parse the schema (assume well-formed schema, skip error checking)
xmlschema_doc = etree.parse(BytesIO(schema_to_check))
xmlschema = etree.XMLSchema(xmlschema_doc)
print("XML schema was parsed.")

# Read and parse the XML document (check for XML syntax before validation).
# The file is opened inside the try block so that the I/O handler below can
# actually fire when the file is missing or unreadable.
try:
    with open('element_sample_invalid3.xml', 'rb') as xml_file:
        xml_to_check = xml_file.read()
    doc = etree.parse(BytesIO(xml_to_check))

# check for file IO error
except OSError:
    print('Invalid File')
    raise

# check for XML syntax errors
except etree.XMLSyntaxError as err:
    print('XML Syntax Error, see error_syntax.log')
    with open('error_syntax.log', 'w', encoding='utf-8') as error_log_file:
        error_log_file.write(str(err.error_log))
    raise

# Exception (not a bare except) so Ctrl-C / SystemExit are not reported as
# "unknown" parsing errors.
except Exception:
    print('Unknown error in parsing XML, exiting.')
    raise

else:
    print('XML well formed, syntax ok.')

# now that doc parsed successfully, validate against schema
try:
    xmlschema.assertValid(doc)

except etree.DocumentInvalid as err:
    print('Schema validation error, see error_schema.log')
    with open('error_schema.log', 'w', encoding='utf-8') as error_log_file:
        error_log_file.write(str(err.error_log))
    raise

except Exception:
    print('Unknown error in validating XML, exiting.')
    raise

else:
    print('XML valid, schema validation ok.')


XML schema was parsed.
XML well formed, syntax ok.
Schema validation error, see error_schema.log


DocumentInvalid: ignored