Skip to content

Commit

Permalink
Added serialisation check as a default in validator (not just when -o is enabled) (proycon/folia#72). Reimplemented --quick, but this now skips RelaxNG and only does a library read pass.
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed Mar 26, 2019
1 parent b2d230c commit 747b526
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 15 deletions.
2 changes: 1 addition & 1 deletion folia
Submodule folia updated 85 files
+0 −1 .travis.yml
+18 −8 README.rst
+ docs/folia.pdf
+3 −3 docs/folia.tex
+5 −3 docs/source/alternative_annotation.rst
+2 −6 docs/source/annotation_declarations.rst
+17 −8 docs/source/annotation_types.rst
+1 −1 docs/source/chunking_annotation.rst
+1 −1 docs/source/comment_annotation.rst
+19 −7 docs/source/conf.py
+2 −1 docs/source/content_annotation_category.rst
+1 −1 docs/source/coreference_annotation.rst
+1 −1 docs/source/correction_annotation.rst
+2 −40 docs/source/definition_annotation.rst
+1 −1 docs/source/dependency_annotation.rst
+1 −1 docs/source/description_annotation.rst
+1 −0 docs/source/division_annotation.rst
+8 −2 docs/source/entity_annotation.rst
+228 −7 docs/source/entry_annotation.rst
+2 −1 docs/source/event_annotation.rst
+2 −41 docs/source/example_annotation.rst
+0 −6 docs/source/external.rst
+3 −2 docs/source/features.rst
+4 −2 docs/source/figure_annotation.rst
+35 −0 docs/source/foreign_annotation.rst
+18 −18 docs/source/fql.rst
+1 −1 docs/source/gap_annotation.rst
+46 −0 docs/source/guidelines.rst
+2 −1 docs/source/head_annotation.rst
+57 −0 docs/source/hiddentoken_annotation.rst
+2 −1 docs/source/higherorder_annotation_category.rst
+38 −2 docs/source/hyperlinks.rst
+58 −0 docs/source/hyphenation_annotation.rst
+41 −0 docs/source/implementations.rst
+14 −23 docs/source/index.rst
+2 −1 docs/source/inline_annotation_category.rst
+41 −15 docs/source/introduction.rst
+45 −0 docs/source/libraries.csv
+4 −3 docs/source/linebreak_annotation.rst
+3 −1 docs/source/list_annotation.rst
+17 −3 docs/source/metadata.rst
+1 −1 docs/source/metric_annotation.rst
+2 −1 docs/source/note_annotation.rst
+1 −1 docs/source/observation_annotation.rst
+3 −2 docs/source/paragraph_annotation.rst
+24 −7 docs/source/part_annotation.rst
+2 −2 docs/source/phon_annotation.rst
+12 −5 docs/source/provenance_data.rst
+2 −1 docs/source/quote_annotation.rst
+8 −5 docs/source/reference_annotation.rst
+4 −4 docs/source/relation_annotation.rst
+1 −1 docs/source/semrole_annotation.rst
+3 −2 docs/source/sentence_annotation.rst
+1 −1 docs/source/sentiment_annotation.rst
+4 −5 docs/source/set_definitions.rst
+12 −1 docs/source/span_annotation_category.rst
+2 −2 docs/source/speech.rst
+3 −3 docs/source/string_annotation.rst
+4 −1 docs/source/structure_annotation_category.rst
+2 −1 docs/source/subtoken_annotation_category.rst
+2 −2 docs/source/syntax_annotation.rst
+5 −1 docs/source/table_annotation.rst
+2 −41 docs/source/term_annotation.rst
+2 −2 docs/source/text_annotation.rst
+4 −1 docs/source/textmarkup_annotation_category.rst
+1 −2 docs/source/token_annotation.rst
+2 −1 docs/source/utterance_annotation.rst
+3 −2 docs/source/whitespace_annotation.rst
+4 −0 examples/README.md
+96 −0 examples/erroneous/invalid-wref.2.0.0.folia.xml
+91 −0 examples/group-annotations.2.0.0.folia.xml
+1 −1 examples/provenance.2.0.0.folia.xml
+13 −13 examples/quotes.2.0.0.folia.xml
+197 −0 examples/spacy-core-web-sm-en.2.0.1.folia.xml
+1 −1 examples/speech.2.0.0.folia.xml
+96 −0 examples/syntactic-movement.2.0.0.folia.xml
+40 −0 examples/tests/provenance-flat-explicit.2.0.0.folia.xml
+32 −0 examples/tests/provenance-flat-implicit.2.0.0.folia.xml
+36 −0 examples/tests/provenance-nested-implicit.2.0.0.folia.xml
+6 −0 examples/whitespace-linebreaks.2.0.0.folia.xml
+1,581 −114 schemas/folia.rng
+61 −28 schemas/folia.yml
+132 −18 setdefinitions/namedentities.foliaset.ttl
+5 −1 setdefinitions/namedentities.foliaset.xml
+82 −0 setdefinitions/universal-pos.foliaset.ttl
39 changes: 25 additions & 14 deletions foliatools/foliavalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,21 @@


def validate(filename, schema = None,**kwargs):
try:
folia.validate(filename, schema)
except Exception as e:
print("VALIDATION ERROR against RelaxNG schema (stage 1/2), in " + filename,file=sys.stderr)
print(str(e), file=sys.stderr)
return False
if not kwargs.get('quick'):
try:
folia.validate(filename, schema)
except Exception as e:
print("VALIDATION ERROR against RelaxNG schema (stage 1/3), in " + filename,file=sys.stderr)
print(str(e), file=sys.stderr)
return False
try:
document = folia.Document(file=filename, deepvalidation=kwargs.get('deep',False),textvalidation=kwargs.get('stricttextvalidation',False),verbose=True, autodeclare=kwargs.get('autodeclare',False), processor=kwargs.get('processor'), debug=kwargs.get('debug',0))
except folia.DeepValidationError as e:
print("DEEP VALIDATION ERROR on full parse by library (stage 2/2), in " + filename,file=sys.stderr)
print("DEEP VALIDATION ERROR on full parse by library (stage 2/3), in " + filename,file=sys.stderr)
print(e.__class__.__name__ + ": " + str(e),file=sys.stderr)
return False
except Exception as e:
print("VALIDATION ERROR on full parse by library (stage 2/2), in " + filename,file=sys.stderr)
print("VALIDATION ERROR on full parse by library (stage 2/3), in " + filename,file=sys.stderr)
print(e.__class__.__name__ + ": " + str(e),file=sys.stderr)
if kwargs.get('traceback') or kwargs.get('debug'):
print("-- Full traceback follows -->",file=sys.stderr)
Expand All @@ -42,15 +43,25 @@ def validate(filename, schema = None,**kwargs):
print("WARNING: Document (" + filename + ") uses an older FoLiA version ("+document.version+") but is validated according to the newer specification (" + folia.FOLIAVERSION+"). You might want to increase the version attribute if this is a document you created and intend to publish.",file=sys.stderr)
if document.textvalidationerrors:
if kwargs.get('stricttextvalidation'):
print("VALIDATION ERROR because of text validation errors, in " + filename,file=sys.stderr)
print("VALIDATION ERROR because of text validation errors (stage 2/3), in " + filename,file=sys.stderr)
return False
elif not kwargs.get('nowarn'):
print("WARNING: there were " + str(document.textvalidationerrors) + " text validation errors but these are currently not counted toward the full validation result (use -t for strict text validation)", file=sys.stderr)

if kwargs.get('output'):
print(document.xmlstring())
if not kwargs.get('quick'):
try:
if kwargs.get('output'):
print(document.xmlstring())
else:
document.xmlstring()
except Exception as e:
print("SERIALISATION ERROR (stage 3/3): Document validated succesfully but failed to serialise! (" + filename + "). This may be indicative of a problem in the underlying library, please submit an issue on https://github.com/proycon/foliapy with the output of this error.",file=sys.stderr)
print(e.__class__.__name__ + ": " + str(e),file=sys.stderr)
print("-- Full traceback follows -->",file=sys.stderr)
ex_type, ex, tb = sys.exc_info()
traceback.print_exception(ex_type, ex, tb)
return False
if kwargs.get('autodeclare'):
print("Validated successfully **after** applying auto-declarations: " + filename,file=sys.stderr)
print("Validated successfully **after** applying auto-declarations: " + filename + " (this is not a guarantee that the original file is valid but indicates it can be automatically made valid!)", file=sys.stderr)
return document
else:
print("Validated successfully: " + filename,file=sys.stderr)
Expand All @@ -76,7 +87,7 @@ def commandparser(parser):
parser.add_argument('-d','--deep',help="Enable deep validation; validated uses classes against provided set definitions", action='store_true', default=False)
parser.add_argument('-r','--recurse',help="Process recursively", action='store_true', default=False)
parser.add_argument('-a','--autodeclare',help="Attempt to automatically declare missing annotations", action='store_true', default=False)
parser.add_argument('-q','--quick',help="Quick and more shallow validation, only validates against RelaxNG schema. This does not constitute a complete enough validation!", action='store_true', default=False)
parser.add_argument('-q','--quick',help="Quicker validation; skips RelaxNG validation and serialisation checks", action='store_true', default=False)
parser.add_argument('-E','--extension', type=str,help="Extension", action='store',default="xml")
parser.add_argument('-W','--nowarn',help="Suppress warnings", action='store_true', default=False)
parser.add_argument('-i','--ignore',help="Always report a successful exit code, even in case of validation errors", action='store_true', default=False)
Expand Down

0 comments on commit 747b526

Please sign in to comment.