Skip to content

Commit

Permalink
work on foliaupgrade and changed help text for stricttextvalidation o…
Browse files Browse the repository at this point in the history
…ption in foliavalidator
  • Loading branch information
proycon committed Mar 25, 2019
1 parent 041d4b1 commit d131bb4
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 5 deletions.
2 changes: 1 addition & 1 deletion folia
Submodule folia updated 85 files
+1 −0 .travis.yml
+8 −18 README.rst
+ docs/folia.pdf
+3 −3 docs/folia.tex
+3 −5 docs/source/alternative_annotation.rst
+6 −2 docs/source/annotation_declarations.rst
+8 −17 docs/source/annotation_types.rst
+1 −1 docs/source/chunking_annotation.rst
+1 −1 docs/source/comment_annotation.rst
+7 −19 docs/source/conf.py
+1 −2 docs/source/content_annotation_category.rst
+1 −1 docs/source/coreference_annotation.rst
+1 −1 docs/source/correction_annotation.rst
+40 −2 docs/source/definition_annotation.rst
+1 −1 docs/source/dependency_annotation.rst
+1 −1 docs/source/description_annotation.rst
+0 −1 docs/source/division_annotation.rst
+2 −8 docs/source/entity_annotation.rst
+7 −228 docs/source/entry_annotation.rst
+1 −2 docs/source/event_annotation.rst
+41 −2 docs/source/example_annotation.rst
+6 −0 docs/source/external.rst
+2 −3 docs/source/features.rst
+2 −4 docs/source/figure_annotation.rst
+0 −35 docs/source/foreign_annotation.rst
+18 −18 docs/source/fql.rst
+1 −1 docs/source/gap_annotation.rst
+0 −46 docs/source/guidelines.rst
+1 −2 docs/source/head_annotation.rst
+0 −57 docs/source/hiddentoken_annotation.rst
+1 −2 docs/source/higherorder_annotation_category.rst
+2 −38 docs/source/hyperlinks.rst
+0 −58 docs/source/hyphenation_annotation.rst
+0 −41 docs/source/implementations.rst
+23 −14 docs/source/index.rst
+1 −2 docs/source/inline_annotation_category.rst
+15 −41 docs/source/introduction.rst
+0 −45 docs/source/libraries.csv
+3 −4 docs/source/linebreak_annotation.rst
+1 −3 docs/source/list_annotation.rst
+3 −17 docs/source/metadata.rst
+1 −1 docs/source/metric_annotation.rst
+1 −2 docs/source/note_annotation.rst
+1 −1 docs/source/observation_annotation.rst
+2 −3 docs/source/paragraph_annotation.rst
+7 −24 docs/source/part_annotation.rst
+2 −2 docs/source/phon_annotation.rst
+5 −12 docs/source/provenance_data.rst
+1 −2 docs/source/quote_annotation.rst
+5 −8 docs/source/reference_annotation.rst
+4 −4 docs/source/relation_annotation.rst
+1 −1 docs/source/semrole_annotation.rst
+2 −3 docs/source/sentence_annotation.rst
+1 −1 docs/source/sentiment_annotation.rst
+5 −4 docs/source/set_definitions.rst
+1 −12 docs/source/span_annotation_category.rst
+2 −2 docs/source/speech.rst
+3 −3 docs/source/string_annotation.rst
+1 −4 docs/source/structure_annotation_category.rst
+1 −2 docs/source/subtoken_annotation_category.rst
+2 −2 docs/source/syntax_annotation.rst
+1 −5 docs/source/table_annotation.rst
+41 −2 docs/source/term_annotation.rst
+2 −2 docs/source/text_annotation.rst
+1 −4 docs/source/textmarkup_annotation_category.rst
+2 −1 docs/source/token_annotation.rst
+1 −2 docs/source/utterance_annotation.rst
+2 −3 docs/source/whitespace_annotation.rst
+0 −4 examples/README.md
+0 −96 examples/erroneous/invalid-wref.2.0.0.folia.xml
+0 −91 examples/group-annotations.2.0.0.folia.xml
+1 −1 examples/provenance.2.0.0.folia.xml
+13 −13 examples/quotes.2.0.0.folia.xml
+0 −197 examples/spacy-core-web-sm-en.2.0.1.folia.xml
+1 −1 examples/speech.2.0.0.folia.xml
+0 −96 examples/syntactic-movement.2.0.0.folia.xml
+0 −40 examples/tests/provenance-flat-explicit.2.0.0.folia.xml
+0 −32 examples/tests/provenance-flat-implicit.2.0.0.folia.xml
+0 −36 examples/tests/provenance-nested-implicit.2.0.0.folia.xml
+0 −6 examples/whitespace-linebreaks.2.0.0.folia.xml
+251 −1,718 schemas/folia.rng
+28 −61 schemas/folia.yml
+18 −132 setdefinitions/namedentities.foliaset.ttl
+1 −5 setdefinitions/namedentities.foliaset.xml
+0 −82 setdefinitions/universal-pos.foliaset.ttl
32 changes: 30 additions & 2 deletions foliatools/foliaupgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,31 @@ def main():
success = process(*args.files, **args.__dict__)
sys.exit(0 if success else 1)

def annotators2processors(doc, mainprocessor):
    """Convert FoLiA v1 style annotators to v2 style processors (limited).

    Walks over everything yielded by ``doc.items()``. For each
    ``folia.AbstractElement`` that carries an ``annotator``, the element is
    linked to a processor whose name equals the annotator and whose type
    matches the (normalised) annotator type. If the document has no such
    processor for the element's annotation type and set, a new one is created
    and appended to ``mainprocessor``. Afterwards the old-style ``annotator``
    and ``annotatortype`` attributes of the element are cleared.

    Args:
        doc: The document to convert in place; must provide ``items()`` and
            ``getprocessors(annotationtype, set)``.
        mainprocessor: The processor that newly created processors are
            appended to.

    Returns:
        None. ``doc`` and ``mainprocessor`` are modified in place.
    """
    for element in doc.items():
        # Guard clauses: only elements that actually carry an old-style
        # annotator need converting.
        if not isinstance(element, folia.AbstractElement):
            continue
        if not element.annotator:
            continue

        # Anything that is not explicitly manual is treated as automatic.
        if element.annotatortype == folia.ProcessorType.MANUAL:
            annotatortype = folia.ProcessorType.MANUAL
        else:
            annotatortype = folia.ProcessorType.AUTO

        # Look for an existing processor with the same name and type. There is
        # deliberately no early exit: if several match, the last one is used.
        foundprocessor = None
        for processor in doc.getprocessors(element.ANNOTATIONTYPE, element.set):
            if element.annotator == processor.name and annotatortype == processor.type:
                foundprocessor = processor

        # Compare against None explicitly rather than relying on truthiness:
        # processors are container-like (they support append()), so a matching
        # processor without children could otherwise be mistaken for "not
        # found" and needlessly duplicated.
        if foundprocessor is not None:
            element.processor = foundprocessor
        else:
            # Create a new processor
            newprocessor = folia.Processor(element.annotator, type=annotatortype)
            mainprocessor.append(newprocessor)
            element.setprocessor(newprocessor)

        # Delete the old style annotator
        element.annotator = None
        element.annotatortype = None


def process(*files, **kwargs):
success = False
for file in files:
Expand All @@ -32,18 +57,21 @@ def process(*files, **kwargs):
if r != 0:
success = False
elif os.path.isfile(file):
doc = validate(file,schema=None, stricttextvalidation=True,autodeclare=True,output=False, warn=False, **kwargs)
doc.provenance.append(folia.Processor.create(name="foliaupgrade", version=VERSION))
mainprocessor = folia.Processor.create(name="foliaupgrade", version=VERSION)
doc = validate(file,schema=None, stricttextvalidation=True,autodeclare=True,output=False, warn=False,processor=mainprocessor,traceback=True,**kwargs)
if doc is not False:
print("Upgrading " + doc.filename,file=sys.stderr)
doc.version = folia.FOLIAVERSION #upgrading involves more than just bumping the number, but that is handled implicitly already by the library when reading the document
annotators2processors(doc, mainprocessor)
if not kwargs.get('dryrun'):
doc.save(doc.filename + ".upgraded")
if not validate(file + ".upgraded",schema=None,stricttextvalidation=True,autodeclare=True,**kwargs):
print("Upgrade failed",file=sys.stderr)
success = False
else:
shutil.move(doc.filename + ".upgraded", file)
else:
print(doc.xmlstring())
else:
success = False
return success
Expand Down
4 changes: 2 additions & 2 deletions foliatools/foliavalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def validate(filename, schema = None,**kwargs):
print(str(e), file=sys.stderr)
return False
try:
document = folia.Document(file=filename, deepvalidation=kwargs.get('deep',False),textvalidation=kwargs.get('stricttextvalidation',False),verbose=True, autodeclare=kwargs.get('autodeclare',False), debug=kwargs.get('debug'))
document = folia.Document(file=filename, deepvalidation=kwargs.get('deep',False),textvalidation=kwargs.get('stricttextvalidation',False),verbose=True, autodeclare=kwargs.get('autodeclare',False), processor=kwargs.get('processor'), debug=kwargs.get('debug',0))
except folia.DeepValidationError as e:
print("DEEP VALIDATION ERROR on full parse by library (stage 2/2), in " + filename,file=sys.stderr)
print(e.__class__.__name__ + ": " + str(e),file=sys.stderr)
Expand Down Expand Up @@ -80,7 +80,7 @@ def commandparser(parser):
parser.add_argument('-E','--extension', type=str,help="Extension", action='store',default="xml")
parser.add_argument('-W','--nowarn',help="Suppress warnings", action='store_true', default=False)
parser.add_argument('-i','--ignore',help="Always report a successful exit code, even in case of validation errors", action='store_true', default=False)
parser.add_argument('-t','--stricttextvalidation',help="Treat text validation errors strictly (recommended and default for FoLiA v1.5+)", action='store_true', default=False)
parser.add_argument('-t','--stricttextvalidation',help="Treat text validation errors strictly for FoLiA < v1.5, it always enabled for for FoLiA v1.5+ regardless of this parameter", action='store_true', default=False)
parser.add_argument('-o','--output',help="Output document to stdout", action='store_true', default=False)
parser.add_argument('-D','--debug',type=int,help="Debug level", action='store',default=0)
parser.add_argument('-b','--traceback',help="Provide a full traceback on validation errors", action='store_true', default=False)
Expand Down

0 comments on commit d131bb4

Please sign in to comment.