Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/mideind/GreynirCorrect into agreement
Browse files Browse the repository at this point in the history
  • Loading branch information
thorunna committed Oct 26, 2023
2 parents 287cdac + 7ec7c8a commit 435678d
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 51 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python-version: [ "3.8", "3.9", "3.10", "3.11", "pypy-3.9", "pypy-3.10"]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.9", "pypy-3.10"]

steps:
- uses: actions/checkout@v4
Expand All @@ -28,11 +28,11 @@ jobs:
python -m pip install --upgrade pip wheel setuptools pytest
python -m pip install git+https://github.com/mideind/Tokenizer#egg=tokenizer
python -m pip install git+https://github.com/mideind/GreynirPackage#egg=reynir
# The neural sentence classifier doesn't work with pypy so don't try to install it
if [ "${{ matrix.python-version }}" == "pypy-3.9" ] || [ "${{ matrix.python-version }}" == "pypy-3.10" ]; then
python -m pip install -e .
# No need to test the sentence classifier in every build (also doesn't work with PyPy)
if [ "${{ matrix.python-version }}" == "3.8" ]; then
python -m pip install -e ".[sentence_classifier]"
else
python -m pip install -e .[sentence_classifier]
python -m pip install -e ".[dev]"
fi
- name: Typecheck with mypy
run: |
Expand Down
71 changes: 31 additions & 40 deletions eval/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,38 @@
$ python eval.py -a
"""

from typing import (
TYPE_CHECKING,
Any,
Counter,
DefaultDict,
Dict,
Iterable,
List,
Optional,
Set,
Union,
Tuple,
Iterable,
Union,
cast,
Any,
DefaultDict,
Counter,
)

import os
from collections import defaultdict
from datetime import datetime
import argparse
import glob
import os
import random
import argparse
import xml.etree.ElementTree as ET
from collections import defaultdict
from datetime import datetime

from reynir_correct import (
Annotation,
CorrectedSentence,
CorrectionPipeline,
GreynirCorrect,
GreynirCorrectAPI,
Settings,
)
from tokenizer import TOK, Tok, detokenize

if TYPE_CHECKING:
# For some reason, types seem to be missing from the multiprocessing module
Expand All @@ -116,17 +125,6 @@
else:
import multiprocessing

from reynir import _Sentence
from tokenizer import detokenize, Tok, TOK

from reynir_correct.annotation import Annotation
from reynir_correct.checker import (
GreynirCorrect,
Settings,
AnnotatedSentence,
check as gc_check,
)


# Disable Pylint warnings arising from Pylint not understanding the typing module
# pylint: disable=no-member
Expand Down Expand Up @@ -157,7 +155,8 @@

settings = Settings()
settings.read(os.path.join("config", "GreynirCorrect.conf"))
rc = GreynirCorrect(settings)
gc = GreynirCorrect(settings, pipeline=CorrectionPipeline("", settings=settings))
rc = GreynirCorrectAPI(gc=gc)

# Create a lock to ensure that only one process outputs at a time
OUTPUT_LOCK = multiprocessing.Lock()
Expand Down Expand Up @@ -591,7 +590,7 @@

parser = argparse.ArgumentParser(
description=(
"This program evaluates the spelling and grammar checking performance " "of GreynirCorrect on iceErrorCorpus"
"This program evaluates the spelling and grammar checking performance of GreynirCorrect on iceErrorCorpus"
)
)

Expand Down Expand Up @@ -1687,20 +1686,13 @@ def process(fpath_and_category: Tuple[str, str]) -> Dict[str, Any]:
# Nothing to do: drop this and go to the next sentence
continue
# print(text)
options = {}
options["annotate_unparsed_sentences"] = True # True is default
options["suppress_suggestions"] = False # False is default
options["ignore_rules"] = set(
[
"",
]
)
# Pass it to GreynirCorrect
pg = [list(p) for p in gc_check(text, rc=rc, **options)]
s: Optional[_Sentence] = None
if len(pg) >= 1 and len(pg[0]) >= 1:
s = pg[0][0]
if len(pg) > 1 or (len(pg) == 1 and len(pg[0]) > 1):
result = rc.correct(text=text, suppress_suggestions=False, ignore_rules=set())
pg = result.sentences
s: Optional[CorrectedSentence] = None
if len(pg) >= 1:
s = pg[0]
if len(pg) > 1 or (len(pg) == 1):
# if QUIET:
# bprint(f"In file {fpath}:")
# bprint(
Expand Down Expand Up @@ -1770,8 +1762,8 @@ def sentence_results(
return gc_error, ice_error

assert s is not None
assert isinstance(s, AnnotatedSentence)
gc_error, ice_error = sentence_results(s.annotations, errors)
assert isinstance(s, CorrectedSentence)
gc_error, ice_error = sentence_results(s.annotations or [], errors)

def token_results(
hyp_annotations: Iterable[Annotation],
Expand Down Expand Up @@ -2019,7 +2011,6 @@ def token_results(
wrong_span,
)

assert isinstance(s, AnnotatedSentence)
(
tp,
fp,
Expand All @@ -2031,7 +2022,7 @@ def token_results(
cfn,
right_span,
wrong_span,
) = token_results(s.annotations, errors)
) = token_results(s.annotations or [], errors)
tn = len(tokens) - tp - fp - fn
ctn = len(tokens) - ctp - cfp - cfn
# Collect statistics into the stats list, to be returned
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Topic :: Software Development :: Libraries :: Python Modules",
Expand Down
15 changes: 9 additions & 6 deletions src/reynir_correct/errfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,8 +544,9 @@ def VillaEinkunn(self, txt: str, variants: str, node: Node) -> AnnotationDict:
return AnnotationDict(
text="'{0}' á sennilega að vera '{1}'".format(wrong_pronoun, correct_pronoun),
detail=(
"Fornafnið '{0}' á að vera í {1}falli, eins og "
"nafnliðurinn sem fylgir á eftir".format(wrong_pronoun, CASE_NAMES[correct_case])
"Fornafnið '{0}' á að vera í {1}falli, eins og " "nafnliðurinn sem fylgir á eftir".format(
wrong_pronoun, CASE_NAMES[correct_case]
)
),
start=start,
end=end,
Expand Down Expand Up @@ -1031,8 +1032,9 @@ def annotate_wrong_obj_case(obj_case_abbr: str, correct_case_abbr: str) -> None:
self._ann.append(
Annotation(
text="Á líklega að vera '{0}'".format(correct_np),
detail="Andlag sagnarinnar {0} á að vera "
"í {1}falli í stað {2}falls.".format(verb, correct_case, wrong_case),
detail="Andlag sagnarinnar {0} á að vera " "í {1}falli í stað {2}falls.".format(
verb, correct_case, wrong_case
),
start=start,
end=end,
code=code,
Expand All @@ -1048,8 +1050,9 @@ def annotate_wrong_obj_case(obj_case_abbr: str, correct_case_abbr: str) -> None:
self._ann.append(
Annotation(
text="Andlag sagnarinnar 'að {0}' " "á að vera í {1}falli".format(verb, correct_case),
detail="Andlag sagnarinnar {0} á að vera "
"í {1}falli í stað {2}falls.".format(verb, correct_case, wrong_case),
detail="Andlag sagnarinnar {0} á að vera " "í {1}falli í stað {2}falls.".format(
verb, correct_case, wrong_case
),
start=index,
end=index,
code=code,
Expand Down

0 comments on commit 435678d

Please sign in to comment.