Merge branch 'master' of https://github.com/mideind/GreynirCorrect into agreement
mideind · Oct 26, 2023 · 435678d · 435678d
2 parents 287cdac + 7ec7c8a
commit 435678d
Show file tree

Hide file tree

Showing 4 changed files with 46 additions and 51 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python-version: [ "3.8", "3.9", "3.10", "3.11", "pypy-3.9", "pypy-3.10"]
+        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.9", "pypy-3.10"]
 
     steps:
     - uses: actions/checkout@v4
@@ -28,11 +28,11 @@ jobs:
         python -m pip install --upgrade pip wheel setuptools pytest
         python -m pip install git+https://github.com/mideind/Tokenizer#egg=tokenizer
         python -m pip install git+https://github.com/mideind/GreynirPackage#egg=reynir
-        # The neural sentence classifier doesn't work with pypy so don't try to install it
-        if [ "${{ matrix.python-version }}" == "pypy-3.9" ] || [ "${{ matrix.python-version }}" == "pypy-3.10" ]; then
-          python -m pip install -e .
+        # No need to test the sentence classifier in every build (also doesn't work with PyPy)
+        if [ "${{ matrix.python-version }}" == "3.8" ]; then
+          python -m pip install -e ".[sentence_classifier]"
         else
-          python -m pip install -e .[sentence_classifier]
+          python -m pip install -e ".[dev]"
         fi
     - name: Typecheck with mypy
       run: |

diff --git a/eval/eval.py b/eval/eval.py
@@ -85,29 +85,38 @@
     $ python eval.py -a
 
 """
-
 from typing import (
     TYPE_CHECKING,
+    Any,
+    Counter,
+    DefaultDict,
     Dict,
+    Iterable,
     List,
     Optional,
     Set,
-    Union,
     Tuple,
-    Iterable,
+    Union,
     cast,
-    Any,
-    DefaultDict,
-    Counter,
 )
 
-import os
-from collections import defaultdict
-from datetime import datetime
+import argparse
 import glob
+import os
 import random
-import argparse
 import xml.etree.ElementTree as ET
+from collections import defaultdict
+from datetime import datetime
+
+from reynir_correct import (
+    Annotation,
+    CorrectedSentence,
+    CorrectionPipeline,
+    GreynirCorrect,
+    GreynirCorrectAPI,
+    Settings,
+)
+from tokenizer import TOK, Tok, detokenize
 
 if TYPE_CHECKING:
     # For some reason, types seem to be missing from the multiprocessing module
@@ -116,17 +125,6 @@
 else:
     import multiprocessing
 
-from reynir import _Sentence
-from tokenizer import detokenize, Tok, TOK
-
-from reynir_correct.annotation import Annotation
-from reynir_correct.checker import (
-    GreynirCorrect,
-    Settings,
-    AnnotatedSentence,
-    check as gc_check,
-)
-
 
 # Disable Pylint warnings arising from Pylint not understanding the typing module
 # pylint: disable=no-member
@@ -157,7 +155,8 @@
 
 settings = Settings()
 settings.read(os.path.join("config", "GreynirCorrect.conf"))
-rc = GreynirCorrect(settings)
+gc = GreynirCorrect(settings, pipeline=CorrectionPipeline("", settings=settings))
+rc = GreynirCorrectAPI(gc=gc)
 
 # Create a lock to ensure that only one process outputs at a time
 OUTPUT_LOCK = multiprocessing.Lock()
@@ -591,7 +590,7 @@
 
 parser = argparse.ArgumentParser(
     description=(
-        "This program evaluates the spelling and grammar checking performance " "of GreynirCorrect on iceErrorCorpus"
+        "This program evaluates the spelling and grammar checking performance of GreynirCorrect on iceErrorCorpus"
     )
 )
 
@@ -1687,20 +1686,13 @@ def process(fpath_and_category: Tuple[str, str]) -> Dict[str, Any]:
                 # Nothing to do: drop this and go to the next sentence
                 continue
             # print(text)
-            options = {}
-            options["annotate_unparsed_sentences"] = True  # True is default
-            options["suppress_suggestions"] = False  # False is default
-            options["ignore_rules"] = set(
-                [
-                    "",
-                ]
-            )
             # Pass it to GreynirCorrect
-            pg = [list(p) for p in gc_check(text, rc=rc, **options)]
-            s: Optional[_Sentence] = None
-            if len(pg) >= 1 and len(pg[0]) >= 1:
-                s = pg[0][0]
-            if len(pg) > 1 or (len(pg) == 1 and len(pg[0]) > 1):
+            result = rc.correct(text=text, suppress_suggestions=False, ignore_rules=set())
+            pg = result.sentences
+            s: Optional[CorrectedSentence] = None
+            if len(pg) >= 1:
+                s = pg[0]
+            if len(pg) > 1 or (len(pg) == 1):
                 # if QUIET:
                 #     bprint(f"In file {fpath}:")
                 # bprint(
@@ -1770,8 +1762,8 @@ def sentence_results(
                 return gc_error, ice_error
 
             assert s is not None
-            assert isinstance(s, AnnotatedSentence)
-            gc_error, ice_error = sentence_results(s.annotations, errors)
+            assert isinstance(s, CorrectedSentence)
+            gc_error, ice_error = sentence_results(s.annotations or [], errors)
 
             def token_results(
                 hyp_annotations: Iterable[Annotation],
@@ -2019,7 +2011,6 @@ def token_results(
                     wrong_span,
                 )
 
-            assert isinstance(s, AnnotatedSentence)
             (
                 tp,
                 fp,
@@ -2031,7 +2022,7 @@ def token_results(
                 cfn,
                 right_span,
                 wrong_span,
-            ) = token_results(s.annotations, errors)
+            ) = token_results(s.annotations or [], errors)
             tn = len(tokens) - tp - fp - fn
             ctn = len(tokens) - ctp - cfp - cfn
             # Collect statistics into the stats list, to be returned

diff --git a/pyproject.toml b/pyproject.toml
@@ -22,6 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: Implementation :: CPython",
     "Programming Language :: Python :: Implementation :: PyPy",
     "Topic :: Software Development :: Libraries :: Python Modules",

diff --git a/src/reynir_correct/errfinder.py b/src/reynir_correct/errfinder.py
@@ -544,8 +544,9 @@ def VillaEinkunn(self, txt: str, variants: str, node: Node) -> AnnotationDict:
         return AnnotationDict(
             text="'{0}' á sennilega að vera '{1}'".format(wrong_pronoun, correct_pronoun),
             detail=(
-                "Fornafnið '{0}' á að vera í {1}falli, eins og "
-                "nafnliðurinn sem fylgir á eftir".format(wrong_pronoun, CASE_NAMES[correct_case])
+                "Fornafnið '{0}' á að vera í {1}falli, eins og " "nafnliðurinn sem fylgir á eftir".format(
+                    wrong_pronoun, CASE_NAMES[correct_case]
+                )
             ),
             start=start,
             end=end,
@@ -1031,8 +1032,9 @@ def annotate_wrong_obj_case(obj_case_abbr: str, correct_case_abbr: str) -> None:
                     self._ann.append(
                         Annotation(
                             text="Á líklega að vera '{0}'".format(correct_np),
-                            detail="Andlag sagnarinnar {0} á að vera "
-                            "í {1}falli í stað {2}falls.".format(verb, correct_case, wrong_case),
+                            detail="Andlag sagnarinnar {0} á að vera " "í {1}falli í stað {2}falls.".format(
+                                verb, correct_case, wrong_case
+                            ),
                             start=start,
                             end=end,
                             code=code,
@@ -1048,8 +1050,9 @@ def annotate_wrong_obj_case(obj_case_abbr: str, correct_case_abbr: str) -> None:
                 self._ann.append(
                     Annotation(
                         text="Andlag sagnarinnar 'að {0}' " "á að vera í {1}falli".format(verb, correct_case),
-                        detail="Andlag sagnarinnar {0} á að vera "
-                        "í {1}falli í stað {2}falls.".format(verb, correct_case, wrong_case),
+                        detail="Andlag sagnarinnar {0} á að vera " "í {1}falli í stað {2}falls.".format(
+                            verb, correct_case, wrong_case
+                        ),
                         start=index,
                         end=index,
                         code=code,