Merge pull request #25 from obulat/fix/frozen_set_issue

Fix frozen set issue
obulat · Dec 29, 2022 · 5adc7c7
2 parents ebdbb17 + 02b3833
commit 5adc7c7
Show file tree

Hide file tree

Showing 4 changed files with 18 additions and 14 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9, '3.10']
+        python-version: [3.9, '3.10']
 
     steps:
     - uses: actions/checkout@v2

diff --git a/zeyrek/attributes.py b/zeyrek/attributes.py
@@ -77,12 +77,12 @@ class PosInfo(NamedTuple):
 
 
 class RootAttribute(Enum):
-    """These represents attributes of roots."""
+    """This represents attributes of roots."""
 
     # Generally Present tense (Aorist) suffix has the form [Ir]; such as gel-ir, bul-ur, kapat-ır.
     # But for most verbs with single syllable and compound verbs it forms as [Ar].
     # Such as yap-ar, yet-er, hapsed-er. There are exceptions for this case, such as "var-ır".
-    # Below two represents the attributes for clearing the ambiguity. These attributes does not
+    # Below two represents the attributes for clearing the ambiguity. These attributes do not
     # modify the root form.
     Aorist_I = auto()
     Aorist_A = auto()
@@ -282,10 +282,14 @@ class PhoneticAttribute(Enum):
 
 
 @functools.lru_cache(maxsize=128, typed=False)
-def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[PhoneticAttribute]:
+def calculate_phonetic_attributes(
+    word: str,
+    predecessor_attrs: "tuple[PhoneticAttribute] | None" = None
+) -> set[PhoneticAttribute]:
+    p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs)
     # the word should be in lower case
     if len(word) == 0:
-        return predecessor_attrs
+        return p_attrs
     result = set()
     last_letter = word[-1]
     if last_letter in tr.vowels_lower_set:
@@ -313,16 +317,16 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon
     else:
         result.add(PhoneticAttribute.FirstLetterConsonant)
     if last_vowel is None:
-        result.update(predecessor_attrs)
+        result.update(p_attrs)
         result.update(no_vowel_attrs)
         result.discard(PhoneticAttribute.LastLetterVowel)
         result.discard(PhoneticAttribute.ExpectsConsonant)
 
     return result
 
 
-def parse_attr_data(data: str) -> Set:
-    attrs: Set = set()
+def parse_attr_data(data: str) -> set[RootAttribute]:
+    attrs = set()
     tokens = [_.strip() for _ in data.split(",")]
     for s in tokens:
         if s not in RootAttribute_set:
@@ -332,7 +336,7 @@ def parse_attr_data(data: str) -> Set:
     return attrs
 
 
-def infer_morphemic_attributes(word: str, pos_data, attrs: Set = None) -> Set:
+def infer_morphemic_attributes(word: str, pos_data, attrs: "set[RootAttribute] | None" = None) -> set[RootAttribute]:
     result = attrs if attrs is not None else set()
     last = word[-1]
     last_char_is_vowel = tr.is_vowel(last)

diff --git a/zeyrek/morphotactics.py b/zeyrek/morphotactics.py
@@ -3578,13 +3578,13 @@ def __str__(self):
     def __repr__(self):
         return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})"
 
-    def copy(self, surface_node: SurfaceTransition, phonetic_attributes: Set = None):
+    def copy(self, surface_node: SurfaceTransition, pa: "set[PhoneticAttribute] | None" = None):
         phonetic_attributes = (
             calculate_phonetic_attributes(
-                surface_node.surface, self.phonetic_attributes
+                surface_node.surface, tuple(self.phonetic_attributes)
             )
-            if phonetic_attributes is None
-            else phonetic_attributes
+            if pa is None
+            else pa
         )
         is_terminal = surface_node.state.terminal
         hist = self.transitions[:]

diff --git a/zeyrek/rulebasedanalyzer.py b/zeyrek/rulebasedanalyzer.py
@@ -112,7 +112,7 @@ def advance(self, path: SearchPath):
             # if tail is equal to surface, no need to calculate phonetic attributes.
             tail_equals_surface = path.tail == surface
             attributes = path.phonetic_attributes if tail_equals_surface \
-                else calculate_phonetic_attributes(surface, frozenset(path.phonetic_attributes))
+                else calculate_phonetic_attributes(surface, tuple(path.phonetic_attributes))
 
             # This is required for suffixes like `cik` and `ciğ`
             # an extra attribute is added if "cik" or "ciğ" is generated and matches the tail.