Skip to content

Commit

Permalink
Merge pull request #25 from obulat/fix/frozen_set_issue
Browse files Browse the repository at this point in the history
Fix frozen set issue
  • Loading branch information
obulat committed Dec 29, 2022
2 parents ebdbb17 + 02b3833 commit 5adc7c7
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9, '3.10']
python-version: [3.9, '3.10']

steps:
- uses: actions/checkout@v2
Expand Down
20 changes: 12 additions & 8 deletions zeyrek/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ class PosInfo(NamedTuple):


class RootAttribute(Enum):
"""These represents attributes of roots."""
"""This represents attributes of roots."""

# Generally, the present tense (Aorist) suffix has the form [Ir], as in gel-ir, bul-ur, kapat-ır.
# But for most single-syllable verbs and for compound verbs it takes the form [Ar],
# as in yap-ar, yet-er, hapsed-er. There are exceptions to this rule, such as "var-ır".
# Below two represents the attributes for clearing the ambiguity. These attributes does not
# The two attributes below resolve this ambiguity. These attributes do not
# modify the root form.
Aorist_I = auto()
Aorist_A = auto()
Expand Down Expand Up @@ -282,10 +282,14 @@ class PhoneticAttribute(Enum):


@functools.lru_cache(maxsize=128, typed=False)
def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[PhoneticAttribute]:
def calculate_phonetic_attributes(
word: str,
predecessor_attrs: "tuple[PhoneticAttribute] | None" = None
) -> set[PhoneticAttribute]:
p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs)
# the word should be in lower case
if len(word) == 0:
return predecessor_attrs
return p_attrs
result = set()
last_letter = word[-1]
if last_letter in tr.vowels_lower_set:
Expand Down Expand Up @@ -313,16 +317,16 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon
else:
result.add(PhoneticAttribute.FirstLetterConsonant)
if last_vowel is None:
result.update(predecessor_attrs)
result.update(p_attrs)
result.update(no_vowel_attrs)
result.discard(PhoneticAttribute.LastLetterVowel)
result.discard(PhoneticAttribute.ExpectsConsonant)

return result


def parse_attr_data(data: str) -> Set:
attrs: Set = set()
def parse_attr_data(data: str) -> set[RootAttribute]:
attrs = set()
tokens = [_.strip() for _ in data.split(",")]
for s in tokens:
if s not in RootAttribute_set:
Expand All @@ -332,7 +336,7 @@ def parse_attr_data(data: str) -> Set:
return attrs


def infer_morphemic_attributes(word: str, pos_data, attrs: Set = None) -> Set:
def infer_morphemic_attributes(word: str, pos_data, attrs: "set[RootAttribute] | None" = None) -> set[RootAttribute]:
result = attrs if attrs is not None else set()
last = word[-1]
last_char_is_vowel = tr.is_vowel(last)
Expand Down
8 changes: 4 additions & 4 deletions zeyrek/morphotactics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3578,13 +3578,13 @@ def __str__(self):
def __repr__(self):
return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})"

def copy(self, surface_node: SurfaceTransition, phonetic_attributes: Set = None):
def copy(self, surface_node: SurfaceTransition, pa: "set[PhoneticAttribute] | None" = None):
phonetic_attributes = (
calculate_phonetic_attributes(
surface_node.surface, self.phonetic_attributes
surface_node.surface, tuple(self.phonetic_attributes)
)
if phonetic_attributes is None
else phonetic_attributes
if pa is None
else pa
)
is_terminal = surface_node.state.terminal
hist = self.transitions[:]
Expand Down
2 changes: 1 addition & 1 deletion zeyrek/rulebasedanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def advance(self, path: SearchPath):
# if tail is equal to surface, no need to calculate phonetic attributes.
tail_equals_surface = path.tail == surface
attributes = path.phonetic_attributes if tail_equals_surface \
else calculate_phonetic_attributes(surface, frozenset(path.phonetic_attributes))
else calculate_phonetic_attributes(surface, tuple(path.phonetic_attributes))

# This is required for suffixes like `cik` and `ciğ`
# an extra attribute is added if "cik" or "ciğ" is generated and matches the tail.
Expand Down

0 comments on commit 5adc7c7

Please sign in to comment.