In [30]:
from pathlib import Path
from enum import Enum, auto

In [2]:
from utils import read_yaml_file, YAML_FILES

In [4]:
# Relative location of the YAML entry files (one directory up, then "entries").
base_path = Path("..") / "entries"
# Merge the dictionaries read from every file in YAML_FILES into one mapping.
entries = {}
for yaml_file in YAML_FILES:
    yaml_path = base_path / yaml_file
    # read_yaml_file returns a pair; only the second element is used in this cell.
    yaml_obj, dictionary = read_yaml_file(yaml_path)
    # update() overwrites existing keys, so a key repeated in a later file
    # replaces the value loaded from an earlier one.
    entries.update(dictionary)

In [5]:
# Number of distinct keys collected from all files in YAML_FILES.
len(entries)

38825

In [8]:
# Peek at the first (key, value) pair to see what an entry looks like.
next(iter(entries.items()))

('A', ordereddict([('det', 'EY1'), ('noun', 'EY1')]))

In [22]:
def apply(entries, func):
    """Sum the results of ``func`` over every pronunciation string in ``entries``.

    Each value of ``entries`` may be a single string, a list of strings, or a
    dict whose values are strings. Every string is split on whitespace and the
    resulting list of tokens is handed to ``func``; the return values are
    added together. Values of any other type are ignored.

    Args:
        entries: mapping whose values hold the pronunciation strings.
        func: callable taking a list of tokens and returning a number.

    Returns:
        The accumulated sum of all ``func`` results (0 when nothing matched).
    """
    total = 0
    for entry in entries.values():
        # Normalise the three supported shapes to "an iterable of strings".
        if isinstance(entry, str):
            pronunciations = [entry]
        elif isinstance(entry, list):
            pronunciations = entry
        elif isinstance(entry, dict):
            pronunciations = entry.values()
        else:
            # Unrecognised value types contribute nothing to the total.
            continue
        for pronunciation in pronunciations:
            total += func(pronunciation.split())
    return total

In [23]:
def stressed(ph):
    """Return True if the phoneme symbol ``ph`` is marked as stressed.

    A symbol counts as stressed when it ends in the digit "1" (e.g. "EY1").
    An empty string is reported as not stressed.
    """
    # endswith() rather than ph[-1]: an empty string yields False instead of
    # raising IndexError.
    return ph.endswith("1")

In [36]:
def unstressed(ph):
    """Return True if the phoneme symbol ``ph`` is an unstressed vowel symbol.

    That is the case when the symbol ends in "0", "2" or "ə", or when it is
    exactly "əR". An empty string is reported as not unstressed.
    """
    # endswith() with a tuple of suffixes replaces ph[-1] so that an empty
    # string yields False instead of raising IndexError.
    return ph.endswith(("0", "2", "ə")) or ph == "əR"

In [37]:
def vowel(ph):
    """Return True if the phoneme symbol ``ph`` is a vowel symbol.

    A symbol is treated as a vowel when it ends in one of the digits "0", "1"
    or "2", ends in "ə", or is exactly "əR". An empty string is reported as
    not a vowel.
    """
    # endswith() with a tuple of suffixes replaces ph[-1] so that an empty
    # string yields False instead of raising IndexError.
    return ph.endswith(("0", "1", "2", "ə")) or ph == "əR"

In [38]:
class ST(Enum):
    """Stress classification used as the scanner state in ``penultimate``."""

    # No vowel has been classified yet.
    unknown = 1
    # The vowel seen was an unstressed one.
    unstressed = 2
    # The vowel seen was a stressed one.
    stressed = 3

In [39]:
def penultimate(phonemes):
    """Report whether a word's stress falls on its second-to-last syllable.

    The phonemes are scanned from the end of the word. Non-vowel symbols are
    skipped; the last vowel and, if present, the vowel before it decide the
    result.

    Args:
        phonemes: sequence of phoneme symbols for one pronunciation.

    Returns:
        1 if the last vowel is unstressed and the vowel before it is stressed,
        or if fewer than two vowels are found (e.g. a one-syllable word).
        0 if the last two vowels are both unstressed, or if the last vowel is
        stressed and any earlier vowel exists.
    """
    final_syllable = ST.unknown
    for phoneme in reversed(phonemes):
        if final_syllable is ST.unknown:
            # Still looking for the last vowel of the word.
            if unstressed(phoneme):
                final_syllable = ST.unstressed
            elif stressed(phoneme):
                final_syllable = ST.stressed
        elif final_syllable is ST.unstressed:
            if unstressed(phoneme):
                # The last two syllables are both unstressed.
                return 0
            if stressed(phoneme):
                # Stressed syllable followed by an unstressed final one: fits.
                return 1
        elif vowel(phoneme):
            # The final syllable was stressed and the word has another syllable.
            return 0
    # Fewer than two vowels were found, e.g. a one-syllable word.
    return 1

In [42]:
# A one-syllable word counts as fitting the pattern.
assert penultimate("S EY1".split())
# Stressed final syllable preceded by another vowel: does not fit.
assert not penultimate("S ə P OHR1 T".split())
# The final vowel ends in "2", which unstressed() accepts; the vowel before it ends in "1".
assert penultimate("S UR1 N EY2 M".split())
# Unstressed final "ə" preceded by the stressed "EH1".
assert penultimate("S ə S P EH1 N SH ə N".split())
# TODO(review): no case here exercises the "last two syllables both unstressed"
# branch of penultimate (expected result 0).

In [43]:
# Number of pronunciations in `entries` for which penultimate() returns 1.
apply(entries, penultimate)

25991