In [1]:
from collections import OrderedDict
from utils import apply

In [2]:
def check_existence(dictionary, word, pronunciation):
    """Report whether `dictionary` lists `pronunciation` for `word`.

    The value stored under `word` may take one of three shapes:

    * a ``str``         -- the single pronunciation,
    * a ``list``        -- several alternative pronunciations,
    * an ``OrderedDict`` -- pronunciations stored as the mapping's values.

    Args:
        dictionary: mapping queried with ``.get(word)``.
        word: key to look up.
        pronunciation: pronunciation to compare against the stored one(s).

    Returns:
        True if any stored pronunciation equals `pronunciation`; False if the
        word is missing (or stored as ``None``) or nothing matches.

    Raises:
        ValueError: if the stored value has any other type.
    """
    entry = dictionary.get(word)
    if entry is None:
        return False

    # Normalise every supported shape into one iterable of pronunciations.
    if isinstance(entry, str):
        stored = (entry,)
    elif isinstance(entry, list):
        stored = entry
    elif isinstance(entry, OrderedDict):
        stored = entry.values()
    else:
        raise ValueError(f"unexpected type: {type(entry)}")

    return any(pronunciation == candidate for candidate in stored)

In [3]:
def is_regular_form(dictionary, word, pronun):
    """Tell whether ``word``/``pronun`` is a regular inflection of a known base entry.

    The spelling suffix of ``word`` and the tail of ``pronun`` are matched
    against a fixed set of patterns (possessive, plural/third-person, simple
    past, -ing, adverb). Every match proposes a base spelling plus a base
    pronunciation, which is looked up with ``check_existence``.

    Args:
        dictionary: lookup structure handed through to ``check_existence``.
        word: spelling in upper case (the suffixes are compared against
            upper-case literals such as ``"'S"`` and ``"ED"``).
        pronun: phonemes separated by single spaces, e.g. ``"AE1 D AX M Z"``.

    Returns:
        True if a proposed base form exists and the ending passes the voicing
        check; False otherwise, including for an empty ``word``.

    Side effects:
        Prints one line when a base form is found, saying either that the
        form is regular or that it is not.
    """
    # A voiceless " S" / " T" ending is only regular after one of these
    # voiceless phonemes.
    voiceless_before_s = ("F", "K", "P", "T", "TH")
    voiceless_before_t = ("F", "K", "P", "S", "SH", "CH", "TH")

    # Collect (base_word, base_pronun, form_name) candidates, most specific
    # first. Two patterns can apply to the same spelling suffix; the more
    # general one is kept as a fallback so it is still tried when the
    # specific one finds no base entry.
    candidates = []
    if word.endswith("'S"):
        # the possessive after a sibilant is more specific, so it comes first
        if pronun.endswith(" IH0 Z"):
            candidates.append((word[:-2], pronun[:-6], "possessive after sibilant"))
        if pronun.endswith((" S", " Z")):
            candidates.append((word[:-2], pronun[:-2], "possessive"))
    elif word.endswith("S"):
        # plural forms and third-person present verbs; endswith() also copes
        # with an empty word, where word[-1] would raise IndexError
        if pronun.endswith((" S", " Z")):
            candidates.append((word[:-1], pronun[:-2], "plural/third-person"))
    elif word.endswith("ED"):
        if pronun.endswith((" D", " T")):
            candidates.append((word[:-2], pronun[:-2], "simple past"))
    elif word.endswith("ING"):
        if pronun.endswith(" IH0 NG"):
            candidates.append((word[:-3], pronun[:-7], "-ing"))
    elif word.endswith("LY") and pronun.endswith(" L IY0"):
        if word.endswith("LLY"):
            # the base's final L is shared with the suffix: strip only " IY0"
            candidates.append((word[:-2], pronun[:-4], "adverb (merged)"))
        candidates.append((word[:-2], pronun[:-6], "adverb"))

    # Phoneme right before a one-phoneme ending (" S" / " T"). Comparing whole
    # phonemes instead of a fixed-width slice also works when the stem is a
    # single phoneme without a leading space.
    stem_phonemes = pronun[:-2].split()
    preceding = stem_phonemes[-1] if stem_phonemes else ""

    for base_word, base_pronun, form_name in candidates:
        # check if the base form exists
        if not check_existence(dictionary, base_word, base_pronun):
            continue
        if pronun.endswith(" S") and preceding not in voiceless_before_s:
            print(f"not a regular form: {word} ({pronun}) (base form: {base_word})")
            return False
        if pronun.endswith(" T") and preceding not in voiceless_before_t:
            print(f"not a regular form: {word} ({pronun}) (base form: {base_word})")
            return False
        print(f"found {form_name} form: {word} (base form: {base_word})")
        return True
    return False

In [4]:
def single(word, pronun, dictionary):
    """Callback for an entry that carries exactly one pronunciation.

    Returns False when ``is_regular_form`` classifies ``word``/``pronun`` as a
    regular inflection of an existing entry, and True otherwise.
    """
    return not is_regular_form(dictionary, word, pronun)

def p_list(word, pronun_list, dictionary):
    """Callback for an entry that carries a list of pronunciations.

    Returns False as soon as one pronunciation in ``pronun_list`` is classified
    by ``is_regular_form`` as a regular inflection (later pronunciations are
    then not examined); returns True if none is, or if the list is empty.
    """
    return not any(
        is_regular_form(dictionary, word, candidate) for candidate in pronun_list
    )

In [5]:
# Hand the two callbacks to utils.apply: `single` takes one pronunciation,
# `p_list` takes a list of pronunciations.
# NOTE(review): utils.apply is not visible in this notebook. Judging by the
# recorded output ("removed 1 entries in A.yaml"), it presumably drops entries
# whose callback returns False, and only_first_file=True presumably limits the
# run to the first dictionary file -- confirm against utils.apply.
apply(single, p_list, only_first_file=True)

found plural/third-person form: ADAMS (base form: ADAM)
not a regular form: AGNES (AE1 G N IH0 S) (base form: AGNE)
not a regular form: ALMOS (AA1 L M OW0 S) (base form: ALMO)
not a regular form: ANAS (AE1 N AX S) (base form: ANA)
not a regular form: ASTROS (AE1 S T R OW0 S) (base form: ASTRO)
not a regular form: ATLAS (AE1 T L AX S) (base form: ATLA)
removed 1 entries in A.yaml
