In [1]:
from __future__ import annotations
from typing import Union, List, Optional, Dict, Tuple

In [2]:
from pathlib import Path
from collections import OrderedDict
from strictyaml import load, MapPattern, Str, Seq

In [3]:
schema = MapPattern(Str(), Str() | MapPattern(Str(), Str()) | Seq(Str()))

In [4]:
def read_yaml_file(fpath: Path):
    """Load the content of a YAML file as an ordered dictionary"""
    raw_data = fpath.open().read()
    return load(raw_data, schema)

In [5]:
YAML_FILES: List[str] = [
    "A.yaml",
    "B.yaml",
    "C.yaml",
    "D.yaml",
    "E.yaml",
    "F.yaml",
    "G.yaml",
    "H.yaml",
    "I.yaml",
    "J.yaml",
    "K.yaml",
    "L.yaml",
    "M.yaml",
    "N.yaml",
    "O.yaml",
    "P.yaml",
    "Q.yaml",
    "R.yaml",
    "S.yaml",
    "T.yaml",
    "U.yaml",
    "V.yaml",
    "W.yaml",
    "X.yaml",
    "Y.yaml",
    "Z.yaml",
    "_other.yaml",
]

In [6]:
def has_apostrophe(word):
    return word[-2:] == "'S"

In [7]:
def get_new_pronun(word: str, pronun: str) -> Optional[str]:
    if pronun[-6:] == " IH0 Z":
        new_pronun = pronun[:-6] + " ' IH0 Z"
        print(f"{word}: {pronun} -> {new_pronun}")
    elif pronun[-2:] == " Z":
        new_pronun = pronun[:-2] + " ' Z"
        print(f"{word}: {pronun} -> {new_pronun}")
    elif pronun[-2:] == " S":
        new_pronun = pronun[:-2] + " ' S"
        print(f"{word}: {pronun} -> {new_pronun}")
    else:
        raise ValueError(f"S is not pronounced: {word}: {pronun}")
    return new_pronun

In [11]:
def insert_apostrophes(yaml_dict):
    dictionary = yaml_dict.data
    counter = 0
    for word, value in dictionary.items():
        if not has_apostrophe(word):
            continue
        if isinstance(value, str):
            pronun: str = value
            if "'" in pronun:
                print(f"{word} already has an apostrophe")
            else:
                yaml_dict[word] = get_new_pronun(word, pronun)
        else:
            if isinstance(value, list):
                pronun_list: List[str] = value
                new_pronuns = []
                for pronun in pronun_list:
                    new_pronuns.append(get_new_pronun(word, pronun))
                if "'" in pronun:
                    print(f"{word} already has an apostrophe")
                else:
                    yaml_dict[word] = new_pronuns
            elif isinstance(value, OrderedDict):
                pronun_dict: OrderedDict[str, str] = value
                new_pronuns = OrderedDict()
                for k, pronun in pronun_dict.items():
                    new_pronuns[k] = get_new_pronun(word, pronun)
                if "'" in pronun:
                    print(f"{word} already has an apostrophe")
                else:
                    yaml_dict[word] = new_pronuns
            else:
                raise ValueError(f"unexpected type: {type(value)}")
        counter += 1
    return counter

In [12]:
base_path = Path("..") / "dictionary"
for yaml_file in YAML_FILES:
    yaml_path = base_path / yaml_file
    yaml_dict = read_yaml_file(yaml_path)
    num_changed = insert_apostrophes(yaml_dict)
    print(f"changed {num_changed} entries in {yaml_file}")
    yaml_path.open("w").write(yaml_dict.as_yaml())

ABRAM'S already has an apostrophe
ACCOR'S already has an apostrophe
ADMINISTRATOR'S already has an apostrophe
AINSWORTH'S already has an apostrophe
AIRCAL'S already has an apostrophe
AIRPLANE'S already has an apostrophe
AIRWAYS'S already has an apostrophe
AKERS'S already has an apostrophe
ALCOA'S already has an apostrophe
ALYESKA'S already has an apostrophe
AMES'S already has an apostrophe
AMEX'S already has an apostrophe
AMTRAK'S already has an apostrophe
ANDREW'S already has an apostrophe
ANHEUSER'S already has an apostrophe
ANTIGONE'S already has an apostrophe
ANTONIU'S already has an apostrophe
APARTHEID'S already has an apostrophe
ARAFAT'S already has an apostrophe
ARIAS'S already has an apostrophe
ASKERS'S already has an apostrophe
ASKIN'S already has an apostrophe
ASPIRIN'S already has an apostrophe
AVON'S already has an apostrophe
AYER'S: EH1 R ' Z -> EH1 R ' ' Z
AYER'S: EY1 R ' Z -> EY1 R ' ' Z
AYER'S already has an apostrophe
changed 25 entries in A.yaml
BADEN'S: B AA1 D AX N

JAFFRAY'S: JH AE1 F R EY2 Z -> JH AE1 F R EY2 ' Z
JENRETTE'S: JH EH0 N R EH1 T S -> JH EH0 N R EH1 T ' S
JIMBO'S: JH IH1 M B OW0 Z -> JH IH1 M B OW0 ' Z
JOBS'S: JH AA1 B Z IH0 Z -> JH AA1 B Z ' IH0 Z
JOHNS'S: JH AA1 N Z IH0 Z -> JH AA1 N Z ' IH0 Z
JONBENET'S: JH AO1 N B AX N EY1 S -> JH AO1 N B AX N EY1 ' S
JONES'S: JH OW1 N Z IH0 Z -> JH OW1 N Z ' IH0 Z
JOSHUA'S: JH AO1 SH UW2 AX Z -> JH AO1 SH UW2 AX ' Z
JOSKE'S: JH AO1 S K IY0 Z -> JH AO1 S K IY0 ' Z
JOURNEYMAN'S: JH ER1 N IY0 M AX N Z -> JH ER1 N IY0 M AX N ' Z
changed 11 entries in J.yaml
KANEMARU'S: K AA2 N EY0 M AA1 R UW0 Z -> K AA2 N EY0 M AA1 R UW0 ' Z
KIPLINGER'S: K IH1 P L IH2 NG G AXR Z -> K IH1 P L IH2 NG G AXR ' Z
KOSLOW'S: K AA1 Z L OW0 Z -> K AA1 Z L OW0 ' Z
changed 3 entries in K.yaml
LAIDLAW'S: L EY1 D L AO1 Z -> L EY1 D L AO1 ' Z
LAURIAT'S: L AO1 R IY0 IH0 T S -> L AO1 R IY0 IH0 T ' S
LEEDS'S: L IY1 D Z IH0 Z -> L IY1 D Z ' IH0 Z
LEVELER'S: L EH1 V AX L AXR Z -> L EH1 V AX L AXR ' Z
LEVELER'S: L EH1 V L AXR Z -> L EH

TELXON'S: T EH1 L Z AX N Z -> T EH1 L Z AX N ' Z
THACKERY'S: TH AE1 K AXR IY0 Z -> TH AE1 K AXR IY0 ' Z
THERAPISTS'S: TH EH1 R AX P IH0 S T S -> TH EH1 R AX P IH0 S T ' S
THIRTEEN'S: TH ER1 T IY2 N Z -> TH ER1 T IY2 N ' Z
TIMES'S: T AY1 M Z IH0 Z -> T AY1 M Z ' IH0 Z
TODI'S: T OW1 D IY0 S -> T OW1 D IY0 ' S
TOSCANINI'S: T AH2 S K AX N IY1 N IY0 Z -> T AH2 S K AX N IY1 N IY0 ' Z
TOTINO'S: T AX T IY1 N OW0 Z -> T AX T IY1 N OW0 ' Z
TOWNSHIP'S: T AW1 N SH IH2 P S -> T AW1 N SH IH2 P ' S
TRANSAMERICA'S: T R AE2 N Z AX M EH1 R IH0 K AX Z -> T R AE2 N Z AX M EH1 R IH0 K AX ' Z
TRANSCANADA'S: T R AE2 N Z K AE1 N AX D AX Z -> T R AE2 N Z K AE1 N AX D AX ' Z
TRIZEC'S: T R IH1 Z EH0 K S -> T R IH1 Z EH0 K ' S
TUCCI'S: T UW1 CH IY0 S -> T UW1 CH IY0 ' S
TUDOR'S: T UW1 D ER0 Z -> T UW1 D ER0 ' Z
TUSSAUD'S: T UW0 S OW1 Z -> T UW0 S OW1 ' Z
changed 16 entries in T.yaml
UTAH'S: Y UW1 T AA0 Z -> Y UW1 T AA0 ' Z
changed 1 entries in U.yaml
VANDERGRIFT'S: V AE1 N D AXR G R IH2 F T S -> V AE1 N D AXR G R

In [13]:
def find_missing_apostrophes(yaml_dict):
    dictionary = yaml_dict.data
    counter = 0
    for word, value in dictionary.items():
        if "'" not in word:
            continue
        if isinstance(value, str):
            pronun: str = value
            if "'" not in pronun:
                print(f"{word} has no apostrophe")
                counter += 1
        else:
            if isinstance(value, list):
                pronun_list: List[str] = value
                pronun = pronun_list[0]
                if "'" not in pronun:
                    print(f"{word} has no apostrophe")
                    counter += 1
            elif isinstance(value, OrderedDict):
                pronun_dict: OrderedDict[str, str] = value
                pronun = pronun_dict[next(iter(pronun_dict))]
                if "'" not in pronun:
                    print(f"{word} has no apostrophe")
                    counter += 1
            else:
                raise ValueError(f"unexpected type: {type(value)}")
    return counter

In [15]:
base_path = Path("..") / "dictionary"
for yaml_file in YAML_FILES:
    yaml_path = base_path / yaml_file
    yaml_dict = read_yaml_file(yaml_path)
    num_changed = find_missing_apostrophes(yaml_dict)
    print(f"found {num_changed} entries in {yaml_file}")

found 0 entries in A.yaml
found 0 entries in B.yaml
found 0 entries in C.yaml
found 0 entries in D.yaml
found 0 entries in E.yaml
found 0 entries in F.yaml
found 0 entries in G.yaml
found 0 entries in H.yaml
found 0 entries in I.yaml
found 0 entries in J.yaml
found 0 entries in K.yaml
found 0 entries in L.yaml
found 0 entries in M.yaml
found 0 entries in N.yaml
found 0 entries in O.yaml
found 0 entries in P.yaml
found 0 entries in Q.yaml
found 0 entries in R.yaml
found 0 entries in S.yaml
found 0 entries in T.yaml
found 0 entries in U.yaml
found 0 entries in V.yaml
found 0 entries in W.yaml
found 0 entries in X.yaml
found 0 entries in Y.yaml
found 0 entries in Z.yaml
found 0 entries in _other.yaml
