# Text phonemizers

Assumes the espeak backend is already installed, e.g. via `apt-get install espeak` on Ubuntu or `brew install espeak` on macOS.

In [23]:
#| default_exp text.phonemizers

In [24]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
#| export
import platform
import os

from dotenv import load_dotenv
# Load `.env` *before* touching PHONEMIZER_ESPEAK_LIBRARY: `load_dotenv()` does
# not override variables that are already set, so running it after the macOS
# block below would silently discard a library path configured in `.env`.
load_dotenv()

# On macOS, point phonemizer at a Homebrew-installed espeak library, but only
# when the user has not already chosen one (shell export or `.env`) and only
# if the file is really there. If none of the candidates exists the variable
# is left unset instead of pointing at a missing file.
if platform.system() == 'Darwin' and not os.environ.get('PHONEMIZER_ESPEAK_LIBRARY'):
    for _espeak_lib in (
        "/opt/homebrew/Cellar/espeak/1.48.04_1/lib/libespeak.dylib",  # path originally hard-coded here
        "/opt/homebrew/lib/libespeak.dylib",  # presumably Homebrew's unversioned symlink on Apple Silicon -- TODO confirm
        "/usr/local/lib/libespeak.dylib",  # presumably Homebrew's unversioned symlink on Intel Macs -- TODO confirm
    ):
        if os.path.isfile(_espeak_lib):
            os.environ['PHONEMIZER_ESPEAK_LIBRARY'] = _espeak_lib
            break

from phonemizer.backend import EspeakBackend
from phonemizer.backend.espeak.language_switch import LanguageSwitch
from phonemizer.backend.espeak.words_mismatch import WordMismatch
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator
from phonemizer import phonemize

from typing import List, Tuple, Iterable
from plum import dispatch

In [26]:
#| export

class Phonemizer():
    """Callable text-to-phoneme converter built on `phonemizer.phonemize`.

    The options given at construction time are stored on the instance and
    forwarded to `phonemize` on every call. Calling an instance with a `str`
    returns a `str`; calling it with a `List[str]` returns a list holding one
    output per input, in the same order.
    """
    def __init__(self,
        separator=Separator(word=" ", syllable="|", phone=None), # word / syllable / phone separators forwarded to `phonemize`
        language='en-us', # language code forwarded to `phonemize`
        backend='espeak', # name of the phonemization backend forwarded to `phonemize`
        strip=True, # forwarded to `phonemize` as `strip`
        preserve_punctuation=True # forwarded to `phonemize` as `preserve_punctuation`
        ):
        self.separator = separator
        self.language = language
        self.backend = backend
        self.strip = strip
        self.preserve_punctuation = preserve_punctuation

    def _phonemize(self, text, n_jobs):
        "Forward `text` (a string or a list of strings) to `phonemize` with the stored options."
        return phonemize(
            text,
            language=self.language,
            backend=self.backend,
            separator=self.separator,
            strip=self.strip,
            preserve_punctuation=self.preserve_punctuation,
            njobs=n_jobs,
        )

    @dispatch
    def __call__(self, text:str, n_jobs=1)->str:
        "Phonemize a single string and return the phonemized string."
        return self._phonemize(text, n_jobs)

    @dispatch
    def __call__(self, texts:List[str], n_jobs=1)->List[str]:
        """Phonemize each string in `texts`; the result has one entry per input.

        Non-blank, single-line entries are sent to `phonemize` together in one
        call, so the backend is set up once and `n_jobs` has several lines to
        spread over. Blank entries and entries containing line breaks are
        phonemized one at a time, exactly as a single-string call would,
        because `phonemize` documents that it ignores empty lines in list
        input and treats every list element as one line.
        """
        batch_idx = [
            i for i, t in enumerate(texts)
            if t.strip() and "\n" not in t and "\r" not in t
        ]
        results = [None] * len(texts)
        if batch_idx:
            batched = self._phonemize([texts[i] for i in batch_idx], n_jobs)
            # Only trust the batch when it lines up one-to-one with its inputs;
            # otherwise every entry falls back to the per-item path below.
            if len(batched) == len(batch_idx):
                for i, out in zip(batch_idx, batched):
                    results[i] = out
        return [
            self._phonemize(t, n_jobs) if r is None else r
            for t, r in zip(texts, results)
        ]

## Usage

In [27]:
# Build one phonemizer with the default options and call it on both supported
# input kinds: a single string, then a list of strings.
phonemizer = Phonemizer()

sentence = "oh shoot I missed my train"
print(phonemizer(sentence))

sentences = ["Oh Dear, you'll be fine!", "this is it"]
print(phonemizer(sentences))

oʊ ʃuːt aɪ mɪst maɪ tɹeɪn
['oʊ dɪɹ, juːl biː faɪn!', 'ðɪs ɪz ɪt']


In [28]:
#| hide
# Run nbdev's export step from inside the notebook.
import nbdev
nbdev.nbdev_export()