In [7]:
import re
import phonemizer
from phonemizer.phonemize import phonemize

In [4]:
# Special tokens: padding, end-of-sequence and beginning-of-sequence markers.
_pad, _eos, _bos = '_', '~', '^'

# Grapheme inventory: ASCII letters, then punctuation and the space character.
_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? '
_punctuations = '!\'(),-.:;? '
# Subset of punctuation used to build the punctuation-matching regex.
_phoneme_punctuations = '.!;:,?'
# Digits are valid grapheme symbols as well.
_numbers = '1234567890'

# IPA phoneme inventory, grouped by category.
_vowels = 'iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ'
_non_pulmonic_consonants = 'ʘɓǀɗǃʄǂɠǁʛ'
_pulmonic_consonants = 'pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ'
_suprasegmentals = 'ˈˌːˑ'
_other_symbols = 'ʍwɥʜʢʡɕʑɺɧ'
_diacrilics = 'ɚ˞ɫ'  # (sic) identifier spelling kept unchanged
# One entry per character, in sorted (code point) order.
_phonemes = sorted(
    _vowels
    + _non_pulmonic_consonants
    + _pulmonic_consonants
    + _suprasegmentals
    + _other_symbols
    + _diacrilics
)

# Despite the name, `_arpabet` holds the IPA phonemes above, each prefixed with
# "@" so an entry can never collide with a plain character in `symbols`
# (several lowercase letters are also IPA symbols).
_arpabet = ['@' + phoneme for phoneme in _phonemes]

# Exported vocabularies; an entry's position in the list is its id.
symbols = [_pad, _eos, _bos, *_characters, *_numbers, *_arpabet]
phonemes = [_pad, _eos, _bos, *_phonemes, *_punctuations]

In [10]:
# Two-way lookup between grapheme symbols and their ids (list positions).
_SYMBOL_TO_ID = dict(zip(symbols, range(len(symbols))))
_ID_TO_SYMBOL = dict(enumerate(symbols))

# Same two-way lookup for the phoneme vocabulary.
_PHONEMES_TO_ID = dict(zip(phonemes, range(len(phonemes))))
_ID_TO_PHONEMES = dict(enumerate(phonemes))

# Splits a string into: text before the first {...}, the text inside the
# braces, and everything after the closing brace.
_CURLY_RE = re.compile(r'(.*?)\{(.+?)\}(.*)')

# Matches one or more consecutive phoneme punctuation marks (space excluded).
PHONEME_PUNCTUATION_PATTERN = '[{}]+'.format(_phoneme_punctuations)

In [11]:
def text2phone(text, language):
    '''
    Convert graphemes to phonemes.

    The text is phonemized with the espeak backend of ``phonemizer``. The
    punctuation runs found in ``text`` (via PHONEME_PUNCTUATION_PATTERN) are
    then written back into the phoneme string: each '| |\\n' sequence in the
    phonemizer output is replaced, in order, by the corresponding run, and a
    run that ends the text is appended at the end.

    Args:
        text (str): raw input text.
        language (str): language code forwarded to ``phonemize``.

    Returns:
        str: the phoneme string with punctuation restored.
    '''
    # Positional arguments kept exactly as before; presumably
    # (word, syllable, phone) separators -- TODO confirm against the
    # installed phonemizer version.
    separator = phonemizer.separator.Separator(' |', '', '|')
    punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
    ph = phonemize(text, separator=separator, strip=False, njobs=1, backend='espeak', language=language)
    ph = ph[:-1].strip()  # skip the last empty character
    if not punctuations:
        return ph

    # Each match is a whole run of punctuation ('...', '?!'), so test with
    # endswith(): comparing against text[-1] only worked for single marks and
    # silently dropped a trailing multi-character run.
    ends_with_punct = text.endswith(punctuations[-1])
    inner_puncts = punctuations[:-1] if ends_with_punct else punctuations

    # Replace \n with matching punctuations, one occurrence per run.
    for punct in inner_puncts:
        ph = ph.replace('| |\n', '|' + punct + '| |', 1)
    if ends_with_punct:
        ph += punctuations[-1]
    return ph

In [None]:
# Try text2phone on what looks like tone-numbered pinyin, using the 'zh' language code.
# NOTE(review): this cell has no execution count, so no output was recorded for it.
text2phone("zhe4 shi4 yi2 ge4 ce4 shi4", 'zh')

In [13]:
import torch

In [14]:
# Random 2x3 tensor. No seed is set in the visible cells, so the values
# differ from run to run.
a = torch.rand((2, 3))

In [15]:
# Display the first random tensor; the recorded values are specific to that run.
a

tensor([[0.6945, 0.6402, 0.2024],
        [0.2838, 0.7556, 0.7150]])

In [16]:
# Second random 2x3 tensor, same shape as `a`. No seed is set in the visible
# cells, so the values differ from run to run.
b = torch.rand((2, 3))

In [18]:
# Display the second random tensor; the recorded values are specific to that run.
b

tensor([[0.6912, 0.3595, 0.9306],
        [0.7640, 0.3551, 0.0105]])

In [17]:
# `*` on the two 2x3 tensors multiplies entry by entry (element-wise), not as
# a matrix product: e.g. the first recorded entry is 0.6945 * 0.6912 = 0.4800.
a * b

tensor([[0.4800, 0.2301, 0.1883],
        [0.2168, 0.2683, 0.0075]])

In [19]:
# Three (odd, odd + 1) pairs used by the unpacking experiment.
l = [(odd, odd + 1) for odd in (1, 3, 5)]

In [20]:
# Accumulator for the results of the unpacking experiment.
x = list()

In [21]:
# Run the same experiment three times, appending one result per pass.
# NOTE(review): this rebinds `a` and `b`, which earlier cells use for tensors.
for i in range(3):
    # The inner loop exits after its first pass, so only the first pair of `l`
    # is unpacked. Scaling the unpacked names does not modify the tuples in `l`.
    for a, b in l:
        a, b = a * 3, b * 9
        break
    x.append((a, b))

In [22]:
# Display the accumulated results: the same scaled first pair once per outer pass.
x

[(3, 18), (3, 18), (3, 18)]

In [23]:
# Display `l` again to check that the loop above left the original pairs unchanged.
l

[(1, 2), (3, 4), (5, 6)]

In [1]:
from layers.common_layers import LinearBN

In [2]:
# Instantiate the project-local LinearBN layer with both constructor arguments set to 10.
# NOTE(review): presumably (input size, output size) -- confirm against layers.common_layers.
# NOTE(review): this reuses the name `x`, which another cell binds to a list.
x = LinearBN(10, 10)

In [3]:
import numpy as np

In [7]:
# Running (cumulative) product of the list: 5, 5*5 = 25, 25*24 = 600.
np.cumprod([5,5,24])

array([  5,  25, 600])