In [None]:
# Pinned dependencies for this notebook.
# %pip (rather than !pip) installs into the environment of the running kernel,
# not into whichever `pip` happens to be first on the shell PATH.
%pip install numpy==1.20.3
%pip install sentencepiece==0.1.96

In [None]:
import csv
import re

import numpy as np
import sentencepiece as spm

from IPython.display import Audio

In [None]:
# Fetch the NeuralMorse repository into ./neuralmorse; later cells read
# 'neuralmorse/assignment.tsv' and 'neuralmorse/neuralmorse.sp.model' from it.
!git clone https://github.com/octanove/neuralmorse.git

In [None]:
# Load the token -> symbol assignment table from the cloned repository.
# Each TSV row is read as: column 0 = token, column 1 = its symbol string.
#
# The encoding is given explicitly because the table is keyed by tokenizer
# pieces, which include non-ASCII characters such as '▁'; relying on the
# platform default encoding can fail or mis-decode on non-UTF-8 systems.
# newline='' is what the csv module asks for when reading from a file object.
with open('neuralmorse/assignment.tsv', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t')
    token2symbol = {row[0]: row[1] for row in reader}

# The bare '▁' piece is mapped to a single space symbol (overriding any entry
# the table may have for it).
token2symbol['▁'] = ' '

In [None]:
# SentencePiece tokenizer loaded from the model file in the cloned repository;
# used by tokenize() to split normalized text into pieces.
sp = spm.SentencePieceProcessor(model_file='neuralmorse/neuralmorse.sp.model')

In [None]:
SR = 16000    # sampling rate in Hz (also passed to the audio player as `rate`)

unit = 0.1    # length of one dot, in seconds

# Sample counts are computed once (rounded rather than truncated) and reused
# for the time axes, the silence and the envelopes, so that their lengths
# always agree even when unit * SR is not exactly representable as a float.
n_unit = int(round(unit * SR))    # samples in one unit (dot)
n_unit3 = 3 * n_unit              # samples in three units (dash)

# Time axes whose samples are exactly 1/SR apart.  An endpoint-inclusive
# np.linspace(0, unit, n) would space them unit/(n-1) apart instead, which
# plays every tone slightly sharp and detunes dots and dashes differently.
u = np.arange(n_unit) / SR
u3 = np.arange(n_unit3) / SR

# Tone frequencies in Hz, one per letter pair of the element alphabet.
freq_e4 = 329.63
freq_a4 = 440.00
freq_b4 = 493.88
freq_e5 = 659.25

# One unit of silence.
space = np.zeros(n_unit)

# Amplitude envelopes: linear fade-in, flat sustain, linear fade-out
# (presumably to avoid audible clicks at tone boundaries).
fade_time = 0.003    # seconds
n_fade = int(round(fade_time * SR))
fade_in = np.linspace(0, 1, n_fade)
fade_out = np.linspace(1, 0, n_fade)
sus_u = np.ones(n_unit - 2 * n_fade)
sus_u3 = np.ones(n_unit3 - 2 * n_fade)
env_u = np.concatenate((fade_in, sus_u, fade_out))
env_u3 = np.concatenate((fade_in, sus_u3, fade_out))

# Element character -> waveform.  The letter selects the frequency; lowercase
# is one unit long, uppercase is three units long; ' ' is one unit of silence.
element2audio = {
    'a': np.sin(2 * np.pi * freq_e4 * u) * env_u,
    'A': np.sin(2 * np.pi * freq_e4 * u3) * env_u3,
    'b': np.sin(2 * np.pi * freq_a4 * u) * env_u,
    'B': np.sin(2 * np.pi * freq_a4 * u3) * env_u3,
    'c': np.sin(2 * np.pi * freq_b4 * u) * env_u,
    'C': np.sin(2 * np.pi * freq_b4 * u3) * env_u3,
    'd': np.sin(2 * np.pi * freq_e5 * u) * env_u,
    'D': np.sin(2 * np.pi * freq_e5 * u3) * env_u3,
    ' ': space,
}

In [None]:
# Character-level substitutions applied by normalize() after lowercasing.
_NORMALIZE_TABLE = str.maketrans({
    "’": "'",          # right single quotation mark -> apostrophe
    "‘": "'",          # left single quotation mark -> apostrophe
    '“': '"',          # left double quotation mark -> quotation mark
    '”': '"',          # right double quotation mark -> quotation mark
    '–': '-',          # en dash -> hyphen
    '—': '-',          # em dash -> hyphen
    "\u00AD": None,    # soft hyphen -> removed
})


def normalize(text):
    """Lowercase `text` and map typographic punctuation to plain ASCII.

    Curly single/double quotes become straight ones, en and em dashes become
    a hyphen, and soft hyphens are dropped.

    Args:
        text: the input string.

    Returns:
        The normalized string.
    """
    return text.lower().translate(_NORMALIZE_TABLE)

In [None]:
# Rewrites applied to the space-separated token string so that English
# contractions use the split listed on the right-hand side (e.g. "ca n't",
# "'s").  Every pattern and every replacement is delimited by a space on both
# sides so that only whole tokens are matched.
REPLACES = [
    (" can ' t ", " ca n't "),
    (" could n ' t ", " could n't "),
    (" co ul d n ' t ", " could n't "),
    (" won ' t ", " wo n't "),
    (" would n ' t ", " would n't "),
    (" don ' t ", " do n't "),
    (" d on ' t ", " do n't "),
    (" doesn ' t ", " does n't "),
    (" do es n ' t ", " does n't "),
    (" didn ' t ", " did n't "),
    (" d id n ' t ", " did n't "),
    (" have n ' t ", " have n't "),
    (" has n ' t ", " has n't "),
    (" had n ' t ", " had n't "),
    (" are n ' t ", " are n't "),
    (" is n ' t ", " is n't "),
    (" was n ' t ", " was n't "),
    (" were n ' t ", " were n't "),
    (" should n ' t ", " should n't "),
    (" must n ' t ", " must n't "),
    (" might n ' t ", " might n't "),
    (" need n ' t ", " need n't "),
    (" ' m ", " 'm "),
    (" ' d ", " 'd "),
    (" ' s ", " 's "),
    (" ' re ", " 're "),
    (" ' ll ", " 'll "),
    (" ' ve ", " 've "),
    (" didn ", " did n "),
    (" doesn ", " does n ")
]

def postprocess(text):
    """Re-segment a space-joined piece string.

    Steps, in order:
      1. Detach the '▁' marker from the piece it prefixes, so it becomes a
         token of its own ("▁foo" -> "▁ foo").
      2. Drop the '▁ ' at the very beginning of the string.
      3. Apply the REPLACES table to normalize contraction splits.

    Args:
        text: pieces joined by single spaces.

    Returns:
        The re-segmented, space-separated token string.
    """
    text = re.sub(r'▁([^ ])', r'▁ \1', text)
    text = re.sub(r'^▁ ', '', text)

    # The REPLACES patterns need a space on both sides, so without padding a
    # contraction that is the first or last token would never match.  Every
    # replacement keeps its outer spaces, so the padding survives the loop and
    # is removed again afterwards.
    text = ' ' + text + ' '
    for before, after in REPLACES:
        text = text.replace(before, after)
    return text[1:-1]

In [None]:
def tokenize(text):
    """Turn raw text into a space-separated token string.

    The text is normalized, encoded to piece ids with the SentencePiece
    model `sp`, mapped back to piece strings, joined with single spaces and
    finally passed through postprocess().

    Args:
        text: the raw input string.

    Returns:
        The post-processed token string.
    """
    piece_ids = sp.encode(normalize(text))
    pieces = sp.id_to_piece(piece_ids)
    return postprocess(' '.join(pieces))

In [None]:
def tokens2audio(tokens):
    """Convert a space-separated token string into a waveform.

    Each token is looked up in `token2symbol`; the characters of its symbol
    are joined with one space, and consecutive tokens are joined with three
    spaces.  Every character of the resulting element string (spaces
    included) is then looked up in `element2audio` and the pieces are
    concatenated.

    Args:
        tokens: tokens separated by single spaces, as produced by tokenize().
            Empty tokens (from an empty string or repeated spaces) are
            skipped.

    Returns:
        A tuple (audio, elements): the concatenated waveform as a 1-D array
        and the element string it was rendered from.  For input without any
        tokens this is an empty array and an empty string.

    Raises:
        KeyError: if a token has no entry in `token2symbol`, or an element
            character has no entry in `element2audio`.
    """
    # ''.split(' ') yields [''], which would otherwise be looked up as a token.
    token_list = [token for token in tokens.split(' ') if token]
    if not token_list:
        # np.concatenate rejects an empty sequence, so short-circuit here.
        return np.zeros(0), ''

    symbols = []
    for token in token_list:
        try:
            symbol = token2symbol[token]
        except KeyError:
            raise KeyError(f'no symbol assigned to token {token!r}') from None
        symbols.append(' '.join(symbol))
    elements = '   '.join(symbols)

    audio = np.concatenate([element2audio[e] for e in elements])

    return audio, elements

In [None]:
# End-to-end demo: tokenize a sample sentence, show the tokens and the element
# string they map to, then play the synthesized audio.
text = 'NeuralMorse is a method for encoding text with eight tonal alphabets'
tokens = tokenize(text)
print('tokens:', tokens)
audio, elements = tokens2audio(tokens)
print('elements:', elements)
# Last expression of the cell, so the notebook displays it (an IPython Audio
# object built from the waveform at sampling rate SR).
Audio(audio, rate=SR)