## Tacotron 2 inference 


In [1]:
!pip install khmer-nltk
!pip install khmer

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


#### Import libraries and set up matplotlib

In [2]:
import matplotlib
%matplotlib inline
import matplotlib.pylab as plt
import IPython.display as ipd

import sys
sys.path.append('waveglow/')
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT
from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence
# from denoiser import Denoiser

import re
from text2num.text.num2word import num2word, num_en2km
from khmernltk import word_tokenize
import run

  from .autonotebook import tqdm as notebook_tqdm
This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/home/phannet.pov/.conda/envs/tacotron2/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/phannet.pov/.conda/envs/tacotron2/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/phannet.pov/.conda/envs/tacotron2/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/phannet.pov/.conda/envs/tacotron2/lib/python3.6/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/home/phannet.pov/.conda/envs/tacotron2/lib/python3.6/site-packages/

| 2023-08-17 09:38:16,990 | [1;32mINFO[0m | khmer-nltk | Loaded model from /home/phannet.pov/.conda/envs/tacotron2/lib/python3.6/site-packages/khmernltk/word_tokenize/sklearn_crf_ner_10000.sav |
| 2023-08-17 09:38:17,031 | [1;32mINFO[0m | khmer-nltk | Loaded model from /home/phannet.pov/.conda/envs/tacotron2/lib/python3.6/site-packages/khmernltk/pos_tag/sklearn_crf_pos_alt_0.9846.sav |


In [3]:
def plot_data(data, figsize=(16, 4)):
    """Draw every image in ``data`` side by side in a single row of subplots.

    Args:
        data: Non-empty sequence of array-like images accepted by ``imshow``;
            one subplot is created per item.
        figsize: Figure size forwarded to ``plt.subplots``.

    Raises:
        ValueError: If ``data`` is empty.

    The axis labels ('Decoder timestep' / 'Encoder timestep') are attached to
    the last panel only, which is where the pyplot-level labels of the previous
    implementation ended up as well.
    """
    if len(data) == 0:
        raise ValueError("plot_data needs at least one array to plot")

    # squeeze=False always returns a 2-D grid of axes, so a single panel can be
    # indexed the same way as several panels.
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    panels = axes[0]

    for panel, image in zip(panels, data):
        # 'lower' places row 0 at the bottom of the panel; 'bottom' is not a
        # valid origin and is rejected by newer Matplotlib releases.
        panel.imshow(image, aspect='auto', origin='lower', interpolation='none')

    panels[-1].set_xlabel('Decoder timestep')
    panels[-1].set_ylabel('Encoder timestep')
    fig.tight_layout()

#### Set up hparams

In [4]:
# Sampling rate stored on the hyperparameters; it is later handed to
# `ipd.Audio(..., rate=hparams.sampling_rate)` when the waveform is played back.
SAMPLING_RATE = 22050

# Start from the project's default hyperparameters and override the sampling rate.
hparams = create_hparams()
hparams.sampling_rate = SAMPLING_RATE

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



#### Load WaveGlow for mel2audio synthesis 

In [5]:
# `Denoiser` is used at the end of this cell, but its import is commented out in
# the import cell, so on a fresh kernel this cell would stop with a NameError.
# NOTE(review): presumably this resolves to a module under 'waveglow/', which the
# import cell appends to sys.path — confirm.
from denoiser import Denoiser

waveglow_path = 'waveglow_256channels_universal_v5.pt'
# NOTE: torch.load unpickles the complete saved object (here a whole model under
# the 'model' key), which can execute arbitrary code — only load checkpoints
# obtained from a trusted source.
waveglow = torch.load(waveglow_path)['model']
# Move the vocoder to the GPU and switch it to evaluation mode.
waveglow.cuda().eval()
# Cast every module in `convinv` to float32.
for k in waveglow.convinv:
    k.float()
# Build the denoiser from the loaded vocoder.
denoiser = Denoiser(waveglow)



RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

#### Load model from checkpoint

In [None]:
# Checkpoint to synthesize with; swap the two assignments to change the voice.
# TODO: these are absolute, user-specific paths — consider moving them to a
# configurable location so the notebook runs on other machines.
checkpoint_path = "/home/phannet.pov/tacotron2_khmer/checkpoint_male" #Male
# checkpoint_path = "/home/phannet.pov/tacotron2_khmer/checkpoint_female" #Female

# Build the network from the hyperparameters, then copy the trained weights in.
model = load_model(hparams)
# map_location='cpu' deserializes the checkpoint into host memory. Only the
# 'state_dict' entry is used; load_state_dict copies those values into the
# model's own parameters, so nothing else from the file needs to occupy GPU
# memory (without map_location every stored tensor is restored onto the device
# it was saved from).
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(torch.load(checkpoint_path, map_location='cpu')['state_dict'])
# Move to the GPU and switch to evaluation mode; `_ =` suppresses the cell output.
_ = model.cuda().eval()

#### Text input

In [None]:
# Sentence to synthesize. The commented-out assignments are alternative sample
# inputs; enable exactly one `text = ...` line to choose what is spoken.
# text = "ខ្ញុំបានចំណេាយលុយចំនួន105000៛សម្រាប់អាហារពេលល្ងាចមួយពេល។"
text = "ពាក់ព័ន្ធនឹងបច្ចេកវិទ្យាឌីជីថលស្របតាមគោលនយោបាយរដ្ឋាភិបាលឌីជីថលកម្ពុជា"
# text="ប្រធានបទ ១ នៃការប្រកួតប្រជែងក្នុងកម្មវិធីនេះ ក៏មានពាក់ព័ន្ធនឹងមនុស្សយន្តដូចខ្ញុំផងដែរ"

In [None]:
# Pre-process and normalize the raw input with the project's `run` helpers.
# The results get their own names: `text` is left untouched so re-running this
# cell does not process already-processed text, and the builtin `input` is no
# longer shadowed.
processed_text = run.text_process(text)
normalized_text = run.textNorm(processed_text)

# Encode the normalized text as symbol ids; `[None, :]` prepends an axis of
# size 1 (presumably the batch dimension expected by the model — confirm).
sequence_np = np.array(text_to_sequence(normalized_text, ['english_cleaners']))[None, :]

# Move the ids to the GPU as int64. The deprecated `torch.autograd.Variable`
# wrapper is not needed: tensors carry autograd state themselves.
sequence = torch.from_numpy(sequence_np).cuda().long()

#### Decode text input and plot results

In [None]:
# Run the model on the encoded text. Gradients are not needed for synthesis, so
# the call is wrapped in no_grad to avoid building (and keeping in GPU memory)
# an autograd graph for the outputs.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)

# Plot the first item of each output: the two mel outputs and the transposed
# alignment matrix (the panel that receives the axis labels in plot_data).
plot_data((
    mel_outputs.float().cpu().numpy()[0],
    mel_outputs_postnet.float().cpu().numpy()[0],
    alignments.float().cpu().numpy()[0].T,
))

#### Synthesize audio from spectrogram using WaveGlow

In [None]:
# `sigma` value passed to WaveGlow's `infer` call.
WAVEGLOW_SIGMA = 0.666

# Vocode the post-net mel output into audio without tracking gradients.
with torch.no_grad():
    audio = waveglow.infer(mel_outputs_postnet, sigma=WAVEGLOW_SIGMA)

# Take the first item of the result, bring it to host memory as a NumPy array,
# and render an inline audio player at the configured sampling rate.
waveform = audio[0].data.cpu().numpy()
ipd.Audio(waveform, rate=hparams.sampling_rate)