In [76]:
# Sentences to synthesize; each one is rendered for every combination of
# speaker reference and residual reference further down in the notebook.
texts = [
    "Try these pages.",
    "In 2009 these reports were collected in the book Chambermaids and Soldiers.",
    "In later years in films she switched to playing character parts.",
    "The other bodies are juxtaposed in various unlit areas behind them.",
]

In [126]:
from synthesis import Synthesizer
from audio.audio_io import load_to_torch, save_from_torch
from pathlib import Path
from tqdm import tnrange

In [80]:
# Reference recordings used to derive the speaker latents: zs1.wav .. zs4.wav.
speaker_reference_path = [
    Path(f"./log/reference/zs{index}.wav") for index in range(1, 5)
]

# Reference recordings used to derive the residual latents: zr1.wav .. zr4.wav.
residual_reference_path = [
    Path(f"./log/reference/zr{index}.wav") for index in range(1, 5)
]

In [82]:
# Load every reference recording, one loaded object per path and in the same
# order as the path lists. The second argument (16000) is presumably the
# target sample rate in Hz -- TODO confirm against audio.audio_io.load_to_torch.
speaker_reference_wave = [
    load_to_torch(wav_path, 16000) for wav_path in speaker_reference_path
]
residual_reference_wave = [
    load_to_torch(wav_path, 16000) for wav_path in residual_reference_path
]

In [83]:
# Build the synthesizer from the 142k checkpoint file. Running this cell
# prints the model summary shown in the cell output below.
synthesizer = Synthesizer("./log/checkpoint_142k.pyt")


Initialising Tacotron Model...

Trainable Parameters: 19.806M

Initializing STFT Model...


Loading Weights: "./log/checkpoint_142k.pyt"

+---------------+-------------+------+------+----------+------+------+
| Tacotron(r=3) | Sample Rate | NFFT | NMel | Speakers | SPKD | NOID |
+---------------+-------------+------+------+----------+------+------+
|     142k      |    16000    | 2048 | 128  |   200    |  64  |  8   |
+---------------+-------------+------+------+----------+------+------+
 


In [85]:
# inference_speaker_noise returns an indexable result. Element 0 is kept for
# the speaker references and element 1 for the residual references
# (presumably the speaker and residual/noise latents respectively -- confirm
# against Synthesizer.inference_speaker_noise).
speaker_latent = [
    synthesizer.inference_speaker_noise(reference)[0]
    for reference in speaker_reference_wave
]
residual_latent = [
    synthesizer.inference_speaker_noise(reference)[1]
    for reference in residual_reference_wave
]

In [99]:
# Synthesized results keyed by (speaker index, residual index, text index).
# Re-running this cell discards everything synthesized so far.
waves = {}

In [101]:
# Synthesize every (speaker latent, residual latent, text) combination and
# store the result in `waves` under the key (speaker, residual, text).
#
# The loop bounds are taken from the input lists rather than a hard-coded 4,
# so adding or removing a reference recording or a sentence cannot silently
# skip items or raise an IndexError.
#
# NOTE(review): `tnrange` is deprecated in newer tqdm releases in favour of
# `tqdm.notebook.trange` / `tqdm.auto.trange`; switch the import once the
# installed tqdm version is confirmed to provide them.
for speaker in tnrange(len(speaker_latent)):
    for residual in tnrange(len(residual_latent)):
        for text in tnrange(len(texts)):
            waves[(speaker, residual, text)] = synthesizer.synthesis(
                texts[text],
                speaker_latent[speaker],
                residual_latent[residual],
            )

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 84]
[1, 1025, 354]
[1, 1025, 243]
[1, 1025, 273]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 84]
[1, 1025, 318]
[1, 1025, 240]
[1, 1025, 264]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 81]
[1, 1025, 1152]
[1, 1025, 243]
[1, 1025, 270]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 81]
[1, 1025, 1536]
[1, 1025, 249]
[1, 1025, 264]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 81]
[1, 1025, 297]
[1, 1025, 225]
[1, 1025, 264]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 81]
[1, 1025, 294]
[1, 1025, 228]
[1, 1025, 261]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 78]
[1, 1025, 288]
[1, 1025, 222]
[1, 1025, 243]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 78]
[1, 1025, 291]
[1, 1025, 222]
[1, 1025, 249]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 90]
[1, 1025, 324]
[1, 1025, 246]
[1, 1025, 285]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 90]
[1, 1025, 324]
[1, 1025, 249]
[1, 1025, 279]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 87]
[1, 1025, 324]
[1, 1025, 246]
[1, 1025, 267]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 87]
[1, 1025, 333]
[1, 1025, 252]
[1, 1025, 267]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 81]
[1, 1025, 321]
[1, 1025, 243]
[1, 1025, 264]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 81]
[1, 1025, 318]
[1, 1025, 243]
[1, 1025, 264]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 78]
[1, 1025, 315]
[1, 1025, 234]
[1, 1025, 255]


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

[1, 1025, 75]
[1, 1025, 336]
[1, 1025, 246]
[1, 1025, 264]



In [102]:
from IPython.display import Audio

In [125]:
# Listen to one sample inline: speaker reference 0, residual reference 1,
# text 0. Index [0] takes the first element of the synthesis result
# (presumably the batch dimension -- confirm); it is moved to the CPU and
# converted to a NumPy array for playback at a rate of 16000.
Audio(waves[(0, 1, 0)][0].cpu().numpy(), rate=16000)

In [128]:
# Write every synthesized result to ./log/synthesis/wave/ as
# "<speaker><residual><text>.wav" (e.g. key (0, 1, 0) -> "010.wav").
#
# The output directory is created first so the cell also works on a fresh
# checkout where it does not exist yet.
Path("./log/synthesis/wave").mkdir(parents=True, exist_ok=True)

# Iterate over what was actually synthesized instead of hard-coded ranges of 4.
# Dict insertion order matches the order of the synthesis loop, so files are
# written in the same order as before.
for (speaker, residual, text), wave in waves.items():
    # wave[0] takes the first element of the synthesis result, as in the
    # audio preview cell; the trailing 16000 is passed through unchanged.
    save_from_torch(wave[0].cpu(), f"./log/synthesis/wave/{speaker}{residual}{text}.wav", 16000)