In [1]:
import os

# Expose only the GPU with index 1 to this process. This cell runs first,
# ahead of the TensorFlow import in a later cell.
os.environ.update({'CUDA_VISIBLE_DEVICES': '1'})

In [2]:
import sys

# Put the parent of the current working directory at the front of sys.path
# (presumably the repository root, so the local malaya_speech checkout is the
# one that gets imported -- TODO confirm).
SOURCE_DIR = os.path.dirname(os.getcwd())
sys.path.insert(0, SOURCE_DIR)

In [3]:
import tensorflow as tf
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import melgan
import malaya_speech.config
import numpy as np






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [4]:
# MelGAN configuration exposed by malaya_speech.config; its
# 'melgan_generator_params' and 'melgan_discriminator_params' entries are
# unpacked into the model configs in the next cell.
melgan_config = malaya_speech.config.melgan_config

In [5]:
# Build each network from its own section of the shared MelGAN configuration.
generator_params = melgan_config['melgan_generator_params']
discriminator_params = melgan_config['melgan_discriminator_params']

generator = melgan.Generator(
    melgan.GeneratorConfig(**generator_params), name='melgan-generator'
)
discriminator = melgan.MultiScaleDiscriminator(
    melgan.DiscriminatorConfig(**discriminator_params),
    name='melgan-discriminator',
)

In [6]:
# Mel-spectrogram loss object; passed as `loss_fn` to calculate_2d_loss when
# `spect_loss` is built further down.
mels_loss = melgan.loss.TFMelSpectrogram()

In [7]:
# Graph inputs. The placeholders are created in the same order as before
# (y, then x) so the auto-generated op names do not change.
# `y` is the reference waveform -- presumably (batch, samples); `x` is the
# generator input with 80 channels on the last axis -- presumably
# (batch, frames, mel bins). TODO confirm.
y = tf.placeholder(tf.float32, shape=(None, None))
x = tf.placeholder(tf.float32, shape=(None, None, 80))

# Generated waveform; a later cell shows a trailing channel axis of size 1.
y_hat = generator(x)

# Discriminator outputs for the generated audio first, then for the real
# audio, which gets a trailing axis added to match `y_hat`.
p_hat = discriminator(y_hat)
p = discriminator(tf.expand_dims(y, axis=-1))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [9]:
# Inspect the list of tensors the first discriminator scale returns for the
# real waveform.
p[0]

[<tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/tf_reflection_pad1d_12/MirrorPad:0' shape=(?, ?, 1) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/conv1d_26/BiasAdd:0' shape=(?, ?, 16) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/leaky_re_lu_29/LeakyRelu:0' shape=(?, ?, 16) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/group_conv1d/BiasAdd:0' shape=(?, ?, 64) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/leaky_re_lu_30/LeakyRelu:0' shape=(?, ?, 64) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/group_conv1d_1/BiasAdd:0' shape=(?, ?, 256) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/leaky_re_lu_31/LeakyRelu:0' shape=(?, ?, 256) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._0/group_conv1d_2/BiasAdd:0' shape=(?, ?, 10

In [10]:
# Same inspection for the second discriminator scale.
p[1]

[<tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/tf_reflection_pad1d_13/MirrorPad:0' shape=(?, ?, 1) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/conv1d_29/BiasAdd:0' shape=(?, ?, 16) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/leaky_re_lu_35/LeakyRelu:0' shape=(?, ?, 16) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/group_conv1d_4/BiasAdd:0' shape=(?, ?, 64) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/leaky_re_lu_36/LeakyRelu:0' shape=(?, ?, 64) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/group_conv1d_5/BiasAdd:0' shape=(?, ?, 256) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/leaky_re_lu_37/LeakyRelu:0' shape=(?, ?, 256) dtype=float32>,
 <tf.Tensor 'melgan-discriminator_1/melgan_discriminator_scale_._1/group_conv1d_6/BiasAdd:0' shape=(?, ?, 

In [8]:
from malaya_speech.train.loss import calculate_2d_loss, calculate_3d_loss

In [9]:
# Keras loss objects with reduction switched off ('none'). Both are handed to
# calculate_3d_loss as `loss_fn` in the cells that follow.
mse_loss = tf.keras.losses.MeanSquaredError(reduction='none')
mae_loss = tf.keras.losses.MeanAbsoluteError(reduction='none')

In [10]:
# Generator-side losses.
#
# `p` / `p_hat` hold one list of tensors per discriminator scale, for the real
# and generated audio respectively. The last tensor of each list is used as
# that scale's score; the tensors before it are used as feature maps.
#
# The averages below divide by explicit counts. The previous version divided by
# `i + 1` / `(i + 1) * (j + 1)` using loop variables that leaked out of the
# `for` loops, which raises NameError if a loop body never runs and gives the
# wrong divisor if scales return different numbers of feature maps.

# Weight of the feature-matching term inside the generator adversarial loss.
FEATURE_MATCHING_WEIGHT = 10

# Adversarial term: MSE between each scale's score for the generated audio
# and a tensor of ones, averaged over scales.
adv_loss = 0.0
for fake_outputs in p_hat:
    adv_loss += calculate_3d_loss(
        tf.ones_like(fake_outputs[-1]), fake_outputs[-1], loss_fn=mse_loss
    )
adv_loss /= len(p_hat)

# Feature-matching term: MAE between real and generated feature maps (every
# output except the final score), averaged over all compared pairs.
fm_loss = 0.0
fm_terms = 0
for real_outputs, fake_outputs in zip(p, p_hat):
    for real_feature, fake_feature in zip(real_outputs[:-1], fake_outputs[:-1]):
        fm_loss += calculate_3d_loss(
            real_feature, fake_feature, loss_fn=mae_loss
        )
        fm_terms += 1
fm_loss /= fm_terms
adv_loss += FEATURE_MATCHING_WEIGHT * fm_loss

# Spectrogram loss between the reference waveform and the generated waveform
# with its trailing axis squeezed away.
spect_loss = calculate_2d_loss(
    y, tf.squeeze(y_hat, -1), loss_fn=mels_loss
)

In [11]:
# Discriminator loss: for every scale, the final output for real audio is
# pushed towards ones and the final output for generated audio towards zeros
# (both via MSE); each sum is then averaged over the scales.
#
# The divisor is the actual number of scales. The previous version used
# `i + 1` with the loop variable leaked from the `for` loop, which raises
# NameError when `p` is empty and hides where the count comes from.
real_loss = 0.0
fake_loss = 0.0
for real_outputs, fake_outputs in zip(p, p_hat):
    real_loss += calculate_3d_loss(
        tf.ones_like(real_outputs[-1]), real_outputs[-1], loss_fn=mse_loss
    )
    fake_loss += calculate_3d_loss(
        tf.zeros_like(fake_outputs[-1]), fake_outputs[-1], loss_fn=mse_loss
    )
n_scales = len(p)
real_loss /= n_scales
fake_loss /= n_scales
dis_loss = real_loss + fake_loss

In [12]:
# Show the three loss tensors. NOTE(review): per the output below, adv_loss
# and dis_loss have shape (?, ?) and spect_loss has shape (?,), so none of
# them is a scalar yet -- confirm a reduction happens before optimisation.
adv_loss, dis_loss, spect_loss

(<tf.Tensor 'add_42:0' shape=(?, ?) dtype=float32>,
 <tf.Tensor 'add_49:0' shape=(?, ?) dtype=float32>,
 <tf.Tensor 'tf_mel_spectrogram/Mean:0' shape=(?,) dtype=float32>)

In [13]:
# Open a session for the graph built above and initialise every variable.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [14]:
# adv_loss_, dis_loss_ = sess.run([adv_loss, dis_loss], 
#                                     feed_dict = {x: np.random.uniform(size=(1,200,80)),
#                                                  y: np.random.uniform(size=(1,51200))})

In [18]:
# y_hat_, p_hat_, p_ = sess.run([y_hat, p_hat, p], 
#                                     feed_dict = {x: np.random.uniform(size=(1,200,80)),
#                                                  y: np.random.uniform(size=(1,51200))})

In [20]:
# Smoke test: push one random input with 200 steps of 80 channels through the
# generator and display the shape of the result.
random_mel = np.random.uniform(size=(1, 200, 80))
sess.run(y_hat, feed_dict={x: random_mel}).shape

(1, 51200, 1)