In [1]:
import os

# Expose only CUDA device index 1 to this process. The variable is set here,
# ahead of the TensorFlow import that happens in a later cell.
GPU_INDEX = '1'
os.environ['CUDA_VISIBLE_DEVICES'] = GPU_INDEX

In [2]:
import sys

# Put the parent of the current working directory at the front of sys.path.
# NOTE(review): presumably that parent is the repository root, so the local
# malaya_speech checkout wins over any installed copy — confirm.
parent_of_cwd = os.path.dirname(os.getcwd())
SOURCE_DIR = parent_of_cwd
sys.path.insert(0, SOURCE_DIR)

In [3]:
import tensorflow as tf
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import universal_melgan as melgan
import malaya_speech.config
import numpy as np

In [10]:
import copy

# Work on a private deep copy so the overrides below do not leak into the
# module-level `malaya_speech.config.universal_melgan_config` dict. Mutating
# the shared dict in place would change it for every later user in this kernel.
melgan_config = copy.deepcopy(malaya_speech.config.universal_melgan_config)

# Shrink the generator for this smoke test: 128 filters and three 4x upsample
# stages (64x overall; the shape check further down shows 200 input frames
# producing 12800 output samples).
generator_params = melgan_config['melgan_generator_params']
generator_params['filters'] = 128
generator_params['upsample_scales'] = [4, 4, 4]

In [11]:
# Generator: build its config object from the (overridden) parameter dict,
# then instantiate the model.
generator_config = melgan.GeneratorConfig(
    **melgan_config['melgan_generator_params']
)
generator = melgan.Generator(
    generator_config, name='universalmelgan-generator'
)

# Discriminator: takes one waveform-discriminator config and one
# STFT-discriminator config, in that positional order.
waveform_disc_config = melgan.WaveFormDiscriminatorConfig(
    **melgan_config['melgan_waveform_discriminator_params']
)
stft_disc_config = melgan.STFTDiscriminatorConfig(
    **melgan_config['melgan_stft_discriminator_params']
)
discriminator = melgan.MultiScaleDiscriminator(
    waveform_disc_config,
    stft_disc_config,
    name='universalmelgan-discriminator',
)

In [12]:
# Graph inputs. `y` is a rank-2 float32 batch and `x` a rank-3 float32 batch
# whose last axis has size 80.
# NOTE(review): presumably y is (batch, samples) reference audio and x is
# (batch, frames, 80 mel bins) — confirm against the data pipeline.
y = tf.placeholder(dtype=tf.float32, shape=(None, None))
x = tf.placeholder(dtype=tf.float32, shape=(None, None, 80))

# Generated signal and the discriminator outputs for it.
y_hat = generator(x)
p_hat = discriminator(y_hat)

# The reference gets a trailing axis before being scored by the same
# discriminator.
y_with_channel = tf.expand_dims(y, axis=-1)
p = discriminator(y_with_channel)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [13]:
from malaya_speech.train.loss import calculate_2d_loss, calculate_3d_loss

In [14]:
# Loss callables shared by the generator and discriminator objectives below.
keras_losses = tf.keras.losses
mse_loss = keras_losses.MeanSquaredError()
mae_loss = keras_losses.MeanAbsoluteError()

In [15]:
# Generator objective: least-squares adversarial term plus weighted feature
# matching. Each p_hat[k] / p[k] is one discriminator's sequence of outputs:
# the last entry is scored against ones, the preceding entries are compared
# pairwise between real and generated inputs.
FEATURE_MATCHING_WEIGHT = 10

# Adversarial term, averaged over discriminators. The divisor is the explicit
# count rather than a loop index left over after the loop finishes.
adv_loss = 0.0
for fake_outputs in p_hat:
    fake_score = fake_outputs[-1]
    adv_loss += mse_loss(tf.ones_like(fake_score), fake_score)
adv_loss /= len(p_hat)

# Feature-matching term, averaged over the number of feature-map pairs that
# were actually accumulated. This stays a true mean even when discriminators
# expose different numbers of intermediate outputs.
fm_loss = 0.0
num_feature_maps = 0
for real_outputs, fake_outputs in zip(p, p_hat):
    for real_fmap, fake_fmap in zip(real_outputs[:-1], fake_outputs[:-1]):
        fm_loss += mae_loss(real_fmap, fake_fmap)
        num_feature_maps += 1
fm_loss /= num_feature_maps

adv_loss += FEATURE_MATCHING_WEIGHT * fm_loss

In [16]:
# Discriminator objective: the final output of every discriminator should be
# one for the reference input (`p`) and zero for the generated input (`p_hat`).
real_loss = 0.0
fake_loss = 0.0
for real_outputs, fake_outputs in zip(p, p_hat):
    real_score = real_outputs[-1]
    fake_score = fake_outputs[-1]
    real_loss += mse_loss(tf.ones_like(real_score), real_score)
    fake_loss += mse_loss(tf.zeros_like(fake_score), fake_score)

# Average over discriminators using the explicit count. A leftover loop index
# could silently carry a stale value from an earlier cell.
num_discriminators = len(p)
real_loss /= num_discriminators
fake_loss /= num_discriminators
dis_loss = real_loss + fake_loss

In [17]:
# Show both loss tensors; they are symbolic graph nodes here, not evaluated values.
(adv_loss, dis_loss)

(<tf.Tensor 'add_72:0' shape=() dtype=float32>,
 <tf.Tensor 'add_85:0' shape=() dtype=float32>)

In [18]:
# Open an interactive session and initialise every global variable created
# by the generator and discriminator above.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [19]:
# adv_loss_, dis_loss_ = sess.run([adv_loss, dis_loss], 
#                                     feed_dict = {x: np.random.uniform(size=(1,200,80)),
#                                                  y: np.random.uniform(size=(1,51200))})

In [20]:
# y_hat_, p_hat_, p_ = sess.run([y_hat, p_hat, p], 
#                               feed_dict = {x: np.random.uniform(size=(1,200,80)),
#                                            y: np.random.uniform(size=(1,51200))})

In [21]:
# Smoke-test the generator on its own: feed one random (1, 200, 80) input and
# display the shape of what comes out.
random_mel = np.random.uniform(size=(1, 200, 80))
generated = sess.run(y_hat, feed_dict={x: random_mel})
generated.shape

(1, 12800, 1)

In [22]:
# Write every variable in the current graph to a checkpoint so its on-disk
# size can be inspected in the next cell.
checkpoint_prefix = 'test/model.ckpt'
saver = tf.train.Saver()
saver.save(sess, checkpoint_prefix)

'test/model.ckpt'

In [23]:
# List the files the checkpoint save wrote under test/, with human-readable sizes.
!ls -lh test

total 47920
-rw-r--r--  1 huseinzolkepli  staff    77B Apr 29 13:26 checkpoint
-rw-r--r--  1 huseinzolkepli  staff    22M Apr 29 13:26 model.ckpt.data-00000-of-00001
-rw-r--r--  1 huseinzolkepli  staff   6.5K Apr 29 13:26 model.ckpt.index
-rw-r--r--  1 huseinzolkepli  staff   1.5M Apr 29 13:26 model.ckpt.meta


In [24]:
# Clean up: delete the temporary test/ checkpoint directory created above.
!rm -rf test