In [1]:
import os

# Expose only CUDA device index 1 to this process. This runs in the first
# cell, before the TensorFlow import in a later cell.
os.environ.update({'CUDA_VISIBLE_DEVICES': '1'})

In [2]:
import sys

# Prepend the parent of the current working directory to the import path.
# Presumably the notebook is started from a subdirectory of the repository so
# that the local `malaya_speech` checkout is the one imported — TODO confirm.
SOURCE_DIR = os.path.dirname(os.getcwd())
sys.path.insert(0, SOURCE_DIR)

In [3]:
import malaya_speech
import malaya_speech.config
from malaya_speech.train.model import lightspeech, fastspeech2
import tensorflow as tf
import numpy as np

In [4]:
# Build the FastSpeech2 config object from the packaged keyword settings plus
# an explicit vocabulary size, then construct the LightSpeech model from it.
config = fastspeech2.Config(
    vocab_size = 66,
    **malaya_speech.config.fastspeech2_config
)
model = lightspeech.Model(config)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [5]:
# Graph inputs. Creation order is kept so the auto-generated op names do not
# change.
# `i` is fed data['text_ids'] and `lens` is fed the duration array `d`.
i = tf.placeholder(dtype = tf.int32, shape = [None, None])
lens = tf.placeholder(dtype = tf.int32, shape = [None, None])
# Target mel frames (last axis fixed at 80) and their per-example lengths.
mel_outputs = tf.placeholder(dtype = tf.float32, shape = [None, None, 80])
mel_lengths = tf.placeholder(dtype = tf.int32, shape = [None])
# Energy and f0 targets, each with a per-example length vector.
energies = tf.placeholder(dtype = tf.float32, shape = [None, None])
energies_lengths = tf.placeholder(dtype = tf.int32, shape = [None])
f0s = tf.placeholder(dtype = tf.float32, shape = [None, None])
f0s_lengths = tf.placeholder(dtype = tf.int32, shape = [None])

In [6]:
# Build the model graph in training mode and unpack its five output tensors.
(
    mel_before,
    mel_after,
    duration_outputs,
    f0_outputs,
    energy_outputs,
) = model(i, lens, f0s, energies, training = True)



Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [7]:
from malaya_speech.train.model.lightspeech.model import EncSepConvLayer

In [8]:
# Display the five tensors returned by the model call to inspect their names,
# static shapes and dtypes.
mel_before, mel_after, duration_outputs, f0_outputs, energy_outputs

(<tf.Tensor 'model/mel_before/BiasAdd:0' shape=(?, ?, 80) dtype=float32>,
 <tf.Tensor 'model/add_3:0' shape=(?, ?, 80) dtype=float32>,
 <tf.Tensor 'model/duration_predictor/Squeeze:0' shape=(?, ?) dtype=float32>,
 <tf.Tensor 'model/f0_predictor/Squeeze:0' shape=(?, ?) dtype=float32>,
 <tf.Tensor 'model/energy_predictor/Squeeze:0' shape=(?, ?) dtype=float32>)

In [9]:
# Base loss used for every term below: TensorFlow's mean squared error.
loss_f = tf.losses.mean_squared_error

In [10]:
# The duration target is log(duration + 1), so the duration output is
# compared against it in the log domain.
log_duration = tf.math.log(
    tf.cast(tf.math.add(x = lens, y = 1), dtype = tf.float32)
)
duration_loss = loss_f(labels = log_duration, predictions = duration_outputs)

In [11]:
# Validity mask over mel frames: 1.0 for frames inside each example's length,
# 0.0 for padding. A trailing axis is added so it broadcasts over the mel bins.
max_length = tf.cast(tf.reduce_max(mel_lengths), tf.int32)
mask = tf.expand_dims(
    tf.sequence_mask(mel_lengths, max_length, tf.float32),
    axis = -1,
)
mask

<tf.Tensor 'ExpandDims:0' shape=(?, ?, 1) dtype=float32>

In [12]:
from functools import partial
from malaya_speech.train.loss import calculate_2d_loss, calculate_3d_loss

# MSE weighted by the mel frame mask. `partial` captures the tensor currently
# bound to `mask`, so rebinding `mask` in later cells does not affect it.
mse_mel = partial(loss_f, weights = mask)

In [13]:
# Masked MSE between the target mels and the `mel_before` model output.
mel_loss_before = calculate_3d_loss(mel_outputs, mel_before, mse_mel)
mel_loss_before

<tf.Tensor 'mean_squared_error_1/value:0' shape=() dtype=float32>

In [14]:
# Masked MSE between the target mels and the `mel_after` model output.
mel_loss_after = calculate_3d_loss(mel_outputs, mel_after, mse_mel)
mel_loss_after

<tf.Tensor 'mean_squared_error_2/value:0' shape=() dtype=float32>

In [15]:
# Validity mask for the energy targets, built from the per-example lengths,
# then an MSE weighted by that mask.
max_length = tf.cast(tf.reduce_max(energies_lengths), tf.int32)
mask = tf.sequence_mask(energies_lengths, max_length, tf.float32)
energies_mel = partial(loss_f, weights = mask)
energies_loss = calculate_2d_loss(energies, energy_outputs, energies_mel)

In [16]:
# Same masked-MSE construction as for energy, applied to the f0 targets.
# NOTE(review): `energies_mel` is rebound here to the f0-masked loss. The name
# is kept so notebook state is unchanged, but `f0s_mel` would be clearer.
max_length = tf.cast(tf.reduce_max(f0s_lengths), tf.int32)
mask = tf.sequence_mask(f0s_lengths, max_length, tf.float32)
energies_mel = partial(loss_f, weights = mask)
f0s_loss = calculate_2d_loss(f0s, f0_outputs, energies_mel)

In [17]:
# Total training objective: unweighted sum of the five loss terms.
loss = (
    duration_loss
    + mel_loss_before
    + mel_loss_after
    + energies_loss
    + f0s_loss
)

In [18]:
# Open an interactive session and initialise every variable created so far.
# The session is not closed anywhere in the visible notebook.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [19]:
import pickle

# Load the example batch (`data`) and its duration array (`d`).
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open('dataset-mel.pkl', 'rb') as handle:
    data, d = pickle.load(handle)

In [20]:
# List the fields available in the loaded batch.
data.keys()

dict_keys(['mel', 'text_ids', 'len_mel', 'len_text_ids', 'stop_token_target', 'f0', 'len_f0', 'energy', 'len_energy', 'g'])

In [21]:
# Inspect the duration array loaded alongside the batch.
d

array([ 0,  5,  5,  3,  6,  7,  6,  1,  2, 14,  4,  0,  0,  0, 12,  0, 10,
       10, 13, 14,  1,  6,  7,  5,  3,  6,  1,  4, 12,  3, 11,  7,  5,  3,
        0,  2,  0,  3,  8,  4, 13, 12,  8,  7,  0,  0,  3,  2,  5, 11,  5,
        7,  8, 14, 11,  6,  4,  9,  6,  7,  1,  6,  5,  5,  6,  7, 12,  8,
        6,  2,  3,  6])

In [22]:
def average_by_duration(x, durs):
    """
    Average a frame-level feature over each token's span of frames.

    Parameters
    ----------
    x : 1-D array-like of float
        Frame-level values (this notebook passes `data['f0'][0]` and
        `data['energy'][0]`).
    durs : 1-D array-like of int
        Number of consecutive frames of `x` that belong to each token.

    Returns
    -------
    np.ndarray of float32, shape (len(durs),)
        Mean of the non-zero values inside each token's span, or 0.0 when the
        span is empty or contains only zeros.
    """
    x = np.asarray(x)
    durs = np.asarray(durs)
    # Span boundaries: token k owns x[durs_cum[k]:durs_cum[k + 1]].
    durs_cum = np.cumsum(np.pad(durs, (1, 0)))

    x_char = np.zeros((durs.shape[0],), dtype=np.float32)
    # Iterate over tokens rather than over range(total_frames): when the
    # durations sum to less than the number of tokens, a frame-count-bounded
    # zip stops early and leaves trailing tokens at 0.0.
    for idx, (start, end) in enumerate(zip(durs_cum[:-1], durs_cum[1:])):
        span = x[start:end]
        # Zeros are excluded from the mean rather than averaged in.
        values = span[np.where(span != 0.0)[0]]
        x_char[idx] = np.mean(values) if len(values) > 0 else 0.0

    return x_char

In [23]:
# Reduce the first example's frame-level f0 and energy to one value per token,
# using the duration array `d`.
f0, energy = (
    average_by_duration(data[key][0], d) for key in ('f0', 'energy')
)

In [24]:
# Compare the averaged f0/energy shapes with the shape of the duration array.
f0.shape, energy.shape, d.shape

((72,), (72,), (72,))

In [25]:
# Shape of the token-id batch that is fed to placeholder `i`.
data['text_ids'].shape

(1, 72)

In [26]:
# Evaluate the five loss terms on the single loaded example.
# NOTE(review): in the recorded run this call raised InvalidArgumentError from
# the encoder's depthwise convolution ("only supports equal length strides"),
# i.e. inside the model code rather than in this feed.
r = sess.run(
    [duration_loss, mel_loss_before, mel_loss_after, energies_loss, f0s_loss],
    feed_dict = {
        i: data['text_ids'],
        lens: [d],
        mel_outputs: data['mel'],
        mel_lengths: data['len_mel'][0],
        energies: [energy],
        energies_lengths: [len(energy)],
        f0s: [f0],
        f0s_lengths: [len(f0)],
    },
)

InvalidArgumentError: Current implementation only supports equal length strides in the row and column dimensions.
	 [[node model/encoder/enc_sep_conv_layer/conv_separable/depthwise_conv2d/depthwise (defined at /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]

Original stack trace for 'model/encoder/enc_sep_conv_layer/conv_separable/depthwise_conv2d/depthwise':
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
    self._run_once()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
    handle._run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 381, in dispatch_queue
    yield self.process_one()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 545, in execute_request
    user_expressions, allow_stdin,
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 306, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2877, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2922, in _run_cell
    return runner(coro)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3146, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3337, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-207ba965602b>", line 1, in <module>
    mel_before, mel_after, duration_outputs, f0_outputs, energy_outputs = model(i, lens, f0s, energies, training = True)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "/Users/huseinzolkepli/Documents/malaya-speech/malaya_speech/train/model/lightspeech/model.py", line 276, in call
    last_encoder_hidden_states = self.encoder((embedding_output, attention_mask),
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "/Users/huseinzolkepli/Documents/malaya-speech/malaya_speech/train/model/lightspeech/model.py", line 157, in call
    o = self.layer1((hidden_states, attention_mask), training=training)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "/Users/huseinzolkepli/Documents/malaya-speech/malaya_speech/train/model/lightspeech/model.py", line 112, in call
    x = self.activation_fn(self.conv1(x))
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "/Users/huseinzolkepli/Documents/malaya-speech/malaya_speech/train/model/lightspeech/model.py", line 64, in call
    x = self.depthwise_conv(tf.expand_dims(x, 2))[:, :, 0]
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/convolutional.py", line 1829, in call
    data_format=self.data_format)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py", line 5003, in depthwise_conv2d
    data_format=tf_data_format)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py", line 819, in depthwise_conv2d
    op=op)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_ops.py", line 483, in with_space_to_batch
    return new_op(input, None)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_ops.py", line 639, in __call__
    return self.call(inp, filter)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_ops.py", line 473, in <lambda>
    return lambda inp, _: op(inp, num_spatial_dims, padding)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py", line 811, in op
    name=name)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_nn_ops.py", line 2401, in depthwise_conv2d_native
    dilations=dilations, name=name)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
    op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
    attrs, op_def, compute_device)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
    op_def=op_def)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
    self._traceback = tf_stack.extract_stack()


In [None]:
# Show the loss values assigned to `r` by the sess.run cell.
r

In [None]:
# Write a checkpoint of the session's variables under test/.
saver = tf.train.Saver()
saver.save(sess = sess, save_path = 'test/model.ckpt')

In [None]:
# List the files in test/ with human-readable sizes.
!ls -lh test