In [1]:
import os
import sys

# Put the parent of the current working directory at the front of sys.path so
# that modules found there take precedence on import.
# A notebook has no __file__; abspath() of a bare relative name resolves
# against the working directory, so the parent of cwd is what gets computed.
SOURCE_DIR = os.path.dirname(os.getcwd())
sys.path.insert(0, SOURCE_DIR)

In [2]:
from malaya_speech.train.model import aligner
import tensorflow as tf






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
from scipy.stats import betabinom
import numpy as np

def beta_binomial_prior_distribution(phoneme_count, mel_count, scaling_factor=1.0):
    """Build a beta-binomial prior over phoneme positions for every mel frame.

    For frame ``k`` in ``1..mel_count`` a beta-binomial distribution with
    ``n = phoneme_count``, ``a = scaling_factor * k`` and
    ``b = scaling_factor * (mel_count + 1 - k)`` is evaluated at the positions
    ``0..phoneme_count - 1``.

    Parameters
    ----------
    phoneme_count : int
        Number of phoneme positions (columns of the result).
    mel_count : int
        Number of mel frames (rows of the result).
    scaling_factor : float, optional
        Multiplier applied to both shape parameters ``a`` and ``b``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(mel_count, phoneme_count)``; row ``k - 1`` holds the
        pmf values for frame ``k``.  The pmf is sampled at ``phoneme_count``
        of the distribution's ``phoneme_count + 1`` support points, so a row
        does not necessarily sum to exactly 1.
    """
    phoneme_positions = np.arange(phoneme_count)
    per_frame_pmf = [
        betabinom(
            phoneme_count,
            scaling_factor * frame,
            scaling_factor * (mel_count + 1 - frame),
        ).pmf(phoneme_positions)
        for frame in range(1, mel_count + 1)
    ]
    return np.array(per_frame_pmf)

# Prior for 50 phoneme positions against 100 mel frames, with a leading
# singleton axis prepended and values cast to float32.
attn_prior = beta_binomial_prior_distribution(50, 100)[np.newaxis, ...].astype(np.float32)
attn_prior_tf = tf.convert_to_tensor(attn_prior)
# Display the tensor's static shape together with the value range of the prior.
attn_prior_tf.shape, attn_prior.min(), attn_prior.max()

(TensorShape([Dimension(1), Dimension(100), Dimension(50)]),
 4.968e-41,
 0.6666667)

In [4]:
# Graph inputs; None marks a dimension whose size is only known at feed time.
# Creation order is kept as-is because TF1 names placeholders by it.
# Token ids (fed from data['text_ids'] in the cells below).
i = tf.placeholder(dtype = tf.int32, shape = (None, None))
# Per-example text lengths (fed from data['len_text_ids'][:,0]).
lens = tf.placeholder(dtype = tf.int32, shape = (None,))
# Mel features with a fixed last dimension of 80 (fed from data['mel']).
mel = tf.placeholder(dtype = tf.float32, shape = (None, None, 80))
# Per-example mel lengths (fed from data['len_mel'][:,0]).
mel_lengths = tf.placeholder(dtype = tf.int32, shape = (None,))

In [5]:
# Build the alignment encoder from malaya_speech.train.model.aligner with
# vocab_size=100 and vocab_embedding=512; the `embeddings` variable it creates
# is listed with shape (100, 512) in the tf.trainable_variables() output below.
encoder = aligner.AlignmentEncoder(vocab_size = 100, vocab_embedding = 512)



Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [6]:
# Boolean mask that is True wherever the token id differs from 0, with a
# trailing singleton axis appended (static shape (?, ?, 1)).
# NOTE(review): presumably id 0 is the padding token -- confirm against the tokenizer.
attention_mask = tf.expand_dims(tf.math.not_equal(i, 0), -1)
attention_mask

<tf.Tensor 'ExpandDims:0' shape=(?, ?, 1) dtype=bool>

In [7]:
# Call the encoder on the mel features and token ids, passing the mask built
# from the token ids. It returns a pair, bound here to attn_soft and
# attn_logprob; both are float32 tensors of static shape (?, 1, ?, ?).
# NOTE(review): the two trailing axes look like (mel_frames, text_len), judging
# by the (1, 1, 408, 72) result fetched later in this notebook -- confirm.
attn_soft, attn_logprob = encoder(mel, i, mask = attention_mask)
attn_soft, attn_logprob

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


(<tf.Tensor 'AlignmentEncoder/transpose_4:0' shape=(?, 1, ?, ?) dtype=float32>,
 <tf.Tensor 'AlignmentEncoder/Identity:0' shape=(?, 1, ?, ?) dtype=float32>)

In [8]:
# Derive attn_hard from the soft attention and the text / mel length vectors.
# NOTE(review): the resulting tensor is named 'PyFunc:0', so this step
# presumably runs Python code inside the graph (tf.py_func) -- confirm, since
# that would affect graph export and gradient flow through attn_hard.
attn_hard = encoder.get_hard_attention(attn_soft, lens, mel_lengths)
attn_hard

<tf.Tensor 'PyFunc:0' shape=(?, 1, ?, ?) dtype=float32>

In [9]:
# Scalar float32 loss produced by aligner.forwardsum_loss from the attention
# log-probabilities and the text / mel length vectors.
# The notebook-level name `forwardsum_loss` now refers to this tensor; the
# function itself stays reachable as aligner.forwardsum_loss.
forwardsum_loss = aligner.forwardsum_loss(attn_logprob, lens, mel_lengths)
forwardsum_loss

Instructions for updating:
Use `tf.cast` instead.



<tf.Tensor 'truediv:0' shape=() dtype=float32>

In [10]:
# Scalar float32 loss produced by aligner.bin_loss from the hard and soft
# attention tensors. As with forwardsum_loss, the notebook-level name now
# refers to the tensor; the function stays reachable as aligner.bin_loss.
bin_loss = aligner.bin_loss(attn_hard, attn_soft)
bin_loss

<tf.Tensor 'truediv_1:0' shape=() dtype=float32>

In [11]:
# Adam (learning rate 1e-3) minimising the unweighted sum of both losses.
# Despite its name, `optimizer` holds the value returned by minimize() -- the
# training op, not the AdamOptimizer object -- which is why fetching it in the
# training loop below yields None.
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-3).minimize(forwardsum_loss + bin_loss)

In [12]:
# Open an interactive session and initialise every global variable in the
# graph (this must run after the optimizer is built so its variables exist).
# NOTE(review): the session is never closed in the cells visible here.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [13]:
import pickle

# Load the pickled sample from the working directory. The file must contain a
# two-item sequence, unpacked as (data, d); `d` is not referenced by any other
# cell visible in this notebook.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load a dataset-mel.pkl that comes from a trusted source.
with open('dataset-mel.pkl', 'rb') as fopen:
    data, d = pickle.load(fopen)

In [14]:
# Inspect which fields the loaded sample provides.
data.keys()

dict_keys(['mel', 'text_ids', 'len_mel', 'len_text_ids', 'stop_token_target', 'f0', 'len_f0', 'energy', 'len_energy', 'g'])

In [15]:
# Forward-pass check: feed the pickled sample through the graph and look at
# the shapes of the two attention outputs.
batch_feed = {
    i: data['text_ids'],
    lens: data['len_text_ids'][:,0],
    mel: data['mel'],
    mel_lengths: data['len_mel'][:,0],
}
o = sess.run([attn_soft, attn_logprob], feed_dict = batch_feed)
tuple(fetched.shape for fetched in o)

((1, 1, 408, 72), (1, 1, 408, 72))

In [16]:
# Run 100 optimisation steps on the same sample.
# The feed dict and fetch list do not change between steps, so build them once.
batch_feed = {
    i: data['text_ids'],
    lens: data['len_text_ids'][:,0],
    mel: data['mel'],
    mel_lengths: data['len_mel'][:,0],
}
train_fetches = [forwardsum_loss, bin_loss, optimizer]
for no in range(100):
    # o is [forwardsum_loss value, bin_loss value, None]; the None comes from
    # fetching the training op.
    o = sess.run(train_fetches, feed_dict = batch_feed)
    print(no, o)

0 [1459.0544, 6.00924, None]
1 [1458.922, 6.010376, None]
2 [1458.725, 6.009456, None]
3 [1458.3358, 6.007429, None]
4 [1457.6276, 6.0045724, None]
5 [1456.4135, 6.001406, None]
6 [1454.4264, 5.9989, None]
7 [1451.3136, 5.9970064, None]
8 [1446.8091, 5.9959426, None]
9 [1441.2488, 5.99627, None]
10 [1432.3127, 5.99477, None]
11 [1419.6083, 5.993242, None]
12 [1401.8644, 5.993187, None]
13 [1376.2357, 5.9956164, None]
14 [1339.6927, 6.001824, None]
15 [1289.0574, 6.0142713, None]
16 [1221.0515, 6.0310707, None]
17 [1134.6243, 6.053431, None]
18 [1033.6254, 6.0804873, None]
19 [929.02783, 6.1214333, None]
20 [838.6264, 6.179869, None]
21 [782.5479, 6.254599, None]
22 [769.0967, 6.3337784, None]
23 [783.69055, 6.400559, None]
24 [801.9633, 6.4450502, None]
25 [808.2735, 6.4687514, None]
26 [796.81964, 6.477003, None]
27 [768.8204, 6.4584885, None]
28 [732.1786, 6.4360266, None]
29 [694.6295, 6.4119573, None]
30 [661.8169, 6.3863454, None]
31 [634.8101, 6.3593745, None]
32 [612.2522, 6.331

In [17]:
# List the trainable variables in the default graph; every entry in the visible
# output lives under the 'AlignmentEncoder/' name scope.
tf.trainable_variables()

[<tf.Variable 'AlignmentEncoder/embeddings:0' shape=(100, 512) dtype=float32_ref>,
 <tf.Variable 'AlignmentEncoder/sequential/ConvNorm/conv1d/kernel:0' shape=(3, 512, 1024) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential/ConvNorm/conv1d/bias:0' shape=(1024,) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential/ConvNorm_1/conv1d_1/kernel:0' shape=(1, 1024, 80) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential/ConvNorm_1/conv1d_1/bias:0' shape=(80,) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential_1/ConvNorm/conv1d_2/kernel:0' shape=(3, 80, 160) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential_1/ConvNorm/conv1d_2/bias:0' shape=(160,) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential_1/ConvNorm_1/conv1d_3/kernel:0' shape=(1, 160, 80) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential_1/ConvNorm_1/conv1d_3/bias:0' shape=(80,) dtype=float32>,
 <tf.Variable 'AlignmentEncoder/sequential_1/ConvNorm_2/conv1d_4/kernel:0' shape=(1, 80

In [18]:
# Write a checkpoint of the trainable variables to test/model.ckpt.
# NOTE(review): var_list is limited to tf.trainable_variables(), so optimizer
# state is presumably not part of this checkpoint -- confirm if resuming
# training from it ever matters.
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'test/model.ckpt')

'test/model.ckpt'

In [19]:
# Show the checkpoint files that were just written, then delete the scratch
# directory (the path is relative to the notebook's working directory).
!ls -lh test
!rm -rf test

total 14360
-rw-r--r--  1 huseinzolkepli  staff    77B Sep 23 01:31 checkpoint
-rw-r--r--  1 huseinzolkepli  staff   6.7M Sep 23 01:31 model.ckpt.data-00000-of-00001
-rw-r--r--  1 huseinzolkepli  staff   579B Sep 23 01:31 model.ckpt.index
-rw-r--r--  1 huseinzolkepli  staff   274K Sep 23 01:31 model.ckpt.meta
