In [1]:
import sys
import os

SOURCE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__name__)))
sys.path.insert(0, SOURCE_DIR)

In [2]:
import tensorflow as tf
import numpy as np

In [3]:
tf.compat.v1.enable_eager_execution()

In [4]:
import malaya_speech
import malaya_speech.config
import malaya_speech.train.model.conformer as conformer
import malaya_speech.train.model.transducer as transducer
import tensorflow as tf






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [5]:
config = malaya_speech.config.transducer_featurizer_config
featurizer = malaya_speech.utils.tf_featurization.STTFeaturizer(**config)

In [6]:
config = malaya_speech.config.conformer_small_encoder_config
small_model = conformer.Model(**config)

In [7]:
config = malaya_speech.config.conformer_small_decoder_config
small_transducer = transducer.rnn_multilanguage.Model(small_model, language_size = 2,
                                        vocabulary_size = 100, 
                                        training_mode = True,
                                        **config)




In [8]:
small_model.conv_subsampling.time_reduction_factor

4

In [9]:
y, sr = malaya_speech.load('../speech/example-speaker/husein-zolkepli.wav', sr = 16000)
y1, sr = malaya_speech.load('../speech/example-speaker/shafiqah-idayu.wav', sr = 16000)
len(y), len(y1)

(90090, 56298)

In [10]:
padded, lens = malaya_speech.padding.sequence_1d([y, y1], return_len = True)
v = tf.expand_dims(tf.map_fn(featurizer.vectorize, padded.astype(np.float32)), -1)
v.shape

TensorShape([Dimension(2), Dimension(561), Dimension(80), Dimension(1)])

In [11]:
enc = small_transducer.encoder(v, training = True)
enc.shape

TensorShape([Dimension(2), Dimension(141), Dimension(144)])

In [12]:
masks = tf.sequence_mask([140, 140], 140, dtype=tf.float32)
masks

<tf.Tensor: id=13280, shape=(2, 140), dtype=float32, numpy=
array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1.,

In [13]:
t = [[1,1,1,1,1,1], [1,1,1,1,1,1]]
l = tf.convert_to_tensor([1, 0])
l = tf.tile(tf.expand_dims(l, -1), (1, tf.shape(enc)[1]))

In [14]:
small_logits, outputs_language = small_transducer([v, t, [6, 6], l], training = True)
small_logits.shape, outputs_language.shape

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


(TensorShape([Dimension(2), Dimension(141), Dimension(6), Dimension(100)]),
 TensorShape([Dimension(2), Dimension(141), Dimension(2)]))

In [15]:
masks = tf.sequence_mask([141, 141], 141, dtype=tf.float32)
tf.contrib.seq2seq.sequence_loss(logits = outputs_language,
                                                     targets = l,
                                                     weights = masks)

<tf.Tensor: id=20720, shape=(), dtype=float32, numpy=1.0160404>

In [16]:
small_transducer.joint_net.training_mode = False

In [17]:
small_transducer.greedy_decoder(v, np.array([141, 141]), training = False)

(<tf.Tensor: id=46059, shape=(2, 561), dtype=int32, numpy=
 array([[68, 68, 68, ...,  0,  0,  0],
        [68, 68, 68, ...,  0,  0,  0]], dtype=int32)>,
 <tf.Tensor: id=46079, shape=(2, 561), dtype=int32, numpy=
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>)