In [1]:
import tensorflow as tf
import numpy as np

def dense_to_sparse(dense_tensor, sequence_length):
    indices = tf.where(tf.sequence_mask(sequence_length))
    values = tf.gather_nd(dense_tensor, indices)
    shape = tf.shape(dense_tensor, out_type = tf.int64)
    return tf.SparseTensor(indices, values, shape)

In [2]:
import sys
import os

SOURCE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__name__)))
sys.path.insert(0, SOURCE_DIR)

In [3]:
import malaya_speech
import malaya_speech.train as train
import malaya_speech.train.model.ctc as ctc






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.




In [4]:
targets = tf.placeholder(tf.int32, (None, None))
targets_mask = 1 - tf.to_int32(tf.equal(targets, 0))
targets_lengths = tf.reduce_sum(targets_mask, axis = 1)
labels = dense_to_sparse(targets, targets_lengths)

Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [5]:
sess = tf.InteractiveSession()

In [6]:
sess.run(labels, feed_dict = {targets: [[1,2,3,4], [3,3,0,0]]})

SparseTensorValue(indices=array([[0, 0],
       [0, 1],
       [0, 2],
       [0, 3],
       [1, 0],
       [1, 1]]), values=array([1, 2, 3, 4, 3, 3], dtype=int32), dense_shape=array([2, 4]))

In [7]:
actual = malaya_speech.char.encode('saya suke')
encoded = malaya_speech.char.encode('ssssaaaaayya         suuuuukka')
onehot = []
for i in encoded:
    o = np.zeros(shape = (malaya_speech.char.VOCAB_SIZE))
    o[i] = 1.0
    onehot.append(o)
    
onehot = np.array([onehot])

In [9]:
logits = tf.placeholder(tf.float32, (None, None, malaya_speech.char.VOCAB_SIZE))
targets = tf.placeholder(tf.int32, (None, None))
input_lengths = tf.placeholder(tf.int32, (None))

loss = ctc.loss.ctc_loss(logits, targets, input_lengths)




In [11]:
sess.run(loss, feed_dict = {logits: onehot, targets: [actual], input_lengths: [len(actual)]})

(54.518677, 54.518677, 10.0)

In [12]:
decoded, probs = tf.nn.ctc_greedy_decoder(tf.transpose(logits, [1, 0, 2]), 
                                          sequence_length = input_lengths,
                                          merge_repeated = True)
decoded = tf.sparse.to_dense(decoded[0])
decoded, probs

(<tf.Tensor 'SparseToDense:0' shape=(?, ?) dtype=int64>,
 <tf.Tensor 'CTCGreedyDecoder:3' shape=(?, 1) dtype=float32>)

In [13]:
sess.run(decoded, feed_dict = {logits: onehot, input_lengths: [len(encoded)]}), actual

(array([[117,  99, 123,  99,  34, 117, 119, 109,  99,   1]]),
 [117, 99, 123, 99, 34, 117, 119, 109, 103, 1])

In [14]:
seq_acc = ctc.metrics.ctc_sequence_accuracy(logits, targets, input_lengths)
sess.run(seq_acc, feed_dict = {logits: onehot, targets: [actual], input_lengths: [len(encoded)]})

Instructions for updating:
reduction_indices is deprecated, use axis instead


0.9