In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import re

dimension = 400
vocab = "ES abcdefghijklmnopqrstuvwxyz'"
char2idx = {char: idx for idx, char in enumerate(vocab)}
idx2char = {idx: char for idx, char in enumerate(vocab)}

def text2idx(text):
    text = re.sub(r'[^a-z ]', '', text.lower()).strip() + 'S'
    converted = [char2idx[char] for char in text]
    return text, converted

In [3]:
import tensorflow as tf
import numpy as np

train_X, train_Y = [], []
text_files = [f for f in os.listdir('spectrogram-train') if f.endswith('.npy')]
for fpath in text_files:
    try:
        splitted = fpath.split('-')
        if len(splitted) == 2:
            splitted[1] = splitted[1].split('.')[1]
            fpath = splitted[0] + '.' + splitted[1]
        with open('data/' + fpath.replace('npy', 'txt')) as fopen:
            text, converted = text2idx(fopen.read())
        w = np.load('spectrogram-train/' + fpath)
        if w.shape[1] != dimension:
            continue
        train_X.append(w)
        train_Y.append(converted)
    except:
        pass

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
test_X, test_Y = [], []
text_files = [f for f in os.listdir('spectrogram-test') if f.endswith('.npy')]
for fpath in text_files:
    with open('data/' + fpath.replace('npy', 'txt')) as fopen:
        text, converted = text2idx(fopen.read())
    w = np.load('spectrogram-test/' + fpath)
    if w.shape[1] != dimension:
        continue
    test_X.append(w)
    test_Y.append(converted)

In [5]:
len(train_X), len(test_X)

(13128, 560)

In [6]:
embed_size = 256
encoder_num_banks = 16
decoder_num_banks = 8
num_highway_blocks = 4
learning_rate = 1e-4

def embed(inputs, vocab_size, dimension, scope = 'embedding', reuse = None):
    with tf.variable_scope(scope, reuse = reuse):
        lookup_table = tf.get_variable(
            'lookup_table',
            dtype = tf.float32,
            shape = [vocab_size, dimension],
            initializer = tf.truncated_normal_initializer(
                mean = 0.0, stddev = 0.01
            ),
        )
        lookup_table = tf.concat(
            (tf.zeros(shape = [1, dimension]), lookup_table[1:, :]), 0
        )
    return tf.nn.embedding_lookup(lookup_table, inputs)


def normalize_in(inputs, activation_fn = None, scope = 'normalize_in'):
    with tf.variable_scope(scope):
        batch, steps, channels = inputs.get_shape().as_list()
        var_shape = [channels]
        mu, sigma_sq = tf.nn.moments(inputs, [1], keep_dims = True)
        shift = tf.Variable(tf.zeros(var_shape))
        scale = tf.Variable(tf.ones(var_shape))
        epsilon = 1e-8
        normalized = (inputs - mu) / (sigma_sq + epsilon) ** (0.5)
        outputs = scale * normalized + shift
        if activation_fn:
            outputs = activation_fn(outputs)
    return outputs


def conv1d(
    inputs,
    filters = None,
    size = 1,
    rate = 1,
    padding = 'SAME',
    use_bias = False,
    activation_fn = None,
    scope = 'conv1d',
    reuse = None,
):
    with tf.variable_scope(scope):
        if padding.lower() == 'causal':
            pad_len = (size - 1) * rate
            inputs = tf.pad(inputs, [[0, 0], [pad_len, 0], [0, 0]])
            padding = 'valid'
        if filters is None:
            filters = inputs.get_shape().as_list()[-1]
        params = {
            'inputs': inputs,
            'filters': filters,
            'kernel_size': size,
            'dilation_rate': rate,
            'padding': padding,
            'activation': activation_fn,
            'use_bias': use_bias,
            'reuse': reuse,
        }
        outputs = tf.layers.conv1d(**params)
    return outputs


def conv1d_banks(
    inputs, K = 16, is_training = True, scope = 'conv1d_banks', reuse = None
):
    with tf.variable_scope(scope, reuse = reuse):
        outputs = conv1d(inputs, embed_size // 2, 1)
        outputs = normalize_in(outputs, tf.nn.relu)
        for k in range(2, K + 1):
            with tf.variable_scope('num_%d' % (k)):
                output = conv1d(inputs, embed_size // 2, k)
                output = normalize_in(output, tf.nn.relu)
                outputs = tf.concat((outputs, output), -1)
    return outputs


def gru(inputs, units = None, bidirection = False, scope = 'gru', reuse = None):
    with tf.variable_scope(scope, reuse = reuse):
        if units is None:
            units = inputs.get_shape().as_list()[-1]
        cell = tf.contrib.rnn.GRUCell(units)
        if bidirection:
            cell_bw = tf.contrib.rnn.GRUCell(units)
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell, cell_bw, inputs, dtype = tf.float32
            )
            return tf.concat(outputs, 2)
        else:
            outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype = tf.float32)
            return outputs


def attention_decoder(
    inputs, memory, units = None, scope = 'attention_decoder', reuse = None
):
    with tf.variable_scope(scope, reuse = reuse):
        if units is None:
            units = inputs.get_shape().as_list()[-1]
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            units, memory
        )
        decoder_cell = tf.contrib.rnn.GRUCell(units)
        cell_with_attention = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cell, attention_mechanism, units
        )
        outputs, _ = tf.nn.dynamic_rnn(
            cell_with_attention, inputs, dtype = tf.float32
        )
    return outputs


def prenet(inputs, is_training = True, scope = 'prenet', reuse = None):
    with tf.variable_scope(scope, reuse = reuse):
        outputs = tf.layers.dense(
            inputs, units = embed_size, activation = tf.nn.relu, name = 'dense1'
        )
        outputs = tf.nn.dropout(
            outputs,
            keep_prob = 0.5 if is_training == True else 1.0,
            name = 'dropout1',
        )
        outputs = tf.layers.dense(
            outputs,
            units = embed_size // 2,
            activation = tf.nn.relu,
            name = 'dense2',
        )
        outputs = tf.nn.dropout(
            outputs,
            keep_prob = 0.5 if is_training == True else 1.0,
            name = 'dropout2',
        )
    return outputs


def highwaynet(inputs, units = None, scope = 'highwaynet', reuse = None):
    with tf.variable_scope(scope, reuse = reuse):
        if units is None:
            units = inputs.get_shape().as_list()[-1]
        H = tf.layers.dense(
            inputs, units = units, activation = tf.nn.relu, name = 'dense1'
        )
        T = tf.layers.dense(
            inputs, units = units, activation = tf.nn.sigmoid, name = 'dense2'
        )
        C = 1.0 - T
        return H * T + inputs * C


def shift_by_one(inputs):
    return tf.concat((tf.zeros_like(inputs[:, :1]), inputs[:, :-1]), 1)

def encode(inputs, is_training = True, scope = 'encoder', reuse = None):
    with tf.variable_scope(scope, reuse = reuse):
        prenet_out = prenet(inputs, scope = 'prenet', is_training = is_training)
        enc = conv1d_banks(
            prenet_out, K = encoder_num_banks, is_training = is_training
        )
        enc = tf.layers.max_pooling1d(enc, 2, 1, padding = 'same')
        enc = conv1d(enc, embed_size // 2, 3, scope = 'conv1d_1')
        enc = normalize_in(enc, activation_fn = tf.nn.relu)
        enc = conv1d(enc, embed_size // 2, 3, scope = 'conv1d_2')
        enc = normalize_in(enc, activation_fn = tf.nn.relu)
        enc += prenet_out
        for i in range(num_highway_blocks):
            enc = highwaynet(
                enc, units = embed_size // 2, scope = 'highwaynet_%d' % (i)
            )
        memory = gru(enc, embed_size // 2, True)
    return memory


def decode(
    inputs, memory, is_training = True, scope = 'decoder_layers', reuse = None
):
    with tf.variable_scope(scope, reuse = reuse):
        dec = prenet(inputs, is_training = is_training)
        dec = attention_decoder(dec, memory, embed_size)
        dec += gru(dec, embed_size, False, scope = 'gru1')
        dec += gru(dec, embed_size, False, scope = 'gru2')
        return tf.layers.dense(dec, len(char2idx))


class Model:
    def __init__(self, is_training = True):
        self.X = tf.placeholder(
            tf.float32, shape = (None, None, dimension)
        )
        self.Y = tf.placeholder(tf.int32, shape = (None, None))
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        self.decoder_inputs = embed(
            shift_by_one(self.Y), len(char2idx), embed_size
        )
        with tf.variable_scope('net'):
            self.memory = encode(self.X, is_training = is_training)
            self.outputs = decode(
                self.decoder_inputs, self.memory, is_training = is_training
            )
            self.logprobs = tf.log(tf.nn.softmax(self.outputs) + 1e-10)
            self.preds = tf.argmax(self.outputs, axis = -1)
            correct_pred = tf.equal(tf.cast(self.preds, tf.int32), self.Y)
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            
        masks = tf.sequence_mask(
            self.Y_seq_len,
            tf.reduce_max(self.Y_seq_len),
            dtype = tf.float32,
        )
        self.cost = tf.contrib.seq2seq.sequence_loss(
            logits = self.outputs, targets = self.Y, weights = masks
        )
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )

In [7]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model()
sess.run(tf.global_variables_initializer())

W0830 13:12:56.280405 140415593760576 deprecation.py:506] From /home/husein/.local/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:507: calling count_nonzero (from tensorflow.python.ops.math_ops) with axis is deprecated and will be removed in a future version.
Instructions for updating:
reduction_indices is deprecated, use axis instead
W0830 13:12:56.310420 140415593760576 deprecation.py:323] From <ipython-input-6-2afb84048410>:122: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.
W0830 13:12:56.314230 140415593760576 deprecation.py:506] From /home/husein/.local/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it t

In [8]:
train_X = tf.keras.preprocessing.sequence.pad_sequences(
    train_X, dtype = 'float32', padding = 'post'
)
train_X.shape

(13128, 48, 400)

In [9]:
test_X = tf.keras.preprocessing.sequence.pad_sequences(
    test_X, dtype = 'float32', padding = 'post'
)
test_X.shape

(560, 48, 400)

In [10]:
def pad_sentence_batch(sentence_batch, pad_int):
    padded_seqs = []
    seq_lens = []
    max_sentence_len = max([len(sentence) for sentence in sentence_batch])
    for sentence in sentence_batch:
        padded_seqs.append(sentence + [pad_int] * (max_sentence_len - len(sentence)))
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

In [11]:
epoch = 20
batch_size = 64

In [12]:
from tqdm import tqdm

for e in range(epoch):
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'minibatch loop')
    train_loss, train_acc, test_loss, test_acc = [], [], [], []
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x = train_X[i : index]
        y = train_Y[i : index]
        batch_y, _ = pad_sentence_batch(y, 0)
        feed = {model.X: batch_x,
                model.Y: batch_y}
        accuracy, loss, _ = sess.run([model.accuracy,model.cost,model.optimizer],
                                    feed_dict = feed)
        train_loss.append(loss)
        train_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    
    
    pbar = tqdm(
        range(0, len(test_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x = train_X[i : index]
        y = test_Y[i : index]
        batch_y, _ = pad_sentence_batch(y, 0)
        feed = {model.X: batch_x,
                model.Y: batch_y,}
        accuracy, loss = sess.run([model.accuracy,model.cost],
                                    feed_dict = feed)

        test_loss.append(loss)
        test_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    
    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 np.mean(train_loss),np.mean(train_acc)))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              np.mean(test_loss),np.mean(test_acc)))

minibatch loop: 100%|██████████| 206/206 [00:41<00:00,  4.25it/s, accuracy=0.783, cost=0.659]
minibatch loop: 100%|██████████| 9/9 [00:01<00:00,  6.04it/s, accuracy=0.7, cost=0.684]  
minibatch loop:   0%|          | 1/206 [00:00<00:36,  5.64it/s, accuracy=0.73, cost=0.683]

epoch 1, training avg loss 1.272585, training avg acc 0.584240
epoch 1, testing avg loss 0.683997, testing avg acc 0.721852


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.56it/s, accuracy=0.803, cost=0.525]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 15.72it/s, accuracy=0.706, cost=0.553]
minibatch loop:   0%|          | 1/206 [00:00<00:36,  5.62it/s, accuracy=0.741, cost=0.552]

epoch 2, training avg loss 0.612984, training avg acc 0.723206
epoch 2, testing avg loss 0.565084, testing avg acc 0.731944


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.40it/s, accuracy=0.803, cost=0.515]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.57it/s, accuracy=0.716, cost=0.517]
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.30it/s, accuracy=0.753, cost=0.524]

epoch 3, training avg loss 0.542716, training avg acc 0.735273
epoch 3, testing avg loss 0.527480, testing avg acc 0.740622


minibatch loop: 100%|██████████| 206/206 [00:39<00:00,  5.35it/s, accuracy=0.829, cost=0.478]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 15.71it/s, accuracy=0.723, cost=0.497]
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.36it/s, accuracy=0.761, cost=0.494]

epoch 4, training avg loss 0.510864, training avg acc 0.743509
epoch 4, testing avg loss 0.505699, testing avg acc 0.745512


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.43it/s, accuracy=0.803, cost=0.478]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.91it/s, accuracy=0.729, cost=0.47] 
minibatch loop:   0%|          | 1/206 [00:00<00:37,  5.53it/s, accuracy=0.767, cost=0.467]

epoch 5, training avg loss 0.485896, training avg acc 0.750599
epoch 5, testing avg loss 0.482513, testing avg acc 0.752478


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.35it/s, accuracy=0.829, cost=0.447]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.37it/s, accuracy=0.742, cost=0.449]
minibatch loop:   0%|          | 1/206 [00:00<00:36,  5.54it/s, accuracy=0.768, cost=0.447]

epoch 6, training avg loss 0.462700, training avg acc 0.755660
epoch 6, testing avg loss 0.461965, testing avg acc 0.756211


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.29it/s, accuracy=0.822, cost=0.408]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.07it/s, accuracy=0.737, cost=0.44] 
minibatch loop:   0%|          | 1/206 [00:00<00:39,  5.18it/s, accuracy=0.776, cost=0.422]

epoch 7, training avg loss 0.441943, training avg acc 0.761259
epoch 7, testing avg loss 0.445603, testing avg acc 0.761199


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.43it/s, accuracy=0.849, cost=0.379]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.29it/s, accuracy=0.748, cost=0.41] 
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.28it/s, accuracy=0.787, cost=0.4]

epoch 8, training avg loss 0.419698, training avg acc 0.767303
epoch 8, testing avg loss 0.426653, testing avg acc 0.765520


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.64it/s, accuracy=0.842, cost=0.365]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.72it/s, accuracy=0.75, cost=0.394] 
minibatch loop:   0%|          | 1/206 [00:00<00:40,  5.07it/s, accuracy=0.796, cost=0.368]

epoch 9, training avg loss 0.398379, training avg acc 0.773561
epoch 9, testing avg loss 0.413055, testing avg acc 0.768295


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.42it/s, accuracy=0.862, cost=0.336]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.25it/s, accuracy=0.746, cost=0.408]
minibatch loop:   0%|          | 1/206 [00:00<00:39,  5.19it/s, accuracy=0.807, cost=0.354]

epoch 10, training avg loss 0.378367, training avg acc 0.779997
epoch 10, testing avg loss 0.411091, testing avg acc 0.771466


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.30it/s, accuracy=0.849, cost=0.336]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.29it/s, accuracy=0.745, cost=0.402]
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.32it/s, accuracy=0.801, cost=0.337]

epoch 11, training avg loss 0.358436, training avg acc 0.785431
epoch 11, testing avg loss 0.402924, testing avg acc 0.772684


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.22it/s, accuracy=0.849, cost=0.308]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 15.25it/s, accuracy=0.752, cost=0.395]
minibatch loop:   0%|          | 1/206 [00:00<00:37,  5.45it/s, accuracy=0.811, cost=0.32]

epoch 12, training avg loss 0.338979, training avg acc 0.791071
epoch 12, testing avg loss 0.415512, testing avg acc 0.772348


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.45it/s, accuracy=0.855, cost=0.281]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.20it/s, accuracy=0.74, cost=0.42]  
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.30it/s, accuracy=0.818, cost=0.29]

epoch 13, training avg loss 0.319973, training avg acc 0.796918
epoch 13, testing avg loss 0.421690, testing avg acc 0.771532


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.32it/s, accuracy=0.875, cost=0.254]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.27it/s, accuracy=0.744, cost=0.432]
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.35it/s, accuracy=0.818, cost=0.28]

epoch 14, training avg loss 0.299559, training avg acc 0.802903
epoch 14, testing avg loss 0.439885, testing avg acc 0.772849


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.77it/s, accuracy=0.882, cost=0.257]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.40it/s, accuracy=0.743, cost=0.453]
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.32it/s, accuracy=0.823, cost=0.265]

epoch 15, training avg loss 0.279262, training avg acc 0.807981
epoch 15, testing avg loss 0.464145, testing avg acc 0.772904


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.52it/s, accuracy=0.914, cost=0.206]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 14.88it/s, accuracy=0.743, cost=0.476]
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.27it/s, accuracy=0.831, cost=0.241]

epoch 16, training avg loss 0.262802, training avg acc 0.813376
epoch 16, testing avg loss 0.478891, testing avg acc 0.773563


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.38it/s, accuracy=0.895, cost=0.209]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 15.17it/s, accuracy=0.744, cost=0.499]
minibatch loop:   0%|          | 1/206 [00:00<00:38,  5.36it/s, accuracy=0.837, cost=0.233]

epoch 17, training avg loss 0.244635, training avg acc 0.818782
epoch 17, testing avg loss 0.519443, testing avg acc 0.772614


minibatch loop: 100%|██████████| 206/206 [00:39<00:00,  5.48it/s, accuracy=0.882, cost=0.22] 
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 15.45it/s, accuracy=0.744, cost=0.538]
minibatch loop:   0%|          | 1/206 [00:00<00:36,  5.56it/s, accuracy=0.83, cost=0.217]

epoch 18, training avg loss 0.232706, training avg acc 0.822359
epoch 18, testing avg loss 0.540628, testing avg acc 0.771803


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.41it/s, accuracy=0.914, cost=0.169]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 15.63it/s, accuracy=0.743, cost=0.552]
minibatch loop:   0%|          | 1/206 [00:00<00:37,  5.46it/s, accuracy=0.833, cost=0.219]

epoch 19, training avg loss 0.219639, training avg acc 0.827012
epoch 19, testing avg loss 0.558029, testing avg acc 0.771475


minibatch loop: 100%|██████████| 206/206 [00:38<00:00,  5.42it/s, accuracy=0.901, cost=0.177]
minibatch loop: 100%|██████████| 9/9 [00:00<00:00, 15.38it/s, accuracy=0.74, cost=0.579] 

epoch 20, training avg loss 0.209063, training avg acc 0.829762
epoch 20, testing avg loss 0.570628, testing avg acc 0.770907





In [13]:
empty_y = np.zeros((1, len(batch_y[0])))
predicted = ''.join(
    [
        idx2char[c]
        for c in sess.run(
            model.preds, feed_dict = {model.X: batch_x[:1], model.Y: empty_y}
        )[0]
        if idx2char[c] not in ['S', 'E']
    ]
)

In [14]:
ground_truth = ''.join(
    [idx2char[c] for c in batch_y[0] if idx2char[c] not in ['S', 'E']]
)
print('predicted: %s, ground truth: %s' % (predicted, ground_truth))

predicted: syytteeword jooe, ground truth: say the word tool
