In [1]:
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
import re
import time
import collections
import os
import itertools
from tqdm import tqdm

In [2]:
def build_dataset(words, n_words, atleast=1):
    count = [['PAD', 0], ['GO', 1], ['EOS', 2], ['UNK', 3]]
    counter = collections.Counter(words).most_common(n_words)
    counter = [i for i in counter if i[1] >= atleast]
    count.extend(counter)
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 0)
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary

In [3]:
import json

with open('pos-training.json') as fopen:
    texts = json.load(fopen)
    
before, after = [], []
    
for splitted in texts:
    if len(splitted) < 2:
        continue
    if not len(splitted[0]):
        continue
    before.append(list(splitted[0]))
    after.append(list(splitted[1]))
    
assert len(before) == len(after)

In [4]:
concat_from = list(itertools.chain(*before))
vocabulary_size_from = len(list(set(concat_from)))
data_from, count_from, dictionary_from, rev_dictionary_from = build_dataset(concat_from, vocabulary_size_from)
print('vocab from size: %d'%(vocabulary_size_from))
print('Most common words', count_from[4:10])
print('Sample data', data_from[:10], [rev_dictionary_from[i] for i in data_from[:10]])
print('filtered vocab size:',len(dictionary_from))
print("% of vocab used: {}%".format(round(len(dictionary_from)/vocabulary_size_from,4)*100))

vocab from size: 27
Most common words [('a', 781600), ('e', 493676), ('i', 478435), ('n', 475565), ('r', 359020), ('o', 332268)]
Sample data [8, 4, 11, 5, 13, 4, 5, 11, 6, 21] ['r', 'a', 't', 'e', 'l', 'a', 'e', 't', 'i', 'p']
filtered vocab size: 31
% of vocab used: 114.80999999999999%


In [5]:
concat_to = list(itertools.chain(*after))
vocabulary_size_to = len(list(set(concat_to)))
data_to, count_to, dictionary_to, rev_dictionary_to = build_dataset(concat_to, vocabulary_size_to)
print('vocab from size: %d'%(vocabulary_size_to))
print('Most common words', count_to[4:10])
print('Sample data', data_to[:10], [rev_dictionary_to[i] for i in data_to[:10]])
print('filtered vocab size:',len(dictionary_to))
print("% of vocab used: {}%".format(round(len(dictionary_to)/vocabulary_size_to,4)*100))

vocab from size: 27
Most common words [('a', 747317), ('i', 464716), ('e', 459686), ('n', 433253), ('r', 348899), ('o', 332268)]
Sample data [8, 4, 11, 6, 13, 4, 6, 11, 5, 21] ['r', 'a', 't', 'e', 'l', 'a', 'e', 't', 'i', 'p']
filtered vocab size: 31
% of vocab used: 114.80999999999999%


In [6]:
GO = dictionary_from['GO']
PAD = dictionary_from['PAD']
EOS = dictionary_from['EOS']
UNK = dictionary_from['UNK']

In [7]:
for i in range(len(after)):
    after[i].append('EOS')

In [8]:
class Stemmer:
    def __init__(self, size_layer, num_layers, embedded_size, 
                 from_dict_size, to_dict_size, learning_rate, 
                 dropout = 0.8, beam_width = 15, force_teaching_ratio=0.5):
        
        def lstm_cell(reuse=False):
            return tf.nn.rnn_cell.LSTMCell(size_layer, reuse=reuse)
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]

        encoder_embeddings = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        encoder_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layers)])
        self.encoder_out, self.encoder_state = tf.nn.dynamic_rnn(cell = encoder_cells, 
                                                                 inputs = encoder_embedded, 
                                                                 sequence_length = self.X_seq_len,
                                                                 dtype = tf.float32)
        
        encoder_state = tuple(self.encoder_state[-1] for _ in range(num_layers))
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        decoder_embeddings = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))
        dense_layer = tf.layers.Dense(to_dict_size)
        
        with tf.variable_scope('decode'):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            num_units = size_layer, 
            memory = encoder_embedded,
            memory_sequence_length = self.X_seq_len)
            decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layers)]),
                attention_mechanism = attention_mechanism,
                attention_layer_size = size_layer)
            main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
            decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
            training_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
            inputs = tf.nn.embedding_lookup(decoder_embeddings, decoder_input),
                sequence_length = self.Y_seq_len,
                embedding = decoder_embeddings,
                sampling_probability = 1 - force_teaching_ratio,
                time_major = False)
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cell,
                helper = training_helper,
                initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state),
                output_layer = dense_layer)
            training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = training_decoder,
                impute_finished = True,
                maximum_iterations = tf.reduce_max(self.Y_seq_len))
            
        with tf.variable_scope('decode', reuse=True):
            encoder_out_tiled = tf.contrib.seq2seq.tile_batch(encoder_embedded, beam_width)
            encoder_state_tiled = tf.contrib.seq2seq.tile_batch(encoder_state, beam_width)
            X_seq_len_tiled = tf.contrib.seq2seq.tile_batch(self.X_seq_len, beam_width)
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units = size_layer, 
                memory = encoder_out_tiled,
                memory_sequence_length = X_seq_len_tiled)
            decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell(reuse=True) for _ in range(num_layers)]),
                attention_mechanism = attention_mechanism,
                attention_layer_size = size_layer)
            predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell = decoder_cell,
                embedding = decoder_embeddings,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                end_token = EOS,
                initial_state = decoder_cell.zero_state(batch_size * beam_width, tf.float32).clone(cell_state = encoder_state_tiled),
                beam_width = beam_width,
                output_layer = dense_layer,
                length_penalty_weight = 0.0)
            predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = predicting_decoder,
                impute_finished = False,
                maximum_iterations = 2 * tf.reduce_max(self.X_seq_len))
            
            
        self.training_logits = training_decoder_output.rnn_output
        self.predicting_ids = tf.identity(predicting_decoder_output.predicted_ids[:, :, 0],name="logits")
        
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [9]:
size_layer = 256
num_layers = 2
embedded_size = 128
learning_rate = 1e-3
batch_size = 128
epoch = 10

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Stemmer(size_layer, num_layers, embedded_size, len(dictionary_from), 
                len(dictionary_to), learning_rate)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
def str_idx(corpus, dic, UNK=3):
    X = []
    for i in corpus:
        ints = []
        for k in i:
            ints.append(dic.get(k, UNK))
        X.append(ints)
    return X

In [12]:
X = str_idx(before, dictionary_from)
Y = str_idx(after, dictionary_to)

In [13]:
from sklearn.cross_validation import train_test_split
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.1)



In [14]:
def pad_sentence_batch(sentence_batch, pad_int):
    padded_seqs = []
    seq_lens = []
    max_sentence_len = max([len(sentence) for sentence in sentence_batch])
    for sentence in sentence_batch:
        padded_seqs.append(sentence + [pad_int] * (max_sentence_len - len(sentence)))
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

In [15]:
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break
    total_loss, total_accuracy, total_loss_test, total_accuracy_test = 0, 0, 0, 0
    train_X, train_Y = shuffle(train_X, train_Y)
    test_X, test_Y = shuffle(test_X, test_Y)
    pbar = tqdm(range(0, len(train_X), batch_size), desc='train minibatch loop')
    for k in pbar:
        batch_x, _ = pad_sentence_batch(train_X[k: min(k+batch_size,len(train_X))], PAD)
        batch_y, _ = pad_sentence_batch(train_Y[k: min(k+batch_size,len(train_X))], PAD)
        acc, loss, _ = sess.run([model.accuracy, model.cost, model.optimizer], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})
        total_loss += loss
        total_accuracy += acc
        pbar.set_postfix(cost=loss, accuracy = acc)
        
    pbar = tqdm(range(0, len(test_X), batch_size), desc='test minibatch loop')
    for k in pbar:
        batch_x, _ = pad_sentence_batch(test_X[k: min(k+batch_size,len(test_X))], PAD)
        batch_y, _ = pad_sentence_batch(test_Y[k: min(k+batch_size,len(test_X))], PAD)
        acc, loss = sess.run([model.accuracy, model.cost], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})
        total_loss_test += loss
        total_accuracy_test += acc
        pbar.set_postfix(cost=loss, accuracy = acc)
        
    total_loss /= (len(train_X) / batch_size)
    total_accuracy /= (len(train_X) / batch_size)
    total_loss_test /= (len(test_X) / batch_size)
    total_accuracy_test /= (len(test_X) / batch_size)
    
    if total_accuracy_test > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, total_accuracy_test)
        )
        CURRENT_ACC = total_accuracy_test
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
        
    print('epoch: %d, avg loss: %f, avg accuracy: %f'%(EPOCH, total_loss, total_accuracy))
    print('epoch: %d, avg loss test: %f, avg accuracy test: %f'%(EPOCH, total_loss_test, total_accuracy_test))
    EPOCH += 1

train minibatch loop: 100%|██████████| 5203/5203 [09:39<00:00,  8.87it/s, accuracy=0.982, cost=0.0468]
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.03it/s, accuracy=0.983, cost=0.0785]


epoch: 0, pass acc: 0.000000, current acc: 0.976588
epoch: 0, avg loss: 0.222882, avg accuracy: 0.926648
epoch: 0, avg loss test: 0.077206, avg accuracy test: 0.976588


train minibatch loop: 100%|██████████| 5203/5203 [09:32<00:00,  9.09it/s, accuracy=0.985, cost=0.0459] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.26it/s, accuracy=0.98, cost=0.0531] 


epoch: 1, pass acc: 0.976588, current acc: 0.981308
epoch: 1, avg loss: 0.067740, avg accuracy: 0.977963
epoch: 1, avg loss test: 0.061481, avg accuracy test: 0.981308


train minibatch loop: 100%|██████████| 5203/5203 [09:33<00:00, 10.12it/s, accuracy=0.994, cost=0.0206] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 21.94it/s, accuracy=1, cost=0.012]      


epoch: 2, pass acc: 0.981308, current acc: 0.985202
epoch: 2, avg loss: 0.052667, avg accuracy: 0.982614
epoch: 2, avg loss test: 0.049175, avg accuracy test: 0.985202


train minibatch loop: 100%|██████████| 5203/5203 [09:34<00:00,  9.06it/s, accuracy=0.994, cost=0.0187] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.64it/s, accuracy=0.969, cost=0.123]  


epoch: 3, pass acc: 0.985202, current acc: 0.986750
epoch: 3, avg loss: 0.044473, avg accuracy: 0.985173
epoch: 3, avg loss test: 0.044336, avg accuracy test: 0.986750


train minibatch loop: 100%|██████████| 5203/5203 [09:35<00:00,  7.25it/s, accuracy=0.988, cost=0.0357] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.18it/s, accuracy=1, cost=0.01]       


epoch: 4, pass acc: 0.986750, current acc: 0.987332
epoch: 4, avg loss: 0.039572, avg accuracy: 0.986687
epoch: 4, avg loss test: 0.042780, avg accuracy test: 0.987332


train minibatch loop: 100%|██████████| 5203/5203 [09:33<00:00,  8.73it/s, accuracy=0.993, cost=0.0198] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.03it/s, accuracy=0.989, cost=0.0157] 


epoch: 5, pass acc: 0.987332, current acc: 0.987825
epoch: 5, avg loss: 0.035509, avg accuracy: 0.988079
epoch: 5, avg loss test: 0.041239, avg accuracy test: 0.987825


train minibatch loop: 100%|██████████| 5203/5203 [09:32<00:00,  7.90it/s, accuracy=0.987, cost=0.0403] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 21.87it/s, accuracy=0.977, cost=0.057]  


epoch: 6, avg loss: 0.032815, avg accuracy: 0.988991
epoch: 6, avg loss test: 0.041939, avg accuracy test: 0.987556


train minibatch loop: 100%|██████████| 5203/5203 [09:34<00:00,  9.06it/s, accuracy=0.99, cost=0.0302]  
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.06it/s, accuracy=1, cost=0.00235]    


epoch: 7, pass acc: 0.987825, current acc: 0.988543
epoch: 7, avg loss: 0.029815, avg accuracy: 0.989973
epoch: 7, avg loss test: 0.039594, avg accuracy test: 0.988543


train minibatch loop: 100%|██████████| 5203/5203 [09:31<00:00,  9.11it/s, accuracy=0.995, cost=0.0183] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 24.30it/s, accuracy=1, cost=0.00208]    


epoch: 8, pass acc: 0.988543, current acc: 0.989321
epoch: 8, avg loss: 0.027337, avg accuracy: 0.990879
epoch: 8, avg loss test: 0.038017, avg accuracy test: 0.989321


train minibatch loop: 100%|██████████| 5203/5203 [09:32<00:00, 10.70it/s, accuracy=0.998, cost=0.00959]
test minibatch loop: 100%|██████████| 579/579 [00:25<00:00, 22.30it/s, accuracy=1, cost=0.00726]    


epoch: 9, pass acc: 0.989321, current acc: 0.989664
epoch: 9, avg loss: 0.025447, avg accuracy: 0.991583
epoch: 9, avg loss test: 0.038016, avg accuracy test: 0.989664


train minibatch loop: 100%|██████████| 5203/5203 [09:35<00:00,  8.35it/s, accuracy=0.995, cost=0.0278] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 24.33it/s, accuracy=0.98, cost=0.0321]  


epoch: 10, avg loss: 0.023914, avg accuracy: 0.992098
epoch: 10, avg loss test: 0.039399, avg accuracy test: 0.989663


train minibatch loop: 100%|██████████| 5203/5203 [09:32<00:00,  7.57it/s, accuracy=0.992, cost=0.0215] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.12it/s, accuracy=1, cost=8.16e-5]    


epoch: 11, pass acc: 0.989664, current acc: 0.990160
epoch: 11, avg loss: 0.021716, avg accuracy: 0.992846
epoch: 11, avg loss test: 0.036509, avg accuracy test: 0.990160


train minibatch loop: 100%|██████████| 5203/5203 [09:33<00:00, 12.02it/s, accuracy=0.997, cost=0.00617]
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 21.97it/s, accuracy=1, cost=3.94e-5]    


epoch: 12, pass acc: 0.990160, current acc: 0.990192
epoch: 12, avg loss: 0.020396, avg accuracy: 0.993342
epoch: 12, avg loss test: 0.037646, avg accuracy test: 0.990192


train minibatch loop: 100%|██████████| 5203/5203 [09:32<00:00,  9.09it/s, accuracy=0.998, cost=0.00599]
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.05it/s, accuracy=0.933, cost=0.402]  


epoch: 13, pass acc: 0.990192, current acc: 0.990418
epoch: 13, avg loss: 0.018951, avg accuracy: 0.993821
epoch: 13, avg loss test: 0.037511, avg accuracy test: 0.990418


train minibatch loop: 100%|██████████| 5203/5203 [09:32<00:00,  9.08it/s, accuracy=0.994, cost=0.0133] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.09it/s, accuracy=1, cost=0.000402]   


epoch: 14, pass acc: 0.990418, current acc: 0.990733
epoch: 14, avg loss: 0.017463, avg accuracy: 0.994335
epoch: 14, avg loss test: 0.038398, avg accuracy test: 0.990733


train minibatch loop: 100%|██████████| 5203/5203 [09:33<00:00,  9.93it/s, accuracy=0.999, cost=0.00922]
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.05it/s, accuracy=0.989, cost=0.0117] 


epoch: 15, avg loss: 0.016940, avg accuracy: 0.994543
epoch: 15, avg loss test: 0.038985, avg accuracy test: 0.990695


train minibatch loop: 100%|██████████| 5203/5203 [09:32<00:00, 10.98it/s, accuracy=0.998, cost=0.00742]
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 22.07it/s, accuracy=1, cost=0.000139]   


epoch: 16, avg loss: 0.015848, avg accuracy: 0.994932
epoch: 16, avg loss test: 0.041539, avg accuracy test: 0.990592


train minibatch loop: 100%|██████████| 5203/5203 [09:33<00:00,  9.08it/s, accuracy=0.991, cost=0.0227] 
test minibatch loop: 100%|██████████| 579/579 [00:26<00:00, 24.10it/s, accuracy=1, cost=0.000649]   

epoch: 17, avg loss: 0.015352, avg accuracy: 0.995150
epoch: 17, avg loss test: 0.038758, avg accuracy test: 0.990040
break epoch:18






In [16]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, "beamsearch-bahdanau/model.ckpt")

'beamsearch-bahdanau/model.ckpt'

In [17]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'OptimizeLoss' not in n.name
        and 'Global_Step' not in n.name
    ]
)

In [18]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            "directory: %s" % model_dir)

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path
    
    absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + "/frozen_model.pb"
    clear_devices = True
    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(",")
        ) 
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

In [19]:
freeze_graph("beamsearch-bahdanau", strings)

INFO:tensorflow:Restoring parameters from beamsearch-bahdanau/model.ckpt
INFO:tensorflow:Froze 16 variables.
INFO:tensorflow:Converted 16 variables to const ops.
1755 ops in the final graph.


In [20]:
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [21]:
g=load_graph('beamsearch-bahdanau/frozen_model.pb')

In [22]:
x = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph=g)
predicted = test_sess.run(logits,feed_dict={x:str_idx(['kecomelan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: comel




In [23]:
x = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph=g)
predicted = test_sess.run(logits,feed_dict={x:str_idx(['kecomelkan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: comel


In [24]:
predicted = test_sess.run(logits,feed_dict={x:str_idx(['kejalanan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: jalan


In [25]:
import json
with open('beamsearch-bahdanau-stem.json','w') as fopen:
    fopen.write(json.dumps({'dictionary_from':dictionary_from,
                'dictionary_to':dictionary_to,
                'rev_dictionary_to':rev_dictionary_to,
                'rev_dictionary_from':rev_dictionary_from}))