In [1]:
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
import re
import time
import collections
import os
import itertools
from tqdm import tqdm

In [2]:
def build_dataset(words, n_words, atleast=1):
    count = [['PAD', 0], ['GO', 1], ['EOS', 2], ['UNK', 3]]
    counter = collections.Counter(words).most_common(n_words)
    counter = [i for i in counter if i[1] >= atleast]
    count.extend(counter)
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 0)
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary

In [3]:
import json

with open('augment-normalizer-v4.json') as fopen:
    texts = json.load(fopen)
    
before, after = [], []
    
for splitted in texts:
    if len(splitted) < 2:
        continue
    before.append(list(splitted[0]))
    after.append(list(splitted[1]))
    
assert len(before) == len(after)

In [4]:
concat_from = list(itertools.chain(*before))
vocabulary_size_from = len(list(set(concat_from)))
data_from, count_from, dictionary_from, rev_dictionary_from = build_dataset(concat_from, vocabulary_size_from)
print('vocab from size: %d'%(vocabulary_size_from))
print('Most common words', count_from[4:10])
print('Sample data', data_from[:10], [rev_dictionary_from[i] for i in data_from[:10]])
print('filtered vocab size:',len(dictionary_from))
print("% of vocab used: {}%".format(round(len(dictionary_from)/vocabulary_size_from,4)*100))

vocab from size: 28
Most common words [('a', 1090958), ('l', 943383), ('e', 773153), ('n', 623036), ('r', 499905), ('x', 439435)]
Sample data [4, 19, 4, 20, 9, 19, 4, 20, 9, 19] ['a', 'b', 'a', 'd', 'x', 'b', 'a', 'd', 'x', 'b']
filtered vocab size: 32
% of vocab used: 114.29%


In [5]:
concat_to = list(itertools.chain(*after))
vocabulary_size_to = len(list(set(concat_to)))
data_to, count_to, dictionary_to, rev_dictionary_to = build_dataset(concat_to, vocabulary_size_to)
print('vocab from size: %d'%(vocabulary_size_to))
print('Most common words', count_to[4:10])
print('Sample data', data_to[:10], [rev_dictionary_to[i] for i in data_to[:10]])
print('filtered vocab size:',len(dictionary_to))
print("% of vocab used: {}%".format(round(len(dictionary_to)/vocabulary_size_to,4)*100))

vocab from size: 29
Most common words [('a', 2164890), (' ', 1131495), ('l', 943383), ('k', 843343), ('h', 828089), ('t', 729459)]
Sample data [4, 19, 4, 21, 9, 4, 7, 5, 4, 19] ['a', 'b', 'a', 'd', 't', 'a', 'k', ' ', 'a', 'b']
filtered vocab size: 33
% of vocab used: 113.78999999999999%


In [6]:
GO = dictionary_from['GO']
PAD = dictionary_from['PAD']
EOS = dictionary_from['EOS']
UNK = dictionary_from['UNK']

In [7]:
for i in range(len(after)):
    after[i].append('EOS')

In [8]:
class Stemmer:
    def __init__(self, size_layer, num_layers, embedded_size, 
                 from_dict_size, to_dict_size, learning_rate, 
                 dropout = 0.8, beam_width = 15):
        
        def lstm_cell(reuse=False):
            return tf.nn.rnn_cell.LSTMCell(size_layer, reuse=reuse)
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]

        encoder_embeddings = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        encoder_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layers)])
        self.encoder_out, self.encoder_state = tf.nn.dynamic_rnn(cell = encoder_cells, 
                                                                 inputs = encoder_embedded, 
                                                                 sequence_length = self.X_seq_len,
                                                                 dtype = tf.float32)
        
        self.encoder_state = tuple(self.encoder_state[-1] for _ in range(num_layers))
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        decoder_embeddings = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))
        dense_layer = tf.layers.Dense(to_dict_size)
        
        decoder_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layers)])

        with tf.variable_scope('decode'):
            training_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs = tf.nn.embedding_lookup(decoder_embeddings, decoder_input),
                    sequence_length = self.Y_seq_len,
                    embedding = decoder_embeddings,
                    sampling_probability = 0.5,
                    time_major = False)
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell = decoder_cells,
                    helper = training_helper,
                    initial_state = self.encoder_state,
                    output_layer = dense_layer)
            training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder = training_decoder,
                    impute_finished = True,
                    maximum_iterations = tf.reduce_max(self.Y_seq_len))
            
        # testing session
        with tf.variable_scope('decode', reuse=True):
            
            predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell = decoder_cells,
                    embedding = decoder_embeddings,
                    start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                    end_token = EOS,
                    initial_state = tf.contrib.seq2seq.tile_batch(self.encoder_state, beam_width),
                    beam_width = beam_width,
                    output_layer = dense_layer,
                    length_penalty_weight = 0.0)
            predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder = predicting_decoder,
                    impute_finished = False,
                    maximum_iterations = 2 * tf.reduce_max(self.X_seq_len))
            
        self.training_logits = training_decoder_output.rnn_output
        self.predicting_ids = tf.identity(predicting_decoder_output.predicted_ids[:, :, 0],name="logits")
        
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [9]:
size_layer = 256
num_layers = 2
embedded_size = 128
learning_rate = 1e-3
batch_size = 128
epoch = 10

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Stemmer(size_layer, num_layers, embedded_size, len(dictionary_from), 
                len(dictionary_to), learning_rate)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
def str_idx(corpus, dic, UNK=3):
    X = []
    for i in corpus:
        ints = []
        for k in i:
            ints.append(dic.get(k, UNK))
        X.append(ints)
    return X

In [12]:
X = str_idx(before, dictionary_from)
Y = str_idx(after, dictionary_to)

In [13]:
from sklearn.cross_validation import train_test_split
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.1)



In [14]:
def pad_sentence_batch(sentence_batch, pad_int):
    padded_seqs = []
    seq_lens = []
    max_sentence_len = max([len(sentence) for sentence in sentence_batch])
    for sentence in sentence_batch:
        padded_seqs.append(sentence + [pad_int] * (max_sentence_len - len(sentence)))
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

In [15]:
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break
    total_loss, total_accuracy, total_loss_test, total_accuracy_test = 0, 0, 0, 0
    train_X, train_Y = shuffle(train_X, train_Y)
    test_X, test_Y = shuffle(test_X, test_Y)
    pbar = tqdm(range(0, len(train_X), batch_size), desc='train minibatch loop')
    for k in pbar:
        batch_x, _ = pad_sentence_batch(train_X[k: min(k+batch_size,len(train_X))], PAD)
        batch_y, _ = pad_sentence_batch(train_Y[k: min(k+batch_size,len(train_X))], PAD)
        acc, loss, _ = sess.run([model.accuracy, model.cost, model.optimizer], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})
        total_loss += loss
        total_accuracy += acc
        pbar.set_postfix(cost=loss, accuracy = acc)
        
    pbar = tqdm(range(0, len(test_X), batch_size), desc='test minibatch loop')
    for k in pbar:
        batch_x, _ = pad_sentence_batch(test_X[k: min(k+batch_size,len(test_X))], PAD)
        batch_y, _ = pad_sentence_batch(test_Y[k: min(k+batch_size,len(test_X))], PAD)
        acc, loss = sess.run([model.accuracy, model.cost], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})
        total_loss_test += loss
        total_accuracy_test += acc
        pbar.set_postfix(cost=loss, accuracy = acc)
        
    total_loss /= (len(train_X) / batch_size)
    total_accuracy /= (len(train_X) / batch_size)
    total_loss_test /= (len(test_X) / batch_size)
    total_accuracy_test /= (len(test_X) / batch_size)
    
    if total_accuracy_test > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, total_accuracy_test)
        )
        CURRENT_ACC = total_accuracy_test
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
        
    print('epoch: %d, avg loss: %f, avg accuracy: %f'%(EPOCH, total_loss, total_accuracy))
    print('epoch: %d, avg loss test: %f, avg accuracy test: %f'%(EPOCH, total_loss_test, total_accuracy_test))
    EPOCH += 1

train minibatch loop: 100%|██████████| 5984/5984 [06:29<00:00, 15.38it/s, accuracy=0.967, cost=0.0828]
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 36.97it/s, accuracy=0.987, cost=0.0466]


epoch: 0, pass acc: 0.000000, current acc: 0.972859
epoch: 0, avg loss: 0.200181, avg accuracy: 0.937777
epoch: 0, avg loss test: 0.084564, avg accuracy test: 0.972859


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.40it/s, accuracy=0.974, cost=0.0467]
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.54it/s, accuracy=0.971, cost=0.0842]


epoch: 1, pass acc: 0.972859, current acc: 0.981631
epoch: 1, avg loss: 0.069789, avg accuracy: 0.977167
epoch: 1, avg loss test: 0.055877, avg accuracy test: 0.981631


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.95it/s, accuracy=0.99, cost=0.0297] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.44it/s, accuracy=0.973, cost=0.0824]


epoch: 2, pass acc: 0.981631, current acc: 0.985859
epoch: 2, avg loss: 0.047810, avg accuracy: 0.983950
epoch: 2, avg loss test: 0.042659, avg accuracy test: 0.985859


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.74it/s, accuracy=0.99, cost=0.0322] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.76it/s, accuracy=0.987, cost=0.034] 


epoch: 3, pass acc: 0.985859, current acc: 0.987996
epoch: 3, avg loss: 0.037358, avg accuracy: 0.987236
epoch: 3, avg loss test: 0.035575, avg accuracy test: 0.987996


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.07it/s, accuracy=0.995, cost=0.0213] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.02it/s, accuracy=0.988, cost=0.0352]


epoch: 4, pass acc: 0.987996, current acc: 0.988717
epoch: 4, avg loss: 0.032174, avg accuracy: 0.988816
epoch: 4, avg loss test: 0.032830, avg accuracy test: 0.988717


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.80it/s, accuracy=1, cost=0.00607]    
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.56it/s, accuracy=0.986, cost=0.0411] 


epoch: 5, pass acc: 0.988717, current acc: 0.989124
epoch: 5, avg loss: 0.029497, avg accuracy: 0.989597
epoch: 5, avg loss test: 0.031332, avg accuracy test: 0.989124


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.05it/s, accuracy=0.997, cost=0.0131] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.55it/s, accuracy=0.992, cost=0.0237] 


epoch: 6, pass acc: 0.989124, current acc: 0.989811
epoch: 6, avg loss: 0.027564, avg accuracy: 0.990068
epoch: 6, avg loss test: 0.029369, avg accuracy test: 0.989811


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.44it/s, accuracy=0.998, cost=0.0109] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.83it/s, accuracy=0.99, cost=0.0268] 


epoch: 7, avg loss: 0.026353, avg accuracy: 0.990354
epoch: 7, avg loss test: 0.029131, avg accuracy test: 0.989606


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.58it/s, accuracy=0.986, cost=0.0314] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.54it/s, accuracy=0.99, cost=0.0252]  


epoch: 8, pass acc: 0.989811, current acc: 0.989925
epoch: 8, avg loss: 0.025568, avg accuracy: 0.990531
epoch: 8, avg loss test: 0.028264, avg accuracy test: 0.989925


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.51it/s, accuracy=0.995, cost=0.0183] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 38.17it/s, accuracy=0.988, cost=0.0295] 


epoch: 9, pass acc: 0.989925, current acc: 0.989976
epoch: 9, avg loss: 0.024541, avg accuracy: 0.990864
epoch: 9, avg loss test: 0.027623, avg accuracy test: 0.989976


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.37it/s, accuracy=1, cost=0.00304]    
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 38.37it/s, accuracy=0.984, cost=0.0456] 


epoch: 10, avg loss: 0.024017, avg accuracy: 0.990956
epoch: 10, avg loss test: 0.027589, avg accuracy test: 0.989776


train minibatch loop: 100%|██████████| 5984/5984 [06:25<00:00, 16.36it/s, accuracy=0.995, cost=0.0111] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.75it/s, accuracy=0.987, cost=0.0333] 


epoch: 11, pass acc: 0.989976, current acc: 0.990265
epoch: 11, avg loss: 0.023684, avg accuracy: 0.991007
epoch: 11, avg loss test: 0.026711, avg accuracy test: 0.990265


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.26it/s, accuracy=0.974, cost=0.0722] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.48it/s, accuracy=0.994, cost=0.0188] 


epoch: 12, avg loss: 0.023295, avg accuracy: 0.991079
epoch: 12, avg loss test: 0.026265, avg accuracy test: 0.990262


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.98it/s, accuracy=0.985, cost=0.0383] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.66it/s, accuracy=0.986, cost=0.0325] 


epoch: 13, pass acc: 0.990265, current acc: 0.990421
epoch: 13, avg loss: 0.022886, avg accuracy: 0.991161
epoch: 13, avg loss test: 0.025755, avg accuracy test: 0.990421


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.84it/s, accuracy=0.997, cost=0.0098] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.64it/s, accuracy=0.989, cost=0.0222] 


epoch: 14, avg loss: 0.022539, avg accuracy: 0.991236
epoch: 14, avg loss test: 0.025810, avg accuracy test: 0.990272


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.72it/s, accuracy=0.984, cost=0.03]   
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.54it/s, accuracy=0.989, cost=0.0257] 


epoch: 15, avg loss: 0.022501, avg accuracy: 0.991206
epoch: 15, avg loss test: 0.026231, avg accuracy test: 0.990367


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.30it/s, accuracy=0.989, cost=0.0201] 
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.56it/s, accuracy=0.989, cost=0.0364] 


epoch: 16, pass acc: 0.990421, current acc: 0.990481
epoch: 16, avg loss: 0.022175, avg accuracy: 0.991284
epoch: 16, avg loss test: 0.025535, avg accuracy test: 0.990481


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.14it/s, accuracy=0.98, cost=0.0555]  
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.51it/s, accuracy=0.991, cost=0.0199] 


epoch: 17, avg loss: 0.022192, avg accuracy: 0.991290
epoch: 17, avg loss test: 0.026083, avg accuracy test: 0.990123


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.28it/s, accuracy=0.98, cost=0.038]   
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.63it/s, accuracy=0.989, cost=0.0242] 


epoch: 18, pass acc: 0.990481, current acc: 0.990512
epoch: 18, avg loss: 0.021922, avg accuracy: 0.991327
epoch: 18, avg loss test: 0.025003, avg accuracy test: 0.990512


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.45it/s, accuracy=1, cost=0.00151]    
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.60it/s, accuracy=0.992, cost=0.0171] 


epoch: 19, pass acc: 0.990512, current acc: 0.990604
epoch: 19, avg loss: 0.021909, avg accuracy: 0.991356
epoch: 19, avg loss test: 0.025100, avg accuracy test: 0.990604


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.62it/s, accuracy=0.995, cost=0.008]  
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.46it/s, accuracy=0.99, cost=0.0352]  


epoch: 20, pass acc: 0.990604, current acc: 0.990627
epoch: 20, avg loss: 0.021701, avg accuracy: 0.991362
epoch: 20, avg loss test: 0.024700, avg accuracy test: 0.990627


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.22it/s, accuracy=0.992, cost=0.014]  
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.55it/s, accuracy=0.99, cost=0.0204]  


epoch: 21, avg loss: 0.021849, avg accuracy: 0.991318
epoch: 21, avg loss test: 0.025859, avg accuracy test: 0.990223


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 16.45it/s, accuracy=0.997, cost=0.00812]
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.54it/s, accuracy=0.993, cost=0.0133] 


epoch: 22, avg loss: 0.021296, avg accuracy: 0.991493
epoch: 22, avg loss test: 0.024581, avg accuracy test: 0.990610


train minibatch loop: 100%|██████████| 5984/5984 [06:26<00:00, 15.64it/s, accuracy=0.99, cost=0.0182]  
test minibatch loop: 100%|██████████| 665/665 [00:17<00:00, 37.51it/s, accuracy=0.986, cost=0.034]  

epoch: 23, avg loss: 0.021460, avg accuracy: 0.991417
epoch: 23, avg loss test: 0.024776, avg accuracy test: 0.990571
break epoch:24






In [16]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, "beamsearch-lstm-normalize/model.ckpt")

'beamsearch-lstm-normalize/model.ckpt'

In [17]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'OptimizeLoss' not in n.name
        and 'Global_Step' not in n.name
    ]
)

In [18]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            "directory: %s" % model_dir)

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path
    
    absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + "/frozen_model.pb"
    clear_devices = True
    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(",")
        ) 
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

In [19]:
freeze_graph("beamsearch-lstm-normalize", strings)

INFO:tensorflow:Restoring parameters from beamsearch-lstm-normalize/model.ckpt
INFO:tensorflow:Froze 12 variables.
INFO:tensorflow:Converted 12 variables to const ops.
1128 ops in the final graph.


In [20]:
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [21]:
g=load_graph('beamsearch-lstm-normalize/frozen_model.pb')

In [22]:
x = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph=g)
predicted = test_sess.run(logits,feed_dict={x:str_idx(['kecomelan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: kecomelan




In [26]:
x = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph=g)
predicted = test_sess.run(logits,feed_dict={x:str_idx(['xjdi'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: tak je


In [24]:
predicted = test_sess.run(logits,feed_dict={x:str_idx(['xperjlnan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: tak perjalanan


In [25]:
import json
with open('beamsearch-lstm-normalize.json','w') as fopen:
    fopen.write(json.dumps({'dictionary_from':dictionary_from,
                'dictionary_to':dictionary_to,
                'rev_dictionary_to':rev_dictionary_to,
                'rev_dictionary_from':rev_dictionary_from}))