In [1]:
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
import re
import time
import collections
import os
import itertools
from tqdm import tqdm

In [2]:
def build_dataset(words, n_words, atleast=1):
    count = [['GO', 0], ['PAD', 1], ['EOS', 2], ['UNK', 3]]
    counter = collections.Counter(words).most_common(n_words)
    counter = [i for i in counter if i[1] >= atleast]
    count.extend(counter)
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 0)
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary

In [3]:
with open('stemmer-data.txt','r') as fopen:
    texts = fopen.read().split('\n')
    
before, after = [], []
    
for i in texts:
    splitted = i.split('\t')
    if len(splitted) < 2:
        continue
    before.append(list(splitted[0]))
    after.append(list(splitted[1]))
    
assert len(before) == len(after)

In [4]:
concat_from = list(itertools.chain(*before))
vocabulary_size_from = len(list(set(concat_from)))
data_from, count_from, dictionary_from, rev_dictionary_from = build_dataset(concat_from, vocabulary_size_from)
print('vocab from size: %d'%(vocabulary_size_from))
print('Most common words', count_from[4:10])
print('Sample data', data_from[:10], [rev_dictionary_from[i] for i in data_from[:10]])
print('filtered vocab size:',len(dictionary_from))
print("% of vocab used: {}%".format(round(len(dictionary_from)/vocabulary_size_from,4)*100))

vocab from size: 28
Most common words [('a', 53190), ('n', 32568), ('e', 29367), ('i', 26679), ('r', 19577), ('s', 16557)]
Sample data [9, 17, 7, 18, 6, 8, 10, 8, 4, 5] ['s', 'p', 'i', 'd', 'e', 'r', 't', 'r', 'a', 'n']
filtered vocab size: 32
% of vocab used: 114.29%


In [5]:
concat_to = list(itertools.chain(*after))
vocabulary_size_to = len(list(set(concat_to)))
data_to, count_to, dictionary_to, rev_dictionary_to = build_dataset(concat_to, vocabulary_size_to)
print('vocab from size: %d'%(vocabulary_size_to))
print('Most common words', count_to[4:10])
print('Sample data', data_to[:10], [rev_dictionary_to[i] for i in data_to[:10]])
print('filtered vocab size:',len(dictionary_to))
print("% of vocab used: {}%".format(round(len(dictionary_to)/vocabulary_size_to,4)*100))

vocab from size: 28
Most common words [('a', 43498), ('i', 23379), ('n', 21388), ('e', 20612), ('r', 16760), ('s', 16088)]
Sample data [9, 17, 5, 18, 7, 8, 10, 8, 4, 6] ['s', 'p', 'i', 'd', 'e', 'r', 't', 'r', 'a', 'n']
filtered vocab size: 32
% of vocab used: 114.29%


In [6]:
GO = dictionary_from['GO']
PAD = dictionary_from['PAD']
EOS = dictionary_from['EOS']
UNK = dictionary_from['UNK']

In [7]:
for i in range(len(after)):
    after[i].append('EOS')

In [8]:
class Stemmer:
    def __init__(self, size_layer, num_layers, embedded_size, 
                 from_dict_size, to_dict_size, learning_rate, 
                 dropout = 0.8, beam_width = 15):
        
        def lstm_cell(reuse=False):
            return tf.nn.rnn_cell.LSTMCell(size_layer, reuse=reuse)
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]

        encoder_embeddings = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        encoder_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layers)])
        self.encoder_out, self.encoder_state = tf.nn.dynamic_rnn(cell = encoder_cells, 
                                                                 inputs = encoder_embedded, 
                                                                 sequence_length = self.X_seq_len,
                                                                 dtype = tf.float32)
        
        self.encoder_state = tuple(self.encoder_state[-1] for _ in range(num_layers))
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        decoder_embeddings = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))
        dense_layer = tf.layers.Dense(to_dict_size)
        
        decoder_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layers)])
        
        # training session
        with tf.variable_scope('decode'):
            training_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs = tf.nn.embedding_lookup(decoder_embeddings, decoder_input),
                    sequence_length = self.Y_seq_len,
                    embedding = decoder_embeddings,
                    sampling_probability = 0.5,
                    time_major = False)
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell = decoder_cells,
                    helper = training_helper,
                    initial_state = self.encoder_state,
                    output_layer = dense_layer)
            training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder = training_decoder,
                    impute_finished = True,
                    maximum_iterations = tf.reduce_max(self.Y_seq_len))
            
        # testing session
        with tf.variable_scope('decode', reuse=True):
            
            predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell = decoder_cells,
                    embedding = decoder_embeddings,
                    start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                    end_token = EOS,
                    initial_state = tf.contrib.seq2seq.tile_batch(self.encoder_state, beam_width),
                    beam_width = beam_width,
                    output_layer = dense_layer,
                    length_penalty_weight = 0.0)
            predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder = predicting_decoder,
                    impute_finished = False,
                    maximum_iterations = 2 * tf.reduce_max(self.X_seq_len))
            
        self.training_logits = training_decoder_output.rnn_output
        self.predicting_ids = tf.identity(predicting_decoder_output.predicted_ids[:, :, 0],name="logits")
        
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)

In [9]:
size_layer = 256
num_layers = 2
embedded_size = 128
learning_rate = 1e-3
batch_size = 128
epoch = 50

In [10]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Stemmer(size_layer, num_layers, embedded_size, len(dictionary_from), 
                len(dictionary_to), learning_rate)
sess.run(tf.global_variables_initializer())

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [11]:
def str_idx(corpus, dic, UNK=3):
    X = []
    for i in corpus:
        ints = []
        for k in i:
            try:
                ints.append(dic[k])
            except Exception as e:
                ints.append(UNK)
        X.append(ints)
    return X

In [12]:
X = str_idx(before, dictionary_from)
Y = str_idx(after, dictionary_to)

In [13]:
def pad_sentence_batch(sentence_batch, pad_int):
    padded_seqs = []
    seq_lens = []
    max_sentence_len = max([len(sentence) for sentence in sentence_batch])
    for sentence in sentence_batch:
        padded_seqs.append(sentence + [pad_int] * (max_sentence_len - len(sentence)))
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

def check_accuracy(logits, Y):
    acc = 0
    for i in range(logits.shape[0]):
        internal_acc = 0
        count = 0
        for k in range(len(Y[i])):
            try:
                if Y[i][k] == logits[i][k]:
                    internal_acc += 1
                count += 1
                if Y[i][k] == EOS:
                    break
            except:
                break
        acc += (internal_acc / count)
    return acc / logits.shape[0]

In [14]:
for i in range(epoch):
    total_loss, total_accuracy = 0, 0
    X, Y = shuffle(X, Y)
    pbar = tqdm(range(0, len(before), batch_size), desc='train minibatch loop')
    for k in pbar:
        batch_x, _ = pad_sentence_batch(X[k: min(k+batch_size,len(before))], PAD)
        batch_y, _ = pad_sentence_batch(Y[k: min(k+batch_size,len(before))], PAD)
        predicted, loss, _ = sess.run([model.predicting_ids, model.cost, model.optimizer], 
                                      feed_dict={model.X:batch_x,
                                                model.Y:batch_y})
        acc = check_accuracy(predicted,batch_y)
        total_loss += loss
        total_accuracy += acc
        pbar.set_postfix(cost=loss, accuracy = acc)
        
    total_loss /= (len(before) / batch_size)
    total_accuracy /= (len(before) / batch_size)
    print('epoch: %d, avg loss: %f, avg accuracy: %f'%(i+1, total_loss, total_accuracy))

train minibatch loop: 100%|██████████| 325/325 [01:19<00:00,  4.08it/s, accuracy=0.436, cost=0.811]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 1, avg loss: 1.079808, avg accuracy: 0.296571


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.47it/s, accuracy=0.689, cost=0.509]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 2, avg loss: 0.515500, avg accuracy: 0.625145


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.48it/s, accuracy=0.894, cost=0.291]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 3, avg loss: 0.299930, avg accuracy: 0.811725


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.48it/s, accuracy=0.9, cost=0.261]   
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 4, avg loss: 0.210670, avg accuracy: 0.873118


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.896, cost=0.19]  
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 5, avg loss: 0.162662, avg accuracy: 0.901118


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.48it/s, accuracy=0.933, cost=0.135] 
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 6, avg loss: 0.135120, avg accuracy: 0.916329


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.926, cost=0.106] 
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 7, avg loss: 0.114529, avg accuracy: 0.927345


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.49it/s, accuracy=0.928, cost=0.1]   
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 8, avg loss: 0.097845, avg accuracy: 0.937203


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.47it/s, accuracy=0.977, cost=0.0764]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 9, avg loss: 0.085720, avg accuracy: 0.943385


train minibatch loop: 100%|██████████| 325/325 [01:34<00:00,  3.45it/s, accuracy=0.948, cost=0.17]  
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 10, avg loss: 0.075839, avg accuracy: 0.950999


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.976, cost=0.0528]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 11, avg loss: 0.067185, avg accuracy: 0.956709


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.969, cost=0.119] 
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 13, avg loss: 0.050864, avg accuracy: 0.967988


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.47it/s, accuracy=0.971, cost=0.0732]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 14, avg loss: 0.046846, avg accuracy: 0.971205


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.48it/s, accuracy=0.948, cost=0.0489]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 15, avg loss: 0.041069, avg accuracy: 0.974725


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.984, cost=0.0475]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 16, avg loss: 0.035133, avg accuracy: 0.979962


train minibatch loop:  90%|████████▉ | 291/325 [01:23<00:09,  3.47it/s, accuracy=0.962, cost=0.0395]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.49it/s, accuracy=0.999, cost=0.0177] 
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 29, avg loss: 0.013356, avg accuracy: 0.995411


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.47it/s, accuracy=0.993, cost=0.00535]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 30, avg loss: 0.011959, avg accuracy: 0.995991


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.51it/s, accuracy=0.984, cost=0.012]  
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 31, avg loss: 0.012971, avg accuracy: 0.995483


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.47it/s, accuracy=1, cost=0.0142]     
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 32, avg loss: 0.012276, avg accuracy: 0.995856


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.979, cost=0.0342] 
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 33, avg loss: 0.011476, avg accuracy: 0.996715


train minibatch loop:   9%|▉         | 30/325 [00:08<01:25,  3.47it/s, accuracy=0.988, cost=0.0187] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.998, cost=0.00335]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 45, avg loss: 0.006028, avg accuracy: 0.999030


train minibatch loop: 100%|██████████| 325/325 [01:32<00:00,  3.50it/s, accuracy=0.985, cost=0.00642]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 46, avg loss: 0.010028, avg accuracy: 0.996484


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.49it/s, accuracy=0.996, cost=0.00477]
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 47, avg loss: 0.009676, avg accuracy: 0.996710


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.48it/s, accuracy=1, cost=0.000955]   
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 48, avg loss: 0.008256, avg accuracy: 0.997823


train minibatch loop: 100%|██████████| 325/325 [01:33<00:00,  3.48it/s, accuracy=1, cost=0.0029]     
train minibatch loop:   0%|          | 0/325 [00:00<?, ?it/s]

epoch: 49, avg loss: 0.005772, avg accuracy: 0.999575


train minibatch loop:  32%|███▏      | 104/325 [00:28<01:01,  3.60it/s, accuracy=1, cost=0.00153]    IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [15]:
predicted = sess.run(model.predicting_ids,feed_dict={model.X:batch_x})

In [16]:
for i in range(len(batch_x)):
    print('row %d'%(i+1))
    print('BEFORE:',''.join([rev_dictionary_from[n] for n in batch_x[i] if n not in [0,1,2,3]]))
    print('REAL AFTER:',''.join([rev_dictionary_to[n] for n in batch_y[i] if n not in[0,1,2,3]]))
    print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted[i] if n not in[0,1,2,3]]),'\n')

row 1
BEFORE: iot
REAL AFTER: iot
PREDICTED AFTER: iot 

row 2
BEFORE: dipersalahkan
REAL AFTER: salah
PREDICTED AFTER: salah 

row 3
BEFORE: pucuk
REAL AFTER: pucuk
PREDICTED AFTER: pucuk 

row 4
BEFORE: putih
REAL AFTER: putih
PREDICTED AFTER: putih 

row 5
BEFORE: competitions
REAL AFTER: competitions
PREDICTED AFTER: competitions 

row 6
BEFORE: regarding
REAL AFTER: regarding
PREDICTED AFTER: regarding 

row 7
BEFORE: usd
REAL AFTER: usd
PREDICTED AFTER: usd 

row 8
BEFORE: teratai
REAL AFTER: teratai
PREDICTED AFTER: teratai 

row 9
BEFORE: khuatir
REAL AFTER: khuatir
PREDICTED AFTER: khuatir 

row 10
BEFORE: zakariaperingatan
REAL AFTER: zakariaperingatan
PREDICTED AFTER: zakariaperingatan 

row 11
BEFORE: tidur
REAL AFTER: tidur
PREDICTED AFTER: tidur 

row 12
BEFORE: suka-suka
REAL AFTER: suka
PREDICTED AFTER: suka 

row 13
BEFORE: manuasia
REAL AFTER: manuasia
PREDICTED AFTER: manuasia 

row 14
BEFORE: peringkatnya
REAL AFTER: peringkat
PREDICTED AFTER: peringkat 

row 15
BEF

In [17]:
predicted = sess.run(model.predicting_ids,feed_dict={model.X:str_idx(['berjalan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: jalan


In [18]:
predicted = sess.run(model.predicting_ids,feed_dict={model.X:str_idx(['menikmati'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: nikmat


In [19]:
predicted = sess.run(model.predicting_ids,feed_dict={model.X:str_idx(['keladi'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: keladi


In [20]:
predicted = sess.run(model.predicting_ids,feed_dict={model.X:str_idx(['menghirupkan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: hirup


In [21]:
predicted = sess.run(model.predicting_ids,feed_dict={model.X:str_idx(['udara'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: udara


In [32]:
saver = tf.train.Saver(tf.global_variables())
saver.save(sess, "stemmer/model.ckpt")

'stemmer/model.ckpt'

In [37]:
strings=','.join([n.name for n in tf.get_default_graph().as_graph_def().node if "Variable" in n.op or n.name.find('Placeholder') >= 0 or n.name.find('logits') == 0])

In [39]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            "directory: %s" % model_dir)

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path
    
    absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + "/frozen_model.pb"
    clear_devices = True
    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(",")
        ) 
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

In [40]:
freeze_graph("stemmer", strings)

INFO:tensorflow:Restoring parameters from stemmer/model.ckpt
INFO:tensorflow:Froze 38 variables.
Converted 38 variables to const ops.
911 ops in the final graph.


In [41]:
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [42]:
g=load_graph('stemmer/frozen_model.pb')

In [44]:
x = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph=g)
predicted = test_sess.run(logits,feed_dict={x:str_idx(['kecomelan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: comel


In [47]:
predicted = test_sess.run(logits,feed_dict={x:str_idx(['perjalanan'],dictionary_from)})[0]
print('PREDICTED AFTER:',''.join([rev_dictionary_to[n] for n in predicted if n not in[0,1,2,3]]))

PREDICTED AFTER: jalan


In [45]:
import json
with open('stemmer-deep.json','w') as fopen:
    fopen.write(json.dumps({'dictionary_from':dictionary_from,
                'dictionary_to':dictionary_to,
                'rev_dictionary_to':rev_dictionary_to,
                'rev_dictionary_from':rev_dictionary_from}))