In [1]:
import os

# Restrict this process to GPU 0. Set here, before TensorFlow is imported in a later cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Ask TensorFlow to grow GPU memory on demand rather than reserving it all at start-up.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [2]:
# !wget https://huggingface.co/huseinzol05/bpe/resolve/main/stemmer.yttm

In [13]:
import youtokentome as yttm
import tensorflow as tf



2022-08-31 22:55:11.033086: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [4]:
# Load the trained YouTokenToMe BPE model from the working directory.
# NOTE(review): the commented-out download cell above fetches `stemmer.yttm`, while this
# line opens `stemmer-v2.yttm` — confirm which file is the intended one.
bpe = yttm.BPE(model='stemmer-v2.yttm')

In [5]:
bpe.vocab_size()

1000

In [6]:
# Special token ids used by the model graph.
PAD = 0  # padding id; the model derives sequence lengths with count_nonzero, so padding must be 0
EOS = 1  # end-of-sequence id; passed to the decoders as end_token
UNK = 2  # unknown-token id (not referenced by any other visible cell)
GO = 3   # start-of-sequence id; prepended to decoder inputs and used as start token

In [7]:
class Translator:
    """Seq2seq LSTM encoder/decoder with Bahdanau attention, built with TF1 graph-mode APIs.

    Constructing an instance adds the complete graph to the current default graph:
    the ``X``/``Y`` placeholders, a teacher-forced training decoder, a greedy
    decoder (``self.greedy``), a beam-search decoder (``self.beam``) and the
    loss / optimizer / accuracy ops.

    Later cells look ops up by their auto-generated names (``Placeholder``,
    ``decode_1/greedy``, ``decode_2/beam``), so the order in which ops are
    created here matters.
    """
    def __init__(self, size_layer, num_layers, embedded_size,
                 from_dict_size, to_dict_size, learning_rate, beam_width = 10):
        """Build the graph.

        Args:
            size_layer: units per LSTM cell; also used as the attention
                ``num_units`` and ``attention_layer_size``.
            num_layers: number of stacked LSTM cells in the encoder and in the decoder.
            embedded_size: width of both embedding tables.
            from_dict_size: number of rows in the encoder embedding table.
            to_dict_size: number of rows in the decoder embedding table and
                width of the output projection.
            learning_rate: learning rate given to the Adam optimizer.
            beam_width: number of beams kept by the beam-search decoder.
        """
        
        # Factory for one LSTM layer with orthogonal weight initialisation.
        def cells(reuse=False):
            return tf.nn.rnn_cell.LSTMCell(size_layer,initializer=tf.orthogonal_initializer(),reuse=reuse)
        
        # Builds the decoder cell: a stack of `num_layers` LSTM cells wrapped with
        # Bahdanau attention over `encoder_out`, masked by `seq_len`.
        def attention(encoder_out, seq_len, reuse=False):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units = size_layer, 
                                                                    memory = encoder_out,
                                                                    memory_sequence_length = seq_len)
            return tf.contrib.seq2seq.AttentionWrapper(
            cell = tf.nn.rnn_cell.MultiRNNCell([cells(reuse) for _ in range(num_layers)]), 
                attention_mechanism = attention_mechanism,
                attention_layer_size = size_layer)
        
        # Token-id inputs, batch-major: axis 0 is the batch, axis 1 is time.
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        # Lengths are the per-row count of non-zero ids, i.e. id 0 is treated as padding.
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]
        
        # Separate embedding tables for source and target, initialised uniformly in [-1, 1).
        encoder_embedding = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))
        decoder_embedding = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))
        
        encoder_out, encoder_state = tf.nn.dynamic_rnn(
            cell = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)]), 
            inputs = tf.nn.embedding_lookup(encoder_embedding, self.X),
            sequence_length = self.X_seq_len,
            dtype = tf.float32)
        # Teacher forcing input: drop the last column of Y and prepend GO.
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        # Output projection to vocabulary logits; the same layer object is given to all three decoders.
        dense = tf.layers.Dense(to_dict_size)
        
        # --- Training decoder (teacher forced) ---
        with tf.variable_scope('decode'):
            decoder_cells = attention(encoder_out, self.X_seq_len)
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs = tf.nn.embedding_lookup(decoder_embedding, decoder_input),
                sequence_length = self.Y_seq_len,
                time_major = False)
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell = decoder_cells,
                    helper = training_helper,
                    # Start from the attention wrapper's zero state, with the LSTM state
                    # replaced by the encoder's final state.
                    initial_state = decoder_cells.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state),
                    output_layer = dense)
            training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder = training_decoder,
                    impute_finished = True,
                    maximum_iterations = tf.reduce_max(self.Y_seq_len))
            self.training_logits = training_decoder_output.rnn_output
            
        # --- Greedy inference decoder; reuses the 'decode' variables and the same cell object ---
        with tf.variable_scope('decode', reuse=True):
            predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding = decoder_embedding,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                end_token = EOS)
            predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell = decoder_cells,
                    helper = predicting_helper,
                    initial_state = decoder_cells.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state),
                    output_layer = dense)
            predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder = predicting_decoder,
                    impute_finished = True,
                    # Output length is capped at twice the longest source sequence in the batch.
                    maximum_iterations = 2 * tf.reduce_max(self.X_seq_len))
            self.greedy = predicting_decoder_output.sample_id
            # Named so the op can be found after export (listed as 'decode_1/greedy' further down).
            self.greedy = tf.identity(self.greedy,name='greedy')
        
        # --- Beam-search inference decoder ---
        with tf.variable_scope('decode', reuse=True):
            
            # Encoder outputs, state and lengths are tiled beam_width times, and the
            # zero state below is requested for batch_size * beam_width rows to match.
            encoder_out_tiled = tf.contrib.seq2seq.tile_batch(encoder_out, beam_width)
            encoder_state_tiled = tf.contrib.seq2seq.tile_batch(encoder_state, beam_width)
            X_seq_len_tiled = tf.contrib.seq2seq.tile_batch(self.X_seq_len, beam_width)
            # A second attention cell over the tiled memory, created with reuse=True.
            decoder_cell = attention(encoder_out_tiled, X_seq_len_tiled, reuse=True)
            
            predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell = decoder_cell,
                embedding = decoder_embedding,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                end_token = EOS,
                initial_state = decoder_cell.zero_state(batch_size * beam_width, tf.float32).clone(
                    cell_state = encoder_state_tiled),
                beam_width = beam_width,
                output_layer = dense,
                length_penalty_weight = 0.0)
            
            predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = predicting_decoder,
                impute_finished = False,
                # NOTE(review): capped at 1x the longest source length, whereas the greedy
                # decoder above allows 2x — confirm the difference is intended.
                maximum_iterations = tf.reduce_max(self.X_seq_len))
            
            # Keep index 0 of the last (beam) axis — presumably the best-scoring beam; confirm against TF docs.
            self.beam = predicting_decoder_output.predicted_ids[:, :, 0]
            # Named so the op can be found after export (looked up as 'decode_2/beam' further down).
            self.beam = tf.identity(self.beam,name='beam')
        
        # --- Loss, optimizer and token-level accuracy, all restricted to non-padding target positions ---
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        # NOTE(review): correct_index is never read; accuracy below repeats the same cast.
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [8]:
# Model and training hyperparameters.
size_layer = 256      # LSTM units per layer (passed to Translator as size_layer)
num_layers = 2        # stacked LSTM layers in both encoder and decoder
embedded_size = 256   # embedding width
learning_rate = 1e-3  # Adam learning rate
batch_size = 64       # examples per minibatch in the epoch loop
epoch = 20            # number of epochs requested from the epoch loop

In [11]:
vocab_size = bpe.vocab_size()

In [14]:
# Start from an empty default graph: later cells look ops up by auto-generated names
# ('Placeholder', 'decode_1/greedy', ...), which only hold when the model is built once.
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Source and target use the same BPE vocabulary, so vocab_size is passed for both dictionary sizes.
model = Translator(size_layer, num_layers, embedded_size, vocab_size, vocab_size, learning_rate)
sess.run(tf.global_variables_initializer())





2022-08-31 22:55:12.800900: I tensorflow/core/platform/profile_utils/cpu_utils.cc:109] CPU Frequency: 2496000000 Hz
2022-08-31 22:55:12.801751: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2628f30 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-08-31 22:55:12.801764: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2022-08-31 22:55:12.804282: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-08-31 22:55:12.872217: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-31 22:55:12.873031: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x221f770 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2022-08-31 22:55:





The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



2022-08-31 22:55:13.732519: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1206] Device interconnect StreamExecutor with strength 1 edge matrix:
2022-08-31 22:55:13.732547: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1212]      0 
2022-08-31 22:55:13.732554: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1225] 0:   N 
2022-08-31 22:55:13.733101: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-31 22:55:13.734202: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-31 22:55:13.735088: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1351] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 22056 MB memory) -> physical GPU (device: 0, name: NVIDIA GeF





In [15]:
# Saver restricted to the trainable variables.
# This save runs before any training step, so it stores the freshly initialised weights.
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'lstm/model.ckpt')





'lstm/model.ckpt'

In [16]:
def pad_sentence_batch(sentence_batch, pad_int):
    """Right-pad every sequence in a batch to the length of the longest one.

    Args:
        sentence_batch: list of token-id sequences. The inputs are not modified.
        pad_int: id appended to sequences shorter than the longest one.

    Returns:
        Tuple ``(padded_seqs, seq_lens)``: the padded copies and the original
        lengths, both in input order. An empty batch yields ``([], [])``.
    """
    seq_lens = [len(sentence) for sentence in sentence_batch]
    # default=0 keeps an empty batch from raising ValueError inside max().
    max_sentence_len = max(seq_lens, default=0)
    padded_seqs = [
        list(sentence) + [pad_int] * (max_sentence_len - len(sentence))
        for sentence in sentence_batch
    ]
    return padded_seqs, seq_lens

In [18]:
import json

with open('train_stem.json') as fopen:
    data = json.load(fopen)

In [19]:
train_X, train_Y = data['X'], data['Y']

In [20]:
with open('test_stem.json') as fopen:
    data = json.load(fopen)

In [21]:
test_X, test_Y = data['X'], data['Y']

In [22]:
len(train_X), len(test_X)

(1105254, 122806)

In [25]:
def pad_sentence_batch(sentence_batch, pad_int):
    """Right-pad every sequence in a batch to the length of the longest one.

    This cell repeats a definition that already appears earlier in the
    notebook; being the later one, it is the definition in effect from here on.

    Args:
        sentence_batch: list of token-id sequences. The inputs are not modified.
        pad_int: id appended to sequences shorter than the longest one.

    Returns:
        Tuple ``(padded_seqs, seq_lens)``: the padded copies and the original
        lengths, both in input order. An empty batch yields ``([], [])``.
    """
    seq_lens = [len(sentence) for sentence in sentence_batch]
    # default=0 keeps an empty batch from raising ValueError inside max().
    max_sentence_len = max(seq_lens, default=0)
    padded_seqs = [
        list(sentence) + [pad_int] * (max_sentence_len - len(sentence))
        for sentence in sentence_batch
    ]
    return padded_seqs, seq_lens

In [35]:
def encode(strings):
    """Turn a list of strings into a rectangular batch of BPE token ids.

    Each string is encoded with the notebook-level ``bpe`` model, terminated
    with ``EOS`` and right-padded with ``PAD`` to the longest sequence in the
    batch.

    Args:
        strings: list of str to encode.

    Returns:
        List of equal-length lists of int token ids, one per input string.
    """
    encoded = bpe.encode(strings, output_type=yttm.OutputType.ID)
    # Use the shared special-token constants instead of the bare literals 1 / 0,
    # so this stays in step with the ids the model graph was built with.
    terminated = [ids + [EOS] for ids in encoded]
    return pad_sentence_batch(terminated, PAD)[0]

In [36]:
batch_x = encode(train_X[: 5])
batch_y = encode(train_Y[: 5])

In [37]:
# Smoke test on the 5-example batch. Because model.optimizer is fetched, this performs
# one real optimisation step (the weights are updated), not just a loss evaluation.
sess.run([model.cost, model.optimizer], feed_dict = {model.X: batch_x, model.Y: batch_y})

[6.7832146, None]

In [38]:
# Inference smoke test: both decoders are run from X alone; Y is not fed.
greedy, beam = sess.run([model.greedy, model.beam], feed_dict = {model.X: encode(test_X[:1])})

In [39]:
PAD

0

In [42]:
import tqdm
import numpy as np

# Epoch loop: one full pass over the training set followed by one full pass over the test set.
for e in range(epoch):
    # Training data is visited in its stored order, in consecutive slices of batch_size (no shuffling).
    pbar = tqdm.tqdm(
        range(0, len(train_X), batch_size), desc = 'minibatch loop')
    # Per-epoch accumulators of per-batch metrics.
    train_loss, train_acc, test_loss, test_acc = [], [], [], []
    for i in pbar:
        # Clamp the end of the slice so the final batch may be smaller than batch_size.
        index = min(i + batch_size, len(train_X))
        batch_x = encode(train_X[i: index])
        batch_y = encode(train_Y[i: index])
        feed = {model.X: batch_x,
                model.Y: batch_y}
        # Fetching model.optimizer applies one Adam update for this batch.
        accuracy, loss, _ = sess.run([model.accuracy,model.cost,model.optimizer],
                                    feed_dict = feed)
        train_loss.append(loss)
        train_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
        
    # Evaluation pass: only accuracy and cost are fetched, so no weights change here.
    pbar = tqdm.tqdm(
        range(0, len(test_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x = encode(test_X[i: index])
        batch_y = encode(test_Y[i: index])
        feed = {model.X: batch_x,
                model.Y: batch_y,}
        accuracy, loss = sess.run([model.accuracy,model.cost],
                                    feed_dict = feed)

        test_loss.append(loss)
        test_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    
    # Reported averages are plain means of the per-batch values (a smaller last batch counts the same).
    # NOTE(review): no checkpoint is written inside the loop; weights are only saved by a later cell.
    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 np.mean(train_loss),np.mean(train_acc)))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              np.mean(test_loss),np.mean(test_acc)))

minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:38<00:00, 50.97it/s, accuracy=0.984, cost=0.0502]
minibatch loop: 100%|██████████████████████████████████████████| 1919/1919 [00:16<00:00, 116.27it/s, accuracy=0.969, cost=0.088]


epoch 1, training avg loss 0.190069, training avg acc 0.958462
epoch 1, testing avg loss 0.068573, testing avg acc 0.980784


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:41<00:00, 50.50it/s, accuracy=0.979, cost=0.0482]
minibatch loop: 100%|██████████████████████████████████████████| 1919/1919 [00:16<00:00, 117.31it/s, accuracy=0.962, cost=0.121]


epoch 2, training avg loss 0.055345, training avg acc 0.983778
epoch 2, testing avg loss 0.051792, testing avg acc 0.984755


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:40<00:00, 50.75it/s, accuracy=0.984, cost=0.0534]
minibatch loop: 100%|██████████████████████████████████████████| 1919/1919 [00:16<00:00, 117.24it/s, accuracy=0.985, cost=0.042]


epoch 3, training avg loss 0.043766, training avg acc 0.986767
epoch 3, testing avg loss 0.044021, testing avg acc 0.986961


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:41<00:00, 50.52it/s, accuracy=0.995, cost=0.0214]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 114.30it/s, accuracy=0.981, cost=0.0353]


epoch 4, training avg loss 0.038122, training avg acc 0.988276
epoch 4, testing avg loss 0.045103, testing avg acc 0.986909


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:44<00:00, 50.09it/s, accuracy=0.989, cost=0.0248]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 116.58it/s, accuracy=0.981, cost=0.0573]


epoch 5, training avg loss 0.034501, training avg acc 0.989312
epoch 5, testing avg loss 0.043325, testing avg acc 0.987728


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:41<00:00, 50.62it/s, accuracy=0.995, cost=0.0119]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 117.22it/s, accuracy=0.977, cost=0.0727]


epoch 6, training avg loss 0.032236, training avg acc 0.989974
epoch 6, testing avg loss 0.042486, testing avg acc 0.988128


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:45<00:00, 50.00it/s, accuracy=0.989, cost=0.0364]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 114.03it/s, accuracy=0.977, cost=0.0996]


epoch 7, training avg loss 0.030661, training avg acc 0.990452
epoch 7, testing avg loss 0.041820, testing avg acc 0.988554


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:48<00:00, 49.60it/s, accuracy=0.995, cost=0.0187]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 113.20it/s, accuracy=0.989, cost=0.0344]


epoch 8, training avg loss 0.029076, training avg acc 0.990942
epoch 8, testing avg loss 0.041454, testing avg acc 0.988507


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:44<00:00, 50.11it/s, accuracy=0.995, cost=0.0111]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 117.04it/s, accuracy=0.977, cost=0.0561]


epoch 9, training avg loss 0.028049, training avg acc 0.991237
epoch 9, testing avg loss 0.042071, testing avg acc 0.988930


minibatch loop:  67%|████████████████████████████▎             | 11630/17270 [03:49<01:48, 51.82it/s, accuracy=0.99, cost=0.036]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:41<00:00, 50.60it/s, accuracy=0.995, cost=0.0134]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 116.87it/s, accuracy=0.992, cost=0.0162]


epoch 12, training avg loss 0.025533, training avg acc 0.992099
epoch 12, testing avg loss 0.041639, testing avg acc 0.989444


minibatch loop: 100%|███████████████████████████████████████████| 17270/17270 [05:46<00:00, 49.84it/s, accuracy=1, cost=0.00323]
minibatch loop: 100%|██████████████████████████████████████████| 1919/1919 [00:16<00:00, 114.86it/s, accuracy=0.977, cost=0.066]


epoch 13, training avg loss 0.025277, training avg acc 0.992268
epoch 13, testing avg loss 0.041396, testing avg acc 0.989518


minibatch loop: 100%|████████████████████████████████████████| 17270/17270 [05:42<00:00, 50.46it/s, accuracy=0.995, cost=0.0214]
minibatch loop: 100%|█████████████████████████████████████████| 1919/1919 [00:16<00:00, 115.95it/s, accuracy=0.992, cost=0.0387]


epoch 14, training avg loss 0.024778, training avg acc 0.992423
epoch 14, testing avg loss 0.042864, testing avg acc 0.989474


minibatch loop:  23%|█████████▍                                | 3889/17270 [01:17<04:25, 50.42it/s, accuracy=0.99, cost=0.0274]


KeyboardInterrupt: 

In [43]:
# Save the trainable variables after training, to the same 'lstm/model.ckpt' path
# used by the earlier pre-training save.
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'lstm/model.ckpt')

'lstm/model.ckpt'

In [44]:
# Encode a single word directly with the BPE model; it is wrapped in a list below to form a batch of one.
# NOTE(review): unlike encode(), no EOS id is appended here, so this input differs from the
# EOS-terminated sequences the model was trained on — confirm this is intended.
berjalankan = bpe.encode('berjalankansangat', output_type=yttm.OutputType.ID)

In [45]:
greedy, beam = sess.run([model.greedy, model.beam], feed_dict = {model.X: [berjalankan]})

In [47]:
bpe.decode(greedy.tolist())

['jalan<EOS>']

In [48]:
def calculate_cer(actual, hyp):
    """
    Calculate CER using `python-Levenshtein`.

    Spaces are removed from both strings, then the character-level edit
    distance is divided by the length of the reference.

    Args:
        actual: reference string.
        hyp: hypothesis string.

    Returns:
        float: edit distance / number of reference characters. If the reference
        has no non-space characters the distance is divided by 1 instead of
        raising ZeroDivisionError, so two empty strings score 0.0 and an empty
        reference scores the number of hypothesis characters.
    """
    import Levenshtein as Lev

    actual = actual.replace(' ', '')
    hyp = hyp.replace(' ', '')
    # Clamp the denominator so an empty reference cannot abort a long evaluation run.
    return Lev.distance(actual, hyp) / max(len(actual), 1)


def calculate_wer(actual, hyp):
    """
    Calculate WER using `python-Levenshtein`.

    Both strings are split on whitespace. Every distinct word is mapped to one
    character, so that a character-level edit distance between the mapped
    strings equals the word-level edit distance.

    Args:
        actual: reference string.
        hyp: hypothesis string.

    Returns:
        float: word edit distance / number of reference words. If the reference
        contains no words the distance is divided by 1 instead of raising
        ZeroDivisionError, so two empty strings score 0.0 and an empty
        reference scores the number of hypothesis words.
    """
    import Levenshtein as Lev

    # Split once and reuse the token lists.
    actual_words = actual.split()
    hyp_words = hyp.split()

    vocab = set(actual_words + hyp_words)
    word2char = dict(zip(vocab, range(len(vocab))))

    w1 = [chr(word2char[w]) for w in actual_words]
    w2 = [chr(word2char[w]) for w in hyp_words]

    # Clamp the denominator so an empty reference cannot abort a long evaluation run.
    return Lev.distance(''.join(w1), ''.join(w2)) / max(len(actual_words), 1)


In [61]:
# Per-example character and word error rates of the greedy decoder on the test set.
cers, wers = [], []

# Examples are scored one at a time because only element [0] of each decoded
# batch is compared below. A dedicated name is used so that the training
# hyperparameter `batch_size` is not silently overwritten with 1, which would
# change the behaviour of the training cell if it were re-run afterwards.
eval_batch_size = 1
pbar = tqdm.tqdm(
    range(0, len(test_X), eval_batch_size), desc = 'minibatch loop')
for i in pbar:
    index = min(i + eval_batch_size, len(test_X))
    batch_x = encode(test_X[i: index])
    batch_y = encode(test_Y[i: index])
    feed = {model.X: batch_x,
            model.Y: batch_y,}
    greedy = sess.run(model.greedy, feed_dict = feed)
    # Decode ids back to text and drop the end-of-sequence marker before scoring.
    predicted = bpe.decode(greedy.tolist())[0].replace('<EOS>', '')
    actual = bpe.decode(batch_y)[0].replace('<EOS>', '')
    cer = calculate_cer(actual, predicted)
    wer = calculate_wer(actual, predicted)
    
    cers.append(cer)
    wers.append(wer)
    
# Last expression of the cell: displayed as (mean CER, mean WER).
np.mean(cers), np.mean(wers)

minibatch loop: 100%|██████████████████████████████████████████████████████████████████| 122806/122806 [10:26<00:00, 195.97it/s]


(0.021437940248792045, 0.04399622168297966)

In [62]:
# Comma-separated names of the graph nodes to keep when freezing: every node whose
# op type mentions 'Variable', plus nodes whose name mentions a placeholder or one of
# the named decoder outputs, minus optimizer-related nodes. The 'beam' substring
# also matches internal beam-search nodes, as the listing below shows.
strings = ','.join(
    node.name
    for node in tf.get_default_graph().as_graph_def().node
    if (
        'Variable' in node.op
        or any(tag in node.name for tag in ('Placeholder', 'greedy', 'beam', 'alphas'))
    )
    and not any(
        tag in node.name for tag in ('Adam', 'beta', 'OptimizeLoss', 'Global_Step')
    )
)
strings.split(',')




['Placeholder',
 'Placeholder_1',
 'Variable',
 'Variable_1',
 'rnn/multi_rnn_cell/cell_0/lstm_cell/kernel',
 'rnn/multi_rnn_cell/cell_0/lstm_cell/bias',
 'rnn/multi_rnn_cell/cell_1/lstm_cell/kernel',
 'rnn/multi_rnn_cell/cell_1/lstm_cell/bias',
 'decode/memory_layer/kernel',
 'decode/decoder/attention_wrapper/multi_rnn_cell/cell_0/lstm_cell/kernel',
 'decode/decoder/attention_wrapper/multi_rnn_cell/cell_0/lstm_cell/bias',
 'decode/decoder/attention_wrapper/multi_rnn_cell/cell_1/lstm_cell/kernel',
 'decode/decoder/attention_wrapper/multi_rnn_cell/cell_1/lstm_cell/bias',
 'decode/decoder/attention_wrapper/bahdanau_attention/query_layer/kernel',
 'decode/decoder/attention_wrapper/bahdanau_attention/attention_v',
 'decode/decoder/attention_wrapper/attention_layer/kernel',
 'decode/decoder/dense/kernel',
 'decode/decoder/dense/bias',
 'decode_1/greedy',
 'decode_2/decoder/while/BeamSearchDecoderStep/beam_width',
 'decode_2/decoder/while/BeamSearchDecoderStep/next_beam_probs/range/start',
 

In [63]:
def freeze_graph(model_dir, output_node_names):
    """Freeze the latest checkpoint in ``model_dir`` into ``frozen_model.pb``.

    The checkpoint's meta graph is imported into a fresh graph, the weights
    are restored, variables are converted to constants, and the result is
    written next to the checkpoint as ``frozen_model.pb``.

    Args:
        model_dir: directory that holds the checkpoint state file.
        output_node_names: comma-separated names of the nodes to keep as
            outputs; empty entries are ignored.

    Raises:
        AssertionError: if ``model_dir`` does not exist or holds no checkpoint.
    """

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            "directory: %s" % model_dir)

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    # get_checkpoint_state returns None when there is no checkpoint state file;
    # fail with a clear message instead of an AttributeError on the next line.
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            "No checkpoint found in directory: %s" % model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    # Build the output path with os.path: splitting on '/' yields an empty
    # directory for a bare checkpoint name, which made the old code write to
    # '/frozen_model.pb' at the filesystem root.
    output_graph = os.path.join(os.path.dirname(input_checkpoint), "frozen_model.pb")
    # Ignore empty entries such as those produced by a trailing comma.
    node_names = [name.strip() for name in output_node_names.split(",") if name.strip()]
    clear_devices = True
    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            node_names
        )
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

In [64]:
freeze_graph("lstm", strings)






2022-09-01 00:42:31.022294: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-01 00:42:31.036168: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1666] Found device 0 with properties: 
name: NVIDIA GeForce RTX 3090 Ti major: 8 minor: 6 memoryClockRate(GHz): 1.86
pciBusID: 0000:01:00.0
2022-09-01 00:42:31.036219: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2022-09-01 00:42:31.036237: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2022-09-01 00:42:31.036244: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2022-09-01 00:42:31.036250: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library

INFO:tensorflow:Restoring parameters from lstm/model.ckpt
INFO:tensorflow:Froze 16 variables.
INFO:tensorflow:Converted 16 variables to const ops.

1649 ops in the final graph.


In [65]:
def load_graph(frozen_graph_filename):
    """Read a serialized GraphDef from disk and import it into a new graph.

    Args:
        frozen_graph_filename: path to the frozen ``.pb`` file.

    Returns:
        A new ``tf.Graph`` containing the imported nodes. No name is passed to
        ``tf.import_graph_def``, so its default prefix applies.
    """
    with tf.gfile.GFile(frozen_graph_filename, "rb") as handle:
        serialized = handle.read()

    parsed_def = tf.GraphDef()
    parsed_def.ParseFromString(serialized)

    imported = tf.Graph()
    with imported.as_default():
        tf.import_graph_def(parsed_def)
    return imported

In [66]:
g = load_graph('lstm/frozen_model.pb')




In [67]:
# Nodes of the imported graph are looked up under the 'import/' prefix.
# 'Placeholder' is the first placeholder the model creates, i.e. its X input.
x = g.get_tensor_by_name('import/Placeholder:0')
# Outputs of the greedy and beam-search decoders, named via tf.identity in the model.
i_greedy = g.get_tensor_by_name('import/decode_1/greedy:0')
i_beam = g.get_tensor_by_name('import/decode_2/beam:0')

In [68]:
test_sess = tf.InteractiveSession(graph=g)

2022-09-01 00:42:34.504743: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-01 00:42:34.508401: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1666] Found device 0 with properties: 
name: NVIDIA GeForce RTX 3090 Ti major: 8 minor: 6 memoryClockRate(GHz): 1.86
pciBusID: 0000:01:00.0
2022-09-01 00:42:34.508501: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2022-09-01 00:42:34.508564: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2022-09-01 00:42:34.508659: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2022-09-01 00:42:34.508683: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library

In [69]:
greedy, beam = test_sess.run([i_greedy, i_beam], feed_dict = {x: [berjalankan]})

In [70]:
bpe.decode(greedy.tolist()[0])

['jalan<EOS>']

In [71]:
from tensorflow.tools.graph_transforms import TransformGraph
from tensorflow.contrib.seq2seq.python.ops import beam_search_ops

In [72]:
# Graph-transform pipeline applied, in this order, to the frozen graph.
transforms = ['add_default_attributes',
             'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',
             'fold_batch_norms',
             'fold_old_batch_norms',
             'quantize_weights(fallback_min=-10, fallback_max=10)',
             'strip_unused_nodes',
             'sort_by_execution_order']

pb = 'lstm/frozen_model.pb'
input_graph_def = tf.GraphDef()
# tf.gfile.GFile replaces the deprecated FastGFile alias and matches the other cells.
with tf.gfile.GFile(pb, 'rb') as f:
    input_graph_def.ParseFromString(f.read())

# Inputs and outputs are given as node names of the frozen graph (no 'import/' prefix).
transformed_graph_def = TransformGraph(input_graph_def, 
                                       ['Placeholder'],
                                       ['decode_1/greedy', 'decode_2/beam'], transforms)

# The transformed graph is written next to the original with a '.quantized' suffix.
with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())

2022-09-01 00:42:38.668770: I tensorflow/tools/graph_transforms/transform_graph.cc:318] Applying add_default_attributes
2022-09-01 00:42:38.675316: I tensorflow/tools/graph_transforms/transform_graph.cc:318] Applying remove_nodes
2022-09-01 00:42:38.681046: I tensorflow/tools/graph_transforms/remove_nodes.cc:78] Skipping replacement for decode_1/greedy
2022-09-01 00:42:38.681910: I tensorflow/tools/graph_transforms/remove_nodes.cc:78] Skipping replacement for decode_2/beam
2022-09-01 00:42:38.688958: I tensorflow/tools/graph_transforms/remove_nodes.cc:78] Skipping replacement for decode_1/greedy
2022-09-01 00:42:38.689583: I tensorflow/tools/graph_transforms/remove_nodes.cc:78] Skipping replacement for decode_2/beam
2022-09-01 00:42:38.696485: I tensorflow/tools/graph_transforms/remove_nodes.cc:78] Skipping replacement for decode_1/greedy
2022-09-01 00:42:38.697167: I tensorflow/tools/graph_transforms/remove_nodes.cc:78] Skipping replacement for decode_2/beam
2022-09-01 00:42:38.718220

In [73]:
from malaya_boilerplate.huggingface import upload_dict

  from .autonotebook import tqdm as notebook_tqdm


In [74]:
!tar -cvf output-stemmer-base.tar lstm

lstm/
lstm/checkpoint
lstm/frozen_model.pb.quantized
lstm/model.ckpt.index
lstm/model.ckpt.data-00000-of-00001
lstm/model.ckpt.meta
lstm/frozen_model.pb


In [75]:
# Upload the tarball of the lstm/ directory created by the tar cell above.
# NOTE(review): files_mapping is presumably {local path: remote filename} — confirm against malaya_boilerplate.
files_mapping = {'output-stemmer-base.tar': 'output-stemmer-base.tar'}
upload_dict(model = 'pretrained-stemmer', files_mapping = files_mapping)



In [76]:
# Upload the frozen (unquantized) graph under the name 'model.pb'.
files_mapping = {'lstm/frozen_model.pb': 'model.pb'}
upload_dict(model = 'stem-v2-lstm-bahdanau', files_mapping = files_mapping)

In [77]:
# Upload the quantized graph, also named 'model.pb', to the separate '-quantized' model.
files_mapping = {'lstm/frozen_model.pb.quantized': 'model.pb'}
upload_dict(model = 'stem-v2-lstm-bahdanau-quantized', files_mapping = files_mapping)