In [1]:
!wget https://huseinhouse-storage.s3-ap-southeast-1.amazonaws.com/bert-bahasa/dictionary-pos.json
!wget https://huseinhouse-storage.s3-ap-southeast-1.amazonaws.com/bert-bahasa/session-pos.pkl

--2019-08-04 13:55:49--  https://huseinhouse-storage.s3-ap-southeast-1.amazonaws.com/bert-bahasa/dictionary-pos.json
Resolving huseinhouse-storage.s3-ap-southeast-1.amazonaws.com (huseinhouse-storage.s3-ap-southeast-1.amazonaws.com)... 52.219.32.175
Connecting to huseinhouse-storage.s3-ap-southeast-1.amazonaws.com (huseinhouse-storage.s3-ap-southeast-1.amazonaws.com)|52.219.32.175|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 825070 (806K) [binary/octet-stream]
Saving to: ‘dictionary-pos.json’


2019-08-04 13:55:52 (554 KB/s) - ‘dictionary-pos.json’ saved [825070/825070]

--2019-08-04 13:55:53--  https://huseinhouse-storage.s3-ap-southeast-1.amazonaws.com/bert-bahasa/session-pos.pkl
Resolving huseinhouse-storage.s3-ap-southeast-1.amazonaws.com (huseinhouse-storage.s3-ap-southeast-1.amazonaws.com)... 52.219.40.27
Connecting to huseinhouse-storage.s3-ap-southeast-1.amazonaws.com (huseinhouse-storage.s3-ap-southeast-1.amazonaws.com)|52.219.40.27|:443... connect

In [2]:
import pickle
import json
import tensorflow as tf
import numpy as np

In [3]:
with open('session-pos.pkl', 'rb') as fopen:
    data = pickle.load(fopen)
data.keys()

dict_keys(['train_X', 'test_X', 'train_Y', 'test_Y'])

In [4]:
train_X = data['train_X']
test_X = data['test_X']
train_Y = data['train_Y']
test_Y = data['test_Y']

In [5]:
with open('dictionary-pos.json') as fopen:
    dictionary = json.load(fopen)
dictionary.keys()

dict_keys(['word2idx', 'idx2word', 'tag2idx', 'idx2tag', 'char2idx'])

In [6]:
word2idx = dictionary['word2idx']
idx2word = {int(k): v for k, v in dictionary['idx2word'].items()}
tag2idx = dictionary['tag2idx']
idx2tag = {int(k): v for k, v in dictionary['idx2tag'].items()}
char2idx = dictionary['char2idx']

In [7]:
list(zip([idx2word[d] for d in train_X[-1]], [idx2tag[d] for d in train_Y[-1]]))

[('-', 'PUNCT'),
 ('film', 'NOUN'),
 ('yang', 'PRON'),
 ('dibuatnya', 'VERB'),
 ('akan', 'ADV'),
 ('segera', 'ADV'),
 ('tayang', 'VERB'),
 ('.', 'PUNCT'),
 ('Jadi', 'ADV'),
 ('dicoba', 'VERB'),
 ('untuk', 'ADP'),
 ('menjawab', 'VERB'),
 ('pertanyaan-pertanyaan', 'NOUN'),
 ('seperti', 'ADP'),
 ('kebutuhan', 'NOUN'),
 ('apa', 'PRON'),
 ('yang', 'PRON'),
 ('dicoba', 'VERB'),
 ('dipuaskan', 'VERB'),
 ('oleh', 'ADP'),
 ('seseorang', 'NOUN'),
 ('?', 'PUNCT'),
 ('Kamu', 'PRON'),
 ('selalu', 'ADV'),
 ('bertanya', 'VERB'),
 ('apa', 'PRON'),
 ('itu', 'DET'),
 ('Pi', 'PROPN'),
 ('?', 'PUNCT'),
 ('Bagaimana', 'PRON'),
 ('di', 'ADP'),
 ('Indonesia', 'PROPN'),
 ('?', 'PUNCT'),
 ('Grimes', 'PROPN'),
 ('merupakan', 'VERB'),
 ('sebuah', 'DET'),
 ('di', 'ADP'),
 ('Dale', 'PROPN'),
 (',', 'PUNCT'),
 ('Alabama', 'PROPN'),
 (',', 'PUNCT'),
 ('Amerika', 'PROPN'),
 ('Serikat', 'PROPN'),
 ('.', 'PUNCT'),
 ('Sampul', 'NOUN'),
 ('dari', 'ADP'),
 ('dua', 'NUM'),
 ('singel', 'NOUN'),
 ('pertama', 'NUM'),
 ('difot

In [8]:
def generate_char_seq(batch):
    x = [[len(idx2word[i]) for i in k] for k in batch]
    maxlen = max([j for i in x for j in i])
    temp = np.zeros((batch.shape[0],batch.shape[1],maxlen),dtype=np.int32)
    for i in range(batch.shape[0]):
        for k in range(batch.shape[1]):
            for no, c in enumerate(idx2word[batch[i,k]]):
                temp[i,k,-1-no] = char2idx[c]
    return temp

In [9]:
generate_char_seq(data['train_X'][:10]).shape

(10, 50, 12)

In [11]:
class Model:
    def __init__(
        self,
        dim_word,
        dim_char,
        dropout,
        learning_rate,
        hidden_size_char,
        hidden_size_word,
        num_layers,
    ):
        def cells(size, reuse = False):
            return tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(
                    size,
                    initializer = tf.orthogonal_initializer(),
                    reuse = reuse,
                ),
                output_keep_prob = dropout,
            )

        def bahdanau(embedded, size):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units = hidden_size_word, memory = embedded
            )
            return tf.contrib.seq2seq.AttentionWrapper(
                cell = cells(hidden_size_word),
                attention_mechanism = attention_mechanism,
                attention_layer_size = hidden_size_word,
            )

        self.word_ids = tf.placeholder(tf.int32, shape = [None, None])
        self.char_ids = tf.placeholder(tf.int32, shape = [None, None, None])
        self.labels = tf.placeholder(tf.int32, shape = [None, None])
        self.maxlen = tf.shape(self.word_ids)[1]
        self.lengths = tf.count_nonzero(self.word_ids, 1)
        
        self.word_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(word2idx), dim_word], stddev = 1.0 / np.sqrt(dim_word)
            )
        )
        self.char_embeddings = tf.Variable(
            tf.truncated_normal(
                [len(char2idx), dim_char], stddev = 1.0 / np.sqrt(dim_char)
            )
        )

        word_embedded = tf.nn.embedding_lookup(
            self.word_embeddings, self.word_ids
        )
        char_embedded = tf.nn.embedding_lookup(
            self.char_embeddings, self.char_ids
        )
        s = tf.shape(char_embedded)
        char_embedded = tf.reshape(
            char_embedded, shape = [s[0] * s[1], s[-2], dim_char]
        )
        
        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(hidden_size_char),
                cell_bw = cells(hidden_size_char),
                inputs = char_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_char_%d' % (n),
            )
            char_embedded = tf.concat((out_fw, out_bw), 2)
        output = tf.reshape(
            char_embedded[:, -1], shape = [s[0], s[1], 2 * hidden_size_char]
        )
        word_embedded = tf.concat([word_embedded, output], axis = -1)

        for n in range(num_layers):
            (out_fw, out_bw), (
                state_fw,
                state_bw,
            ) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = bahdanau(word_embedded, hidden_size_word),
                cell_bw = bahdanau(word_embedded, hidden_size_word),
                inputs = word_embedded,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_word_%d' % (n),
            )
            word_embedded = tf.concat((out_fw, out_bw), 2)

        logits = tf.layers.dense(word_embedded, len(idx2tag))
        y_t = self.labels
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            logits, y_t, self.lengths
        )
        self.cost = tf.reduce_mean(-log_likelihood)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        mask = tf.sequence_mask(self.lengths, maxlen = self.maxlen)
        self.tags_seq, tags_score = tf.contrib.crf.crf_decode(
            logits, transition_params, self.lengths
        )
        self.tags_seq = tf.identity(self.tags_seq, name = 'logits')

        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(self.tags_seq, mask)
        mask_label = tf.boolean_mask(y_t, mask)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [12]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

dim_word = 128
dim_char = 256
dropout = 0.8
learning_rate = 1e-3
hidden_size_char = 128
hidden_size_word = 128
num_layers = 2
batch_size = 64

model = Model(dim_word,dim_char,dropout,learning_rate,hidden_size_char,hidden_size_word,num_layers)
sess.run(tf.global_variables_initializer())

In [13]:
string = 'KUALA LUMPUR: Sempena sambutan Aidilfitri minggu depan, Perdana Menteri Tun Dr Mahathir Mohamad dan Menteri Pengangkutan Anthony Loke Siew Fook menitipkan pesanan khas kepada orang ramai yang mahu pulang ke kampung halaman masing-masing. Dalam video pendek terbitan Jabatan Keselamatan Jalan Raya (JKJR) itu, Dr Mahathir menasihati mereka supaya berhenti berehat dan tidur sebentar  sekiranya mengantuk ketika memandu.'

import re

def entities_textcleaning(string, lowering = False):
    """
    use by entities recognition, pos recognition and dependency parsing
    """
    string = re.sub('[^A-Za-z0-9\-\/() ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    original_string = string.split()
    if lowering:
        string = string.lower()
    string = [
        (original_string[no], word.title() if word.isupper() else word)
        for no, word in enumerate(string.split())
        if len(word)
    ]
    return [s[0] for s in string], [s[1] for s in string]

def char_str_idx(corpus, dic, UNK = 0):
    maxlen = max([len(i) for i in corpus])
    X = np.zeros((len(corpus), maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i][:maxlen][::-1]):
            val = dic[k] if k in dic else UNK
            X[i, -1 - no] = val
    return X

In [14]:
from tqdm import tqdm
import time

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 2, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    lasttime = time.time()
    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, train_X.shape[0], batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, train_X.shape[0])
        batch_x = train_X[i : index]
        batch_char = generate_char_seq(batch_x)
        batch_y = train_Y[i : index]
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)
        
    pbar = tqdm(
        range(0, test_X.shape[0], batch_size), desc = 'test minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, test_X.shape[0])
        batch_x = test_X[i : index]
        batch_char = generate_char_seq(batch_x)
        batch_y = test_Y[i : index]
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
                model.labels: batch_y
            },
        )
        assert not np.isnan(cost)
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)
    
    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    
    sequence = entities_textcleaning(string)[1]
    X_seq = char_str_idx([sequence], word2idx, 2)
    X_char_seq = generate_char_seq(X_seq)

    predicted = sess.run(model.tags_seq,
                feed_dict = {
                    model.word_ids: X_seq,
                    model.char_ids: X_char_seq,
                },
        )[0]

    for i in range(len(predicted)):
        print(sequence[i],idx2tag[predicted[i]])
        
    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
    EPOCH += 1

train minibatch loop: 100%|██████████| 1524/1524 [13:39<00:00,  1.88it/s, accuracy=0.896, cost=16]  
test minibatch loop: 100%|██████████| 381/381 [01:36<00:00,  3.94it/s, accuracy=0.94, cost=9.56] 


time taken: 916.4935417175293
epoch: 0, training loss: 30.129804, training acc: 0.816989, valid loss: 15.117491, valid acc: 0.918162



train minibatch loop:   0%|          | 0/1524 [00:00<?, ?it/s]

Kuala PROPN
Lumpur PROPN
Sempena PROPN
sambutan NOUN
Aidilfitri PROPN
minggu VERB
depan ADJ
Perdana PROPN
Menteri PROPN
Tun PROPN
Dr PROPN
Mahathir PROPN
Mohamad PROPN
dan CCONJ
Menteri PROPN
Pengangkutan PROPN
Anthony PROPN
Loke PROPN
Siew PROPN
Fook PROPN
menitipkan PROPN
pesanan PROPN
khas NOUN
kepada ADP
orang NOUN
ramai NOUN
yang PRON
mahu ADJ
pulang VERB
ke ADP
kampung NOUN
halaman NOUN
masing-masing PROPN
Dalam ADP
video NOUN
pendek ADJ
terbitan NOUN
Jabatan PROPN
Keselamatan PROPN
Jalan PROPN
Raya PROPN
(Jkjr) PROPN
itu DET
Dr PROPN
Mahathir PROPN
menasihati PROPN
mereka PRON
supaya PART
berhenti VERB
berehat PROPN
dan CCONJ
tidur NOUN
sebentar ADJ
sekiranya PROPN
mengantuk PROPN
ketika SCONJ
memandu VERB
epoch: 0, pass acc: 0.000000, current acc: 0.918162


train minibatch loop: 100%|██████████| 1524/1524 [13:30<00:00,  1.90it/s, accuracy=0.95, cost=10.8] 
test minibatch loop: 100%|██████████| 381/381 [01:35<00:00,  4.01it/s, accuracy=0.948, cost=10]  


time taken: 906.6713445186615
epoch: 1, training loss: 10.626078, training acc: 0.939507, valid loss: 15.288042, valid acc: 0.921442



train minibatch loop:   0%|          | 0/1524 [00:00<?, ?it/s]

Kuala PROPN
Lumpur PROPN
Sempena PROPN
sambutan NOUN
Aidilfitri PROPN
minggu NOUN
depan ADJ
Perdana PROPN
Menteri PROPN
Tun PROPN
Dr PROPN
Mahathir PROPN
Mohamad PROPN
dan CCONJ
Menteri PROPN
Pengangkutan PROPN
Anthony PROPN
Loke PROPN
Siew PROPN
Fook PROPN
menitipkan PROPN
pesanan PROPN
khas VERB
kepada ADP
orang NOUN
ramai NOUN
yang PRON
mahu DET
pulang VERB
ke ADP
kampung NOUN
halaman NOUN
masing-masing PROPN
Dalam ADP
video NOUN
pendek ADJ
terbitan NOUN
Jabatan NOUN
Keselamatan PROPN
Jalan PROPN
Raya PROPN
(Jkjr) PROPN
itu DET
Dr PROPN
Mahathir PUNCT
menasihati PROPN
mereka PRON
supaya SCONJ
berhenti VERB
berehat PROPN
dan CCONJ
tidur NOUN
sebentar ADV
sekiranya PUNCT
mengantuk PROPN
ketika SCONJ
memandu VERB
epoch: 1, pass acc: 0.918162, current acc: 0.921442


train minibatch loop: 100%|██████████| 1524/1524 [13:27<00:00,  1.90it/s, accuracy=0.945, cost=8.01] 
test minibatch loop: 100%|██████████| 381/381 [01:35<00:00,  4.06it/s, accuracy=0.933, cost=12.7]


time taken: 903.2434318065643
epoch: 2, training loss: 7.451073, training acc: 0.956498, valid loss: 17.149170, valid acc: 0.919412



train minibatch loop:   0%|          | 0/1524 [00:00<?, ?it/s]

Kuala PROPN
Lumpur PROPN
Sempena PUNCT
sambutan NOUN
Aidilfitri PROPN
minggu VERB
depan NOUN
Perdana PROPN
Menteri PROPN
Tun PROPN
Dr PROPN
Mahathir PROPN
Mohamad PROPN
dan CCONJ
Menteri PROPN
Pengangkutan PROPN
Anthony PROPN
Loke PROPN
Siew PROPN
Fook PROPN
menitipkan PROPN
pesanan PROPN
khas VERB
kepada ADP
orang NOUN
ramai NOUN
yang PRON
mahu ADV
pulang VERB
ke ADP
kampung NOUN
halaman NOUN
masing-masing PROPN
Dalam ADP
video NOUN
pendek ADJ
terbitan NOUN
Jabatan NOUN
Keselamatan PROPN
Jalan PROPN
Raya PROPN
(Jkjr) PROPN
itu DET
Dr PROPN
Mahathir PROPN
menasihati PROPN
mereka PRON
supaya SCONJ
berhenti VERB
berehat PROPN
dan CCONJ
tidur NOUN
sebentar ADV
sekiranya PROPN
mengantuk PROPN
ketika SCONJ
memandu VERB


train minibatch loop: 100%|██████████| 1524/1524 [13:27<00:00,  1.91it/s, accuracy=0.969, cost=4.91] 
test minibatch loop: 100%|██████████| 381/381 [01:35<00:00,  4.09it/s, accuracy=0.921, cost=14.5]


time taken: 902.8044228553772
epoch: 3, training loss: 5.376336, training acc: 0.967726, valid loss: 20.922392, valid acc: 0.905358

Kuala PROPN
Lumpur PROPN
Sempena PROPN
sambutan NOUN
Aidilfitri PROPN
minggu NOUN
depan ADJ
Perdana PROPN
Menteri PROPN
Tun PROPN
Dr PROPN
Mahathir PROPN
Mohamad PROPN
dan CCONJ
Menteri PROPN
Pengangkutan PROPN
Anthony PROPN
Loke PROPN
Siew PROPN
Fook PROPN
menitipkan PROPN
pesanan PROPN
khas VERB
kepada ADP
orang NOUN
ramai NOUN
yang PRON
mahu ADV
pulang VERB
ke ADP
kampung NOUN
halaman NOUN
masing-masing NOUN
Dalam ADP
video NOUN
pendek ADJ
terbitan NOUN
Jabatan NOUN
Keselamatan NOUN
Jalan NOUN
Raya PROPN
(Jkjr) NOUN
itu DET
Dr ADV
Mahathir PROPN
menasihati PROPN
mereka PRON
supaya SCONJ
berhenti VERB
berehat PROPN
dan CCONJ
tidur NOUN
sebentar ADV
sekiranya PROPN
mengantuk PROPN
ketika SCONJ
memandu VERB
break epoch:4



In [16]:
sequence = entities_textcleaning('mahathir suka Akta 19977')[1]
X_seq = char_str_idx([sequence], word2idx, 2)
X_char_seq = generate_char_seq(X_seq)

predicted = sess.run(model.tags_seq,
            feed_dict = {
                model.word_ids: X_seq,
                model.char_ids: X_char_seq,
            },
    )[0]

for i in range(len(predicted)):
    print(sequence[i],idx2tag[predicted[i]])

mahathir PROPN
suka VERB
Akta PROPN
19977 PROPN


In [17]:
def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            out_i.append(idx2tag[p])
        out.append(out_i)
    return out

In [18]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, len(test_X), batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
    batch_char = generate_char_seq(batch_x)
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predicted = pred2label(sess.run(model.tags_seq,
            feed_dict = {
                model.word_ids: batch_x,
                model.char_ids: batch_char,
            },
    ))
    real = pred2label(batch_y)
    predict_Y.extend(predicted)
    real_Y.extend(real)

validation minibatch loop: 100%|██████████| 381/381 [01:33<00:00,  4.19it/s]


In [19]:
from sklearn.metrics import classification_report
print(classification_report(np.array(real_Y).ravel(), np.array(predict_Y).ravel(),
                           digits = 6))

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

         ADJ   0.840047  0.621951  0.714731     45666
         ADP   0.964556  0.949134  0.956783    119589
         ADV   0.807150  0.835846  0.821247     47760
         AUX   0.980583  0.999900  0.990147     10000
       CCONJ   0.973852  0.910791  0.941266     37171
         DET   0.952105  0.917197  0.934325     38839
        NOUN   0.789860  0.935113  0.856371    268329
         NUM   0.920680  0.936206  0.928378     41211
        PART   0.933212  0.835818  0.881834      5500
        PRON   0.977711  0.935968  0.956384     48835
       PROPN   0.944440  0.816074  0.875577    227608
       PUNCT   0.997880  0.999076  0.998478    182824
       SCONJ   0.740312  0.796898  0.767563     15150
         SYM   0.999425  0.965556  0.982198      3600
        VERB   0.931810  0.917996  0.924851    124518
           X   0.000000  0.000000  0.000000       150

    accuracy                       0.903527   1216750
   macro avg   0.859601  0

In [20]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'bahdanau/model.ckpt')

strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
        and 'OptimizeLoss' not in n.name
        and 'Global_Step' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'Variable',
 'Variable_1',
 'bidirectional_rnn_char_0/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/fw/lstm_cell/bias',
 'bidirectional_rnn_char_0/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_0/bw/lstm_cell/bias',
 'bidirectional_rnn_char_1/fw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/fw/lstm_cell/bias',
 'bidirectional_rnn_char_1/bw/lstm_cell/kernel',
 'bidirectional_rnn_char_1/bw/lstm_cell/bias',
 'memory_layer/kernel',
 'memory_layer_1/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/lstm_cell/bias',
 'bidirectional_rnn_word_0/fw/attention_wrapper/bahdanau_attention/query_layer/kernel',
 'bidirectional_rnn_word_0/fw/attention_wrapper/bahdanau_attention/attention_v',
 'bidirectional_rnn_word_0/fw/attention_wrapper/attention_layer/kernel',
 'bidirectional_rnn_word_0/bw/attention_wrapper/lstm_cell/kernel',
 'bidirectional_rnn_word_0/bw/attention_wrapp

In [21]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))
        
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph


In [22]:
freeze_graph('bahdanau', strings)

1928 ops in the final graph.


In [23]:
import boto3

bucketName = 'huseinhouse-storage'
Key = 'bahdanau/frozen_model.pb'
outPutname = "v27/pos/bahdanau-pos.pb"

s3 = boto3.client('s3',
                 aws_access_key_id='',
                 aws_secret_access_key='')
s3.upload_file(Key,bucketName,outPutname)