In [1]:
import re
import numpy as np
import pandas as pd
import collections
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.cross_validation import train_test_split
from unidecode import unidecode
from tqdm import tqdm
import itertools
import time



In [2]:
permulaan = [
    'bel',
    'se',
    'ter',
    'men',
    'meng',
    'mem',
    'memper',
    'di',
    'pe',
    'me',
    'ke',
    'ber',
    'pen',
    'per',
]

hujung = ['kan', 'kah', 'lah', 'tah', 'nya', 'an', 'wan', 'wati', 'ita']

def naive_stemmer(word):
    assert isinstance(word, str), 'input must be a string'
    hujung_result = re.findall(r'^(.*?)(%s)$' % ('|'.join(hujung)), word)
    word = hujung_result[0][0] if len(hujung_result) else word
    permulaan_result = re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan[::-1])), word)
    permulaan_result.extend(re.findall(r'^(.*?)(%s)' % ('|'.join(permulaan)), word))
    mula = permulaan_result if len(permulaan_result) else ''
    if len(mula):
        mula = mula[1][1] if len(mula[1][1]) > len(mula[0][1]) else mula[0][1]
    return word.replace(mula, '')

In [3]:
def _pad_sequence(
    sequence,
    n,
    pad_left = False,
    pad_right = False,
    left_pad_symbol = None,
    right_pad_symbol = None,
):
    sequence = iter(sequence)
    if pad_left:
        sequence = itertools.chain((left_pad_symbol,) * (n - 1), sequence)
    if pad_right:
        sequence = itertools.chain(sequence, (right_pad_symbol,) * (n - 1))
    return sequence


def ngrams(
    sequence,
    n,
    pad_left = False,
    pad_right = False,
    left_pad_symbol = None,
    right_pad_symbol = None,
):
    """
    generate ngrams

    Parameters
    ----------
    sequence : list of str
        list of tokenize words
    n : int
        ngram size

    Returns
    -------
    ngram: list
    """
    sequence = _pad_sequence(
        sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
    )

    history = []
    while n > 1:
        try:
            next_item = next(sequence)
        except StopIteration:
            return
        history.append(next_item)
        n -= 1
    for item in sequence:
        history.append(item)
        yield tuple(history)
        del history[0]

def build_dataset(words, n_words):
    count = [['GO', 0], ['PAD', 1], ['EOS', 2], ['UNK', 3]]
    counter = collections.Counter(words).most_common(n_words)
    count.extend(counter)
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 3)
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary


def classification_textcleaning(string):
    string = re.sub(
        'http\S+|www.\S+',
        '',
        ' '.join(
            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
        ),
    )
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    string = ' '.join(
        [i for i in re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', string) if len(i)]
    )
    string = string.lower().split()
    string = [(naive_stemmer(word), word) for word in string]
    return (
        ' '.join([word[0] for word in string if len(word[0]) > 1]),
        ' '.join([word[1] for word in string if len(word[0]) > 1]),
    )


def str_idx(corpus, dic, maxlen, UNK = 3):
    X = np.zeros((len(corpus), maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i].split()[:maxlen][::-1]):
            val = dic[k] if k in dic else UNK
            X[i, -1 - no] = val
    return X

In [4]:
df = pd.read_csv('dataset/sentiment-data-v2.csv')
Y = LabelEncoder().fit_transform(df.label)
df.head()

Unnamed: 0,label,text
0,Negative,Lebih-lebih lagi dengan kemudahan internet da...
1,Positive,boleh memberi teguran kepada parti tetapi perl...
2,Negative,Adalah membingungkan mengapa masyarakat Cina b...
3,Positive,Kami menurunkan defisit daripada 6.7 peratus p...
4,Negative,"Ini masalahnya. Bukan rakyat, tetapi sistem"


In [5]:
with open('dataset/polarity-negative-translated.txt','r') as fopen:
    texts = fopen.read().split('\n')
labels = [0] * len(texts)

with open('dataset/polarity-positive-translated.txt','r') as fopen:
    positive_texts = fopen.read().split('\n')
labels += [1] * len(positive_texts)
texts += positive_texts
texts += df.iloc[:,1].tolist()
labels += Y.tolist()

assert len(labels) == len(texts)

In [6]:
for i in range(len(texts)):
    texts[i] = classification_textcleaning(texts[i])[0]

In [7]:
texts_trigram = []
for i in range(len(texts)):
    splitted = texts[i].split()
    lists_gram = []
    for split in splitted:
        lists_gram.extend([''.join(gram) for gram in list(ngrams(split,3))])
    texts_trigram.append(' '.join(lists_gram))
texts_trigram[:10]

['rin ing ngk gka kas bod odo doh bos osa san',
 'kan ana nak kan ana nak lel ela lak aki rem ema maj aja yan ang beg egi git itu mud uda mud uda dap apa pat eka den eng luc ucu',
 'eks ksp spl plo loi oit ita tat ati tif bah aha hag agi bes esa sar tid ida dak pun uny nya yai dal ala lam ata tau can ang ngg ggi gih yan ang gaw awa was asi raw awa wat gra raf afi fis per ert rti jah aha hat yan ang tan ang ngg ggu gun ung',
 'gar arb rbu bus bua uan ang pot ote ten ens nsi unt ntu tuk kaj aji pat ato tol olo log ogi gga gan ant nti lod odr dra ram ama mir iri rin ing ada ada',
 'nam luc uca cah tet eta tap api ang car ara ang ang emo mos osi dal ala lam gay aya mah aha ham',
 'cer itu jug uga tid ida dak asl sli ran ana rek eka dat ata tan ang sud uda dah kit ita tar mul ula leb ebi bih ban any nya yak kal ali dar ari rip ipa pad ada jag aga unt ntu tuk kir ira',
 'gen ena nai sat atu sat atu kar ara yan ang ber eri mat ata fil ile lem ada bra rav ava vad ado unt ntu tuk gam amb mbi bi

In [8]:
def preprocessing(string):
    string = classification_textcleaning(string)[0]
    splitted = string.split()
    lists_gram = []
    for split in splitted:
        lists_gram.extend([''.join(gram) for gram in list(ngrams(split,3))])
    return ' '.join(lists_gram)

In [9]:
concat = ' '.join(texts_trigram).split()
vocabulary_size = len(list(set(concat)))
data, count, dictionary, rev_dictionary = build_dataset(concat, vocabulary_size)
print('vocab from size: %d'%(vocabulary_size))
print('Most common words', count[4:10])
print('Sample data', data[:10], [rev_dictionary[i] for i in data[:10]])

vocab from size: 5362
Most common words [('ang', 26934), ('yan', 15236), ('ada', 7896), ('ala', 5266), ('ntu', 5141), ('dak', 5037)]
Sample data [225, 22, 53, 136, 331, 631, 580, 591, 566, 470] ['rin', 'ing', 'ngk', 'gka', 'kas', 'bod', 'odo', 'doh', 'bos', 'osa']


In [10]:
class Model:
    def __init__(self, size_layer, num_layers, embedded_size,
                 dict_size, dimension_output, learning_rate, dropout):
        
        def cells(size, reuse = False):
            return tf.contrib.rnn.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(
                    size,
                    initializer = tf.orthogonal_initializer(),
                    reuse = reuse,
                ),
                state_keep_prob = dropout,
                output_keep_prob = dropout,
            )
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None])
        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        rnn_cells = tf.nn.rnn_cell.MultiRNNCell([cells(size_layer) for _ in range(num_layers)])
        outputs, _ = tf.nn.dynamic_rnn(rnn_cells, encoder_embedded, dtype = tf.float32)
        W = tf.get_variable(
            'w',
            shape = (size_layer, dimension_output),
            initializer = tf.orthogonal_initializer(),
        )
        b = tf.get_variable(
            'b', shape = (dimension_output), initializer = tf.zeros_initializer()
        )
        self.logits = tf.add(tf.matmul(outputs[:,-1], W), b, name = 'logits')
        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits = self.logits, labels = self.Y
            )
        )
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate = learning_rate
        ).minimize(self.cost)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.nn.in_top_k(self.logits, self.Y, 1), tf.float32)
        )

In [11]:
size_layer = 256
num_layers = 2
embedded_size = 256
dimension_output = 2
learning_rate = 1e-4
batch_size = 32
dropout = 0.95
maxlen = 150

tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(
    size_layer,
    num_layers,
    embedded_size,
    len(dictionary),
    dimension_output,
    learning_rate,
    dropout,
)
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'recurrent-char/model.ckpt')

'recurrent-char/model.ckpt'

In [12]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'alphas' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
    ]
)

In [13]:
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Variable',
 'rnn/multi_rnn_cell/cell_0/lstm_cell/kernel',
 'rnn/multi_rnn_cell/cell_0/lstm_cell/bias',
 'rnn/multi_rnn_cell/cell_1/lstm_cell/kernel',
 'rnn/multi_rnn_cell/cell_1/lstm_cell/bias',
 'w',
 'b',
 'logits',
 'gradients/logits_grad/Shape',
 'gradients/logits_grad/Shape_1',
 'gradients/logits_grad/BroadcastGradientArgs',
 'gradients/logits_grad/Sum',
 'gradients/logits_grad/Reshape',
 'gradients/logits_grad/Sum_1',
 'gradients/logits_grad/Reshape_1',
 'gradients/logits_grad/tuple/group_deps',
 'gradients/logits_grad/tuple/control_dependency',
 'gradients/logits_grad/tuple/control_dependency_1']

In [14]:
tf.trainable_variables()

[<tf.Variable 'Variable:0' shape=(5366, 256) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0' shape=(512, 1024) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0' shape=(512, 1024) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'w:0' shape=(256, 2) dtype=float32_ref>,
 <tf.Variable 'b:0' shape=(2,) dtype=float32_ref>]

In [15]:
vectors = str_idx(texts_trigram, dictionary, maxlen)
train_X, test_X, train_Y, test_Y = train_test_split(
    vectors, labels, test_size = 0.2
)

In [16]:
from tqdm import tqdm
import time

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 5, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, len(test_X), batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1


train minibatch loop: 100%|██████████| 357/357 [00:56<00:00,  6.37it/s, accuracy=0.613, cost=0.623]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 19.73it/s, accuracy=0.75, cost=0.579] 
train minibatch loop:   0%|          | 1/357 [00:00<00:56,  6.26it/s, accuracy=0.562, cost=0.648]

epoch: 0, pass acc: 0.000000, current acc: 0.616597
time taken: 61.14745259284973
epoch: 0, training loss: 0.668460, training acc: 0.588778, valid loss: 0.659608, valid acc: 0.616597



train minibatch loop: 100%|██████████| 357/357 [00:56<00:00,  6.35it/s, accuracy=0.774, cost=0.551]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 19.89it/s, accuracy=0.625, cost=0.521]
train minibatch loop:   0%|          | 1/357 [00:00<00:55,  6.38it/s, accuracy=0.5, cost=0.654]

epoch: 1, pass acc: 0.616597, current acc: 0.647059
time taken: 60.79658222198486
epoch: 1, training loss: 0.617211, training acc: 0.665392, valid loss: 0.638821, valid acc: 0.647059



train minibatch loop: 100%|██████████| 357/357 [00:56<00:00,  6.37it/s, accuracy=0.806, cost=0.498]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 20.03it/s, accuracy=0.75, cost=0.515] 
train minibatch loop:   0%|          | 1/357 [00:00<00:55,  6.36it/s, accuracy=0.469, cost=0.652]

epoch: 2, pass acc: 0.647059, current acc: 0.665616
time taken: 60.78228831291199
epoch: 2, training loss: 0.580469, training acc: 0.695685, valid loss: 0.628927, valid acc: 0.665616



train minibatch loop: 100%|██████████| 357/357 [00:56<00:00,  6.39it/s, accuracy=0.806, cost=0.452]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 20.03it/s, accuracy=0.5, cost=0.55]   
train minibatch loop:   0%|          | 1/357 [00:00<00:54,  6.50it/s, accuracy=0.562, cost=0.616]

time taken: 60.588990211486816
epoch: 3, training loss: 0.546634, training acc: 0.720634, valid loss: 0.630206, valid acc: 0.664566



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.36it/s, accuracy=0.839, cost=0.419]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 19.94it/s, accuracy=0.5, cost=0.604]  
train minibatch loop:   0%|          | 1/357 [00:00<00:55,  6.46it/s, accuracy=0.594, cost=0.616]

epoch: 4, pass acc: 0.665616, current acc: 0.674370
time taken: 60.42559885978699
epoch: 4, training loss: 0.514601, training acc: 0.741647, valid loss: 0.640544, valid acc: 0.674370



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.42it/s, accuracy=0.871, cost=0.397]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 19.94it/s, accuracy=0.5, cost=0.649]  
train minibatch loop:   0%|          | 1/357 [00:00<00:55,  6.38it/s, accuracy=0.719, cost=0.593]

epoch: 5, pass acc: 0.674370, current acc: 0.675070
time taken: 60.36845088005066
epoch: 5, training loss: 0.480747, training acc: 0.767300, valid loss: 0.656224, valid acc: 0.675070



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.39it/s, accuracy=0.871, cost=0.352]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 20.01it/s, accuracy=0.5, cost=0.729]  
train minibatch loop:   0%|          | 1/357 [00:00<00:55,  6.47it/s, accuracy=0.75, cost=0.572]

epoch: 6, pass acc: 0.675070, current acc: 0.678922
time taken: 60.22839856147766
epoch: 6, training loss: 0.444079, training acc: 0.791637, valid loss: 0.688800, valid acc: 0.678922



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.41it/s, accuracy=0.903, cost=0.307]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 19.97it/s, accuracy=0.625, cost=0.781]
train minibatch loop:   0%|          | 1/357 [00:00<00:54,  6.47it/s, accuracy=0.75, cost=0.537]

time taken: 60.30971026420593
epoch: 7, training loss: 0.406277, training acc: 0.814138, valid loss: 0.740637, valid acc: 0.678221



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.41it/s, accuracy=0.968, cost=0.22] 
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 20.14it/s, accuracy=0.5, cost=0.99]   
train minibatch loop:   0%|          | 1/357 [00:00<00:55,  6.44it/s, accuracy=0.781, cost=0.458]

time taken: 60.260737895965576
epoch: 8, training loss: 0.364735, training acc: 0.837343, valid loss: 0.825598, valid acc: 0.668067



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.40it/s, accuracy=0.935, cost=0.214]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 19.98it/s, accuracy=0.5, cost=0.894]  
train minibatch loop:   0%|          | 1/357 [00:00<00:55,  6.44it/s, accuracy=0.812, cost=0.381]

time taken: 60.30143165588379
epoch: 9, training loss: 0.320435, training acc: 0.859138, valid loss: 0.862100, valid acc: 0.661064



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.41it/s, accuracy=0.935, cost=0.193]
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 20.01it/s, accuracy=0.5, cost=1.11]   
train minibatch loop:   0%|          | 1/357 [00:00<00:54,  6.47it/s, accuracy=0.781, cost=0.367]

time taken: 60.27753210067749
epoch: 10, training loss: 0.286791, training acc: 0.875946, valid loss: 1.006772, valid acc: 0.664566



train minibatch loop: 100%|██████████| 357/357 [00:55<00:00,  6.43it/s, accuracy=1, cost=0.129]     
test minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 20.00it/s, accuracy=0.625, cost=0.727]

time taken: 60.28226399421692
epoch: 11, training loss: 0.258542, training acc: 0.890747, valid loss: 1.056794, valid acc: 0.660714

break epoch:12






In [17]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, len(test_X), batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x, model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y


validation minibatch loop: 100%|██████████| 90/90 [00:04<00:00, 20.34it/s]


In [18]:
print(
    metrics.classification_report(
        real_Y, predict_Y, target_names = ['negative', 'positive']
    )
)

             precision    recall  f1-score   support

   negative       0.63      0.59      0.61      1307
   positive       0.67      0.70      0.69      1549

avg / total       0.65      0.65      0.65      2856



In [28]:
text = preprocessing(texts[0])
print(text)
new_vector = str_idx([text[0]], dictionary, len(text[0].split()))
sess.run(tf.nn.softmax(model.logits), feed_dict={model.X:new_vector})

rin ing ngk gka kas bod odo doh bos


array([[0.53504175, 0.46495822]], dtype=float32)

In [20]:
text = preprocessing('kerajaan sebenarnya sangat sayangkan rakyatnya')
print(text)
new_vector = str_idx([text[0]], dictionary, len(text[0].split()))
sess.run(tf.nn.softmax(model.logits), feed_dict={model.X:new_vector})

raj aja ben ena nar san ang nga gat say aya yan ang rak aky kya yat


array([[0.5367403 , 0.46325973]], dtype=float32)

In [21]:
text = preprocessing('kerajaan sebenarnya sangat sayangkan rakyatnya, tetapi sebenarnya benci')
print(text)
new_vector = str_idx([text[0]], dictionary, len(text[0].split()))
sess.run(tf.nn.softmax(model.logits), feed_dict={model.X:new_vector})

raj aja ben ena nar san ang nga gat say aya yan ang rak aky kya yat tet eta tap api ben ena nar ben enc nci


array([[0.53222364, 0.46777636]], dtype=float32)

In [22]:
import json
with open('recurrent-char-sentiment.json','w') as fopen:
    fopen.write(json.dumps({'dictionary':dictionary,'reverse_dictionary':rev_dictionary}))

In [23]:
saver.save(sess, 'recurrent-char/model.ckpt')

'recurrent-char/model.ckpt'

In [24]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [25]:
freeze_graph('recurrent-char', strings)

INFO:tensorflow:Restoring parameters from recurrent-char/model.ckpt
INFO:tensorflow:Froze 7 variables.
INFO:tensorflow:Converted 7 variables to const ops.
270 ops in the final graph.


In [26]:
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [27]:
g = load_graph('recurrent-char/frozen_model.pb')
x = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph = g)
test_sess.run(tf.nn.softmax(logits), feed_dict = {x: new_vector})



array([[0.540204, 0.459796]], dtype=float32)