In [1]:
import re
import numpy as np
import pandas as pd
import collections
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.cross_validation import train_test_split
from unidecode import unidecode
from nltk.util import ngrams
from tqdm import tqdm
import time



In [2]:
permulaan = [
    'bel',
    'se',
    'ter',
    'men',
    'meng',
    'mem',
    'memper',
    'di',
    'pe',
    'me',
    'ke',
    'ber',
    'pen',
    'per',
]

hujung = ['kan', 'kah', 'lah', 'tah', 'nya', 'an', 'wan', 'wati', 'ita']

def naive_stemmer(word):
    assert isinstance(word, str), 'input must be a string'
    hujung_result = [e for e in hujung if word.endswith(e)]
    if len(hujung_result):
        hujung_result = max(hujung_result, key = len)
        if len(hujung_result):
            word = word[: -len(hujung_result)]
    permulaan_result = [e for e in permulaan if word.startswith(e)]
    if len(permulaan_result):
        permulaan_result = max(permulaan_result, key = len)
        if len(permulaan_result):
            word = word[len(permulaan_result) :]
    return word

In [3]:
def build_dataset(words, n_words):
    count = [['GO', 0], ['PAD', 1], ['EOS', 2], ['UNK', 3]]
    counter = collections.Counter(words).most_common(n_words)
    count.extend(counter)
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 3)
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary


def classification_textcleaning(string):
    string = re.sub(
        'http\S+|www.\S+',
        '',
        ' '.join(
            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
        ),
    )
    string = unidecode(string).replace('.', ' . ').replace(',', ' , ')
    string = re.sub('[^A-Za-z ]+', ' ', string)
    string = re.sub(r'[ ]+', ' ', string).strip()
    string = ' '.join(
        [i for i in re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', string) if len(i)]
    )
    string = string.lower().split()
    string = [naive_stemmer(word) for word in string]
    return ' '.join([word for word in string if len(word) > 1])


def str_idx(corpus, dic, maxlen, UNK = 3):
    X = np.zeros((len(corpus), maxlen))
    for i in range(len(corpus)):
        for no, k in enumerate(corpus[i].split()[:maxlen][::-1]):
            X[i, -1 - no] = dic.get(k, UNK)
    return X

In [4]:
classification_textcleaning('kerajaan sebenarnya sangat bencikan rakyatnya, minyak naik dan segalanya')

'raja benar sangat benci rakyat minyak naik gala'

In [5]:
with open('subjectivity-negative-translated.txt','r') as fopen:
    texts = fopen.read().split('\n')
labels = [0] * len(texts)

with open('subjectivity-positive-translated.txt','r') as fopen:
    positive_texts = fopen.read().split('\n')
labels += [1] * len(positive_texts)
texts += positive_texts

assert len(labels) == len(texts)

In [6]:
for i in range(len(texts)):
    texts[i] = classification_textcleaning(texts[i])

In [7]:
concat = ' '.join(texts).split()
vocabulary_size = len(list(set(concat)))
data, count, dictionary, rev_dictionary = build_dataset(concat, vocabulary_size)
print('vocab from size: %d'%(vocabulary_size))
print('Most common words', count[4:10])
print('Sample data', data[:10], [rev_dictionary[i] for i in data[:10]])

vocab from size: 13269
Most common words [('yang', 11804), ('untuk', 3879), ('tidak', 2898), ('deng', 2827), ('ada', 2298), ('dalam', 2193)]
Sample data [10, 68, 13, 28, 55, 53, 11, 387, 34, 186] ['filem', 'mula', 'pada', 'masa', 'lalu', 'mana', 'orang', 'budak', 'lelaki', 'nama']


In [8]:
max_features = len(dictionary)
maxlen = 100
batch_size = 32
embedded_size = 256

In [9]:
X = str_idx(texts, dictionary, maxlen)

In [10]:
train_X, test_X, train_Y, test_Y = train_test_split(X, 
                                                    labels,
                                                    test_size = 0.2)

In [11]:
class Model:
    def __init__(
        self, embedded_size, dict_size, dimension_output, learning_rate
    ):

        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None])
        encoder_embeddings = tf.Variable(
            tf.random_uniform([dict_size, embedded_size], -1, 1)
        )
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        self.logits = tf.identity(
            tf.layers.dense(
                tf.reduce_mean(encoder_embedded, 1), dimension_output
            ),
            name = 'logits',
        )
        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits = self.logits, labels = self.Y
            )
        )
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )
        correct_pred = tf.equal(
            tf.argmax(self.logits, 1, output_type = tf.int32), self.Y
        )
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [12]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(embedded_size, max_features, 2, 5e-4)
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'fast-text/model.ckpt')

'fast-text/model.ckpt'

In [13]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name)
        and 'Adam' not in n.name
        and 'beta' not in n.name
    ]
)

In [14]:
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Variable',
 'dense/kernel',
 'dense/bias',
 'logits']

In [15]:
tf.trainable_variables()

[<tf.Variable 'Variable:0' shape=(13273, 256) dtype=float32_ref>,
 <tf.Variable 'dense/kernel:0' shape=(256, 2) dtype=float32_ref>,
 <tf.Variable 'dense/bias:0' shape=(2,) dtype=float32_ref>]

In [16]:
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    pbar = tqdm(
        range(0, len(train_X), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        batch_x = train_X[i : min(i + batch_size, train_X.shape[0])]
        batch_y = train_Y[i : min(i + batch_size, train_X.shape[0])]
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.X: batch_x,
                model.Y: batch_y
            },
        )
        assert not np.isnan(cost)
        train_loss += cost
        train_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    pbar = tqdm(range(0, len(test_X), batch_size), desc = 'test minibatch loop')
    for i in pbar:
        batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
        batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
        batch_x_expand = np.expand_dims(batch_x,axis = 1)
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.X: batch_x,
                model.Y: batch_y
            },
        )
        test_loss += cost
        test_acc += acc
        pbar.set_postfix(cost = cost, accuracy = acc)

    train_loss /= len(train_X) / batch_size
    train_acc /= len(train_X) / batch_size
    test_loss /= len(test_X) / batch_size
    test_acc /= len(test_X) / batch_size

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

saver.save(sess, "fast-text/model.ckpt")

train minibatch loop: 100%|██████████| 250/250 [00:03<00:00, 80.20it/s, accuracy=0, cost=0.779]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 231.76it/s, accuracy=0.667, cost=0.646]
train minibatch loop:   3%|▎         | 7/250 [00:00<00:03, 66.79it/s, accuracy=0.594, cost=0.666]

epoch: 0, pass acc: 0.000000, current acc: 0.609299
time taken: 3.391561985015869
epoch: 0, training loss: 0.685732, training acc: 0.553896, valid loss: 0.676238, valid acc: 0.609299



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.88it/s, accuracy=0, cost=0.736]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 222.72it/s, accuracy=0.667, cost=0.621]
train minibatch loop:   4%|▎         | 9/250 [00:00<00:02, 81.30it/s, accuracy=0.688, cost=0.632]

epoch: 1, pass acc: 0.609299, current acc: 0.761833
time taken: 3.1963703632354736
epoch: 1, training loss: 0.648177, training acc: 0.701845, valid loss: 0.635186, valid acc: 0.761833



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 87.14it/s, accuracy=1, cost=0.676]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 222.57it/s, accuracy=0.778, cost=0.59] 
train minibatch loop:   4%|▍         | 11/250 [00:00<00:02, 106.93it/s, accuracy=0.875, cost=0.512]

epoch: 2, pass acc: 0.761833, current acc: 0.838881
time taken: 3.2188074588775635
epoch: 2, training loss: 0.598484, training acc: 0.799724, valid loss: 0.580830, valid acc: 0.838881



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.58it/s, accuracy=1, cost=0.607]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 236.82it/s, accuracy=0.778, cost=0.56] 
train minibatch loop:   4%|▎         | 9/250 [00:00<00:02, 85.48it/s, accuracy=0.906, cost=0.44] 

epoch: 3, pass acc: 0.838881, current acc: 0.852930
time taken: 3.1890017986297607
epoch: 3, training loss: 0.537547, training acc: 0.848162, valid loss: 0.521171, valid acc: 0.852930



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.20it/s, accuracy=1, cost=0.54]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 232.53it/s, accuracy=0.778, cost=0.538]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 73.89it/s, accuracy=0.875, cost=0.421]

epoch: 4, pass acc: 0.852930, current acc: 0.859954
time taken: 3.206967353820801
epoch: 4, training loss: 0.475943, training acc: 0.868616, valid loss: 0.467396, valid acc: 0.859954



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 89.77it/s, accuracy=1, cost=0.481]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 228.06it/s, accuracy=0.778, cost=0.523]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 76.36it/s, accuracy=0.875, cost=0.378]

epoch: 5, pass acc: 0.859954, current acc: 0.865975
time taken: 3.1982595920562744
epoch: 5, training loss: 0.422640, training acc: 0.880914, valid loss: 0.424664, valid acc: 0.865975



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.60it/s, accuracy=1, cost=0.431]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 216.14it/s, accuracy=0.778, cost=0.514]
train minibatch loop:   5%|▍         | 12/250 [00:00<00:02, 117.18it/s, accuracy=0.844, cost=0.372]

epoch: 6, pass acc: 0.865975, current acc: 0.875509
time taken: 3.2150681018829346
epoch: 6, training loss: 0.379994, training acc: 0.888945, valid loss: 0.392454, valid acc: 0.875509



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 83.97it/s, accuracy=1, cost=0.391]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 235.45it/s, accuracy=0.778, cost=0.508]
train minibatch loop:   4%|▎         | 9/250 [00:00<00:02, 84.60it/s, accuracy=0.719, cost=0.498]

epoch: 7, pass acc: 0.875509, current acc: 0.878018
time taken: 3.2466814517974854
epoch: 7, training loss: 0.346466, training acc: 0.894968, valid loss: 0.368386, valid acc: 0.878018



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 84.28it/s, accuracy=1, cost=0.359]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 177.22it/s, accuracy=0.778, cost=0.504]
train minibatch loop:   4%|▎         | 9/250 [00:00<00:02, 84.59it/s, accuracy=0.906, cost=0.276]

epoch: 8, pass acc: 0.878018, current acc: 0.880025
time taken: 3.2371163368225098
epoch: 8, training loss: 0.319742, training acc: 0.899486, valid loss: 0.350208, valid acc: 0.880025



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.55it/s, accuracy=1, cost=0.332]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 255.06it/s, accuracy=0.778, cost=0.501]
train minibatch loop:   3%|▎         | 7/250 [00:00<00:03, 65.36it/s, accuracy=0.906, cost=0.245]

epoch: 9, pass acc: 0.880025, current acc: 0.882533
time taken: 3.1710965633392334
epoch: 9, training loss: 0.297900, training acc: 0.905258, valid loss: 0.336245, valid acc: 0.882533



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 86.30it/s, accuracy=1, cost=0.31]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 212.60it/s, accuracy=0.778, cost=0.499]
train minibatch loop:   4%|▍         | 11/250 [00:00<00:02, 104.89it/s, accuracy=0.938, cost=0.182]

epoch: 10, pass acc: 0.882533, current acc: 0.885042
time taken: 3.1959893703460693
epoch: 10, training loss: 0.279566, training acc: 0.911532, valid loss: 0.325334, valid acc: 0.885042



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 86.02it/s, accuracy=1, cost=0.292]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 235.11it/s, accuracy=0.778, cost=0.498]
train minibatch loop:   4%|▎         | 9/250 [00:00<00:03, 79.07it/s, accuracy=0.75, cost=0.455] 

epoch: 11, pass acc: 0.885042, current acc: 0.890060
time taken: 3.1764888763427734
epoch: 11, training loss: 0.263799, training acc: 0.914920, valid loss: 0.316681, valid acc: 0.890060



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.31it/s, accuracy=1, cost=0.276]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 227.50it/s, accuracy=0.778, cost=0.498]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 74.38it/s, accuracy=0.906, cost=0.197]

epoch: 12, pass acc: 0.890060, current acc: 0.891063
time taken: 3.2097699642181396
epoch: 12, training loss: 0.249953, training acc: 0.920693, valid loss: 0.309735, valid acc: 0.891063



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.97it/s, accuracy=1, cost=0.263]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 231.38it/s, accuracy=0.778, cost=0.498]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 78.19it/s, accuracy=0.938, cost=0.179]

epoch: 13, pass acc: 0.891063, current acc: 0.893572
time taken: 3.184178590774536
epoch: 13, training loss: 0.237585, training acc: 0.924457, valid loss: 0.304114, valid acc: 0.893572



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 86.80it/s, accuracy=1, cost=0.252]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 219.03it/s, accuracy=0.778, cost=0.499]
train minibatch loop:   5%|▍         | 12/250 [00:00<00:02, 110.12it/s, accuracy=0.938, cost=0.147]

epoch: 14, pass acc: 0.893572, current acc: 0.894575
time taken: 3.2424464225769043
epoch: 14, training loss: 0.226381, training acc: 0.928473, valid loss: 0.299542, valid acc: 0.894575



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 84.83it/s, accuracy=1, cost=0.243]    
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 213.38it/s, accuracy=0.778, cost=0.5]  
train minibatch loop:   5%|▍         | 12/250 [00:00<00:02, 110.51it/s, accuracy=0.75, cost=0.414] 

epoch: 15, pass acc: 0.894575, current acc: 0.895579
time taken: 3.2455670833587646
epoch: 15, training loss: 0.216118, training acc: 0.932739, valid loss: 0.295822, valid acc: 0.895579



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 86.58it/s, accuracy=1, cost=0.234]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 211.03it/s, accuracy=0.778, cost=0.502]
train minibatch loop:   4%|▎         | 9/250 [00:00<00:03, 78.15it/s, accuracy=0.75, cost=0.403] 

epoch: 16, pass acc: 0.895579, current acc: 0.897084
time taken: 3.1893484592437744
epoch: 16, training loss: 0.206631, training acc: 0.935626, valid loss: 0.292807, valid acc: 0.897084



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.59it/s, accuracy=1, cost=0.227]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 241.81it/s, accuracy=0.778, cost=0.504]
train minibatch loop:   3%|▎         | 7/250 [00:00<00:04, 59.32it/s, accuracy=0.969, cost=0.144]

epoch: 17, pass acc: 0.897084, current acc: 0.898088
time taken: 3.183919668197632
epoch: 17, training loss: 0.197800, training acc: 0.938888, valid loss: 0.290387, valid acc: 0.898088



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 84.57it/s, accuracy=1, cost=0.221]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 238.50it/s, accuracy=0.889, cost=0.506]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 72.00it/s, accuracy=0.969, cost=0.13] 

epoch: 18, pass acc: 0.898088, current acc: 0.899872
time taken: 3.223072052001953
epoch: 18, training loss: 0.189530, training acc: 0.941272, valid loss: 0.288476, valid acc: 0.899872



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.54it/s, accuracy=1, cost=0.215]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 212.98it/s, accuracy=0.889, cost=0.509]
train minibatch loop:   5%|▍         | 12/250 [00:00<00:02, 111.31it/s, accuracy=0.938, cost=0.202]

epoch: 19, pass acc: 0.899872, current acc: 0.901377
time taken: 3.220914602279663
epoch: 19, training loss: 0.181752, training acc: 0.943657, valid loss: 0.287009, valid acc: 0.901377



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.78it/s, accuracy=1, cost=0.21]      
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 235.75it/s, accuracy=0.889, cost=0.511]
train minibatch loop:   3%|▎         | 8/250 [00:00<00:03, 77.09it/s, accuracy=0.938, cost=0.197]

epoch: 20, pass acc: 0.901377, current acc: 0.901879
time taken: 3.183746099472046
epoch: 20, training loss: 0.174408, training acc: 0.947296, valid loss: 0.285935, valid acc: 0.901879



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 80.64it/s, accuracy=1, cost=0.205]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 247.81it/s, accuracy=0.889, cost=0.514]
train minibatch loop:   3%|▎         | 7/250 [00:00<00:03, 68.21it/s, accuracy=0.938, cost=0.176]

time taken: 3.181997060775757
epoch: 21, training loss: 0.167454, training acc: 0.950182, valid loss: 0.285210, valid acc: 0.899370



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 87.21it/s, accuracy=1, cost=0.2]       
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 215.59it/s, accuracy=0.889, cost=0.517]
train minibatch loop:   4%|▍         | 11/250 [00:00<00:02, 106.46it/s, accuracy=0.938, cost=0.118]

time taken: 3.1607871055603027
epoch: 22, training loss: 0.160853, training acc: 0.952441, valid loss: 0.284802, valid acc: 0.899872



train minibatch loop: 100%|██████████| 250/250 [00:02<00:00, 85.02it/s, accuracy=1, cost=0.196]     
test minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 242.72it/s, accuracy=0.889, cost=0.52] 


time taken: 3.201809883117676
epoch: 23, training loss: 0.154575, training acc: 0.955076, valid loss: 0.284680, valid acc: 0.901879

break epoch:24



'fast-text/model.ckpt'

In [17]:
real_Y, predict_Y = [], []

pbar = tqdm(
    range(0, len(test_X), batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    batch_x = test_X[i : min(i + batch_size, test_X.shape[0])]
    batch_y = test_Y[i : min(i + batch_size, test_X.shape[0])]
    predict_Y += np.argmax(
        sess.run(
            model.logits, feed_dict = {model.X: batch_x, model.Y: batch_y}
        ),
        1,
    ).tolist()
    real_Y += batch_y

validation minibatch loop: 100%|██████████| 63/63 [00:00<00:00, 718.86it/s]


In [18]:
from sklearn import metrics
print(metrics.classification_report(real_Y, predict_Y, target_names = ['-','+']))

             precision    recall  f1-score   support

          -       0.88      0.89      0.89       953
          +       0.90      0.89      0.90      1040

avg / total       0.89      0.89      0.89      1993



In [19]:
text = 'kerajaan sebenarnya sangat sayangkan rakyatnya, tetapi sebenarnya benci'
new_vector = str_idx([classification_textcleaning(text)],dictionary, len(text.split()))
sess.run(tf.nn.softmax(model.logits), feed_dict={model.X:new_vector})

array([[2.0217092e-05, 9.9997973e-01]], dtype=float32)

In [20]:
import json
with open('fast-text-subjective.json','w') as fopen:
    fopen.write(json.dumps({'dictionary':dictionary,'reverse_dictionary':rev_dictionary}))

In [21]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [22]:
freeze_graph('fast-text', strings)

INFO:tensorflow:Restoring parameters from fast-text/model.ckpt
INFO:tensorflow:Froze 3 variables.
INFO:tensorflow:Converted 3 variables to const ops.
16 ops in the final graph.


In [23]:
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [24]:
g = load_graph('fast-text/frozen_model.pb')
x = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')
test_sess = tf.InteractiveSession(graph = g)
test_sess.run(tf.nn.softmax(logits), feed_dict = {x: new_vector})



array([[2.0217092e-05, 9.9997973e-01]], dtype=float32)