In [1]:
import tensorflow as tf
import numpy as np

In [2]:
class Attention:
    """Concat-style attention: scores every timestep of a sequence against one query vector.

    The query is repeated along the time axis, concatenated with the sequence,
    passed through a tanh-activated dense layer and projected onto the vector
    `v` to give one score per timestep; the scores are softmax-normalised.
    """
    def __init__(self,hidden_size):
        """Create the dense projection and the scoring vector.

        hidden_size: output width of the dense layer and length of `v`.
        """
        self.hidden_size = hidden_size
        self.dense_layer = tf.layers.Dense(hidden_size)
        # NOTE(review): `v` is produced by tf.random_normal, not a tf.Variable, so it is
        # neither trained nor stored in checkpoints (and a random op yields new values
        # whenever it is evaluated). It is intentionally left as-is here: turning it into a
        # variable would add a variable that existing checkpoints do not contain.
        self.v = tf.random_normal([hidden_size],mean=0,stddev=1/np.sqrt(hidden_size))
        
    def score(self, hidden_tensor, encoder_outputs):
        """Return unnormalised scores of shape [batch, seq_len].

        hidden_tensor and encoder_outputs are rank-3 tensors that are concatenated
        on axis 2, so their first two dimensions must match.
        """
        # [batch, seq_len, hidden_size] after the dense layer.
        energy = tf.nn.tanh(self.dense_layer(tf.concat([hidden_tensor,encoder_outputs],2)))
        # -> [batch, hidden_size, seq_len] so that `v` can be applied with a matmul.
        energy = tf.transpose(energy,[0,2,1])
        batch_size = tf.shape(encoder_outputs)[0]
        # Repeat `v` for every batch row: [hidden_size] -> [batch, 1, hidden_size].
        v = tf.expand_dims(tf.tile(tf.expand_dims(self.v,0),[batch_size,1]),1)
        # [batch, 1, hidden_size] x [batch, hidden_size, seq_len] -> [batch, 1, seq_len].
        energy = tf.matmul(v,energy)
        return tf.squeeze(energy,1)
    
    def __call__(self, hidden, encoder_outputs):
        """Return softmax attention weights of shape [batch, 1, seq_len].

        hidden: rank-2 query tensor; it is tiled along a new time axis to the
            sequence length of `encoder_outputs`.
        encoder_outputs: rank-3 sequence tensor whose axis 1 is the time axis.
        """
        seq_len = tf.shape(encoder_outputs)[1]
        # Repeat the query once per timestep so it can be concatenated with the sequence.
        H = tf.tile(tf.expand_dims(hidden, 1),[1,seq_len,1])
        attn_energies = self.score(H,encoder_outputs)
        return tf.expand_dims(tf.nn.softmax(attn_energies),1)

class Model:
    """Dilated-convolution skip-thought style graph (TensorFlow 1.x, built in the default graph).

    Public attributes created by the constructor:
      BEFORE, INPUT, AFTER : int32 placeholders of shape [None, maxlen] holding token ids.
      get_thought          : encoder output summed over the time axis (graph node 'logits').
      attention            : get_thought multiplied by the transposed embedding matrix,
                             i.e. one score per vocabulary entry (graph node 'attention').
      loss, optimizer      : summed sequence loss for AFTER and BEFORE, and its Adam step.

    NOTE(review): as written, `self.loss` is computed only from the outputs of
    `decoder(...)`, which embeds the label ids themselves and never reads
    `self.get_thought`; the encoder therefore only shares the embedding matrix and the
    attention dense layer with the trained part of the graph. Confirm this is intended.
    """
    def __init__(
        self,
        dict_size,
        size_layers,
        learning_rate,
        maxlen,
        num_blocks = 3,
    ):
        """Build the whole graph.

        dict_size: vocabulary size (rows of the embedding matrix, width of the output layer).
        size_layers: embedding width and channel count of the convolution blocks.
        learning_rate: learning rate passed to the Adam optimizer.
        maxlen: fixed sequence length of the three placeholders.
        num_blocks: number of encoder repetitions of the dilation schedule [1, 2, 4, 8, 16].
        """
        block_size = size_layers
        self.BEFORE = tf.placeholder(tf.int32,[None,maxlen])
        self.INPUT = tf.placeholder(tf.int32,[None,maxlen])
        self.AFTER = tf.placeholder(tf.int32,[None,maxlen])
        self.batch_size = tf.shape(self.INPUT)[0]
        self.output_layer = tf.layers.Dense(dict_size, name="output_layer")
        self.output_layer.build(size_layers)
        self.embeddings = tf.Variable(tf.random_uniform([dict_size, size_layers], -1, 1))
        embedded = tf.nn.embedding_lookup(self.embeddings, self.INPUT)
        # The attention module is kept under its own private name: `self.attention` is
        # assigned the vocabulary-score tensor further down, and sharing one attribute for
        # both would make any residual block built after that point call a Tensor.
        self._attention_layer = Attention(size_layers)

        def residual_block(x, size, rate, block, reuse = False):
            """Gated dilated-convolution block; returns (x + out, out)."""
            with tf.variable_scope(
                'block_%d_%d' % (block, rate), reuse = reuse
            ):
                # Query is the sequence summed over time; result has shape [batch, 1, seq_len].
                attn_weights = self._attention_layer(tf.reduce_sum(x,axis=1), x)
                conv_filter = tf.layers.conv1d(
                    attn_weights,
                    x.shape[2] // 4,
                    kernel_size = size,
                    strides = 1,
                    padding = 'same',
                    dilation_rate = rate,
                    activation = tf.nn.tanh,
                )
                conv_gate = tf.layers.conv1d(
                    x,
                    x.shape[2] // 4,
                    kernel_size = size,
                    strides = 1,
                    padding = 'same',
                    dilation_rate = rate,
                    activation = tf.nn.sigmoid,
                )
                # conv_filter has a single timestep and broadcasts against conv_gate.
                out = tf.multiply(conv_filter, conv_gate)
                out = tf.layers.conv1d(
                    out,
                    block_size,
                    kernel_size = 1,
                    strides = 1,
                    padding = 'same',
                    activation = tf.nn.tanh,
                )
                return tf.add(x, out), out

        # Encoder: 1x1 projection, stacked residual blocks, summed skip outputs.
        forward = tf.layers.conv1d(
            embedded, block_size, kernel_size = 1, strides = 1, padding = 'SAME'
        )
        zeros = tf.zeros_like(forward)
        for i in range(num_blocks):
            for r in [1, 2, 4, 8, 16]:
                forward, s = residual_block(
                    forward, size = 7, rate = r, block = i
                )
                zeros = tf.add(zeros, s)
        forward = tf.layers.conv1d(
            zeros,
            block_size,
            kernel_size = 1,
            strides = 1,
            padding = 'SAME',
            activation = tf.nn.tanh,
        )
        # Node name 'logits' is what the frozen graph is queried by later in the notebook.
        self.get_thought = tf.reduce_sum(forward,axis=1, name = 'logits')
        
        def decoder(labels, reuse):
            """Embed `labels` and run them through the block-10 residual stack.

            With reuse=True the residual blocks share the variables created by an
            earlier call; the 1x1 convolutions outside those scopes are created anew.
            """
            decoder_in = tf.nn.embedding_lookup(self.embeddings, labels)
            forward = tf.layers.conv1d(
                decoder_in, block_size, kernel_size = 1, strides = 1, padding = 'SAME'
            )
            zeros = tf.zeros_like(forward)
            for r in [8, 16, 24]:
                forward, s = residual_block(forward, size = 7, rate = r, block = 10, reuse = reuse)
                zeros = tf.add(zeros, s)
            return tf.layers.conv1d(
                zeros,
                block_size,
                kernel_size = 1,
                strides = 1,
                padding = 'SAME',
                activation = tf.nn.tanh,
            )
        
        fw_logits = decoder(self.AFTER, False)
        bw_logits = decoder(self.BEFORE, True)
        # Score of the thought vector against every embedding row: [batch, dict_size].
        self.attention = tf.matmul(
            self.get_thought, tf.transpose(self.embeddings), name = 'attention'
        )
        self.loss = self.calculate_loss(fw_logits, self.AFTER) + self.calculate_loss(bw_logits, self.BEFORE)
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
    
    def calculate_loss(self, outputs, labels):
        """Sequence loss of `outputs` (projected to the vocabulary) against `labels`.

        Positions whose label id is 0 get weight 0 (mask = sign(labels)).
        """
        mask = tf.cast(tf.sign(labels), tf.float32)
        logits = self.output_layer(outputs)
        return tf.contrib.seq2seq.sequence_loss(logits, labels, mask)

In [3]:
import json
# Load the word -> integer id vocabulary. JSON text is read as UTF-8 explicitly
# so the result does not depend on the platform's default encoding.
with open('skip-wiki-dict.json', encoding = 'utf-8') as fopen:
    dictionary = json.load(fopen)
len(dictionary)

200004

In [4]:
def rename(checkpoint_dir, replace_from, replace_to, add_prefix, dry_run=False,
           output_path='skip-rename/model.ckpt'):
    """Copy a checkpoint while renaming its variables.

    checkpoint_dir: checkpoint location passed to tf.contrib.framework.list_variables /
        load_variable.
    replace_from, replace_to: when both are not None, every occurrence of
        `replace_from` in a variable name is replaced by `replace_to`.
    add_prefix: when truthy, prepended to every (possibly replaced) variable name.
    dry_run: when True, only print the planned renames; nothing is loaded or saved.
    output_path: where the renamed checkpoint is written (defaults to the path that
        used to be hard-coded).
    """
    # Work in a private graph: otherwise the copies are added to the caller's default
    # graph and tf.train.Saver() would also save every variable that graph already holds.
    with tf.Graph().as_default(), tf.Session() as sess:
        for var_name, _ in tf.contrib.framework.list_variables(checkpoint_dir):
            new_name = var_name
            if replace_from is not None and replace_to is not None:
                new_name = new_name.replace(replace_from, replace_to)
            if add_prefix:
                new_name = add_prefix + new_name

            if dry_run:
                print('%s would be renamed to %s.' % (var_name, new_name))
            else:
                print('Renaming %s to %s.' % (var_name, new_name))
                # Values are only read when they are actually going to be re-saved.
                value = tf.contrib.framework.load_variable(checkpoint_dir, var_name)
                # Re-create the variable under its new name, initialised with the old value.
                tf.Variable(value, name=new_name)

        if not dry_run:
            # Save the variables
            saver = tf.train.Saver()
            sess.run(tf.global_variables_initializer())
            saver.save(sess, output_path)

In [5]:
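# Disabled call kept for reference: it would replace the UUID-suffixed scope name
# 'thought_scope_e1d42da4-...' with plain 'thought_scope' in every variable name.
# rename('skip/model.ckpt','thought_scope_e1d42da4-5ae4-4898-b0f1-f52f687a4e28',
#       'thought_scope',None)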

In [8]:
# Start from an empty default graph, then build the model in it:
# 64-wide layers, learning rate 1e-3, sequences of 50 token ids.
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(
    dict_size = len(dictionary),
    size_layers = 64,
    learning_rate = 1e-3,
    maxlen = 50,
)
init_op = tf.global_variables_initializer()
sess.run(init_op)



In [9]:
# Overwrite the freshly initialised variables with the trained checkpoint values.
saver = tf.train.Saver(var_list = tf.global_variables())
saver.restore(sess, save_path = 'skip-wiki/model.ckpt')

INFO:tensorflow:Restoring parameters from skip-wiki/model.ckpt


In [10]:
import random

def sequence(s, w2v_model, maxlen, vocabulary_size):
    """Encode a whitespace-tokenised sentence as a fixed-length int32 id array.

    s: sentence string; split on whitespace.
    w2v_model: mapping word -> integer id (only `in` and `[]` are used).
    maxlen: length of the returned array; at most `maxlen - 2` words are kept.
    vocabulary_size: ids greater than or equal to this are treated as unknown.

    Returns a numpy int32 array of length `maxlen`: the word ids, then the end
    marker 3 directly after the last kept word, then zeros. Words that are missing
    from `w2v_model` or whose id is out of range are encoded as 1.
    """
    unknown_id = 1      # fallback for out-of-vocabulary words
    end_marker_id = 3   # written right after the last word (presumably EOS — TODO confirm)

    # Clamp the bound: a negative slice end would silently drop words from the tail
    # instead of keeping none.
    words = s.split()[:max(maxlen - 2, 0)]
    np_array = np.zeros((maxlen),dtype=np.int32)
    for no, word in enumerate(words):
        id_to_append = unknown_id
        if word in w2v_model:
            word_id = w2v_model[word]
            if word_id < vocabulary_size:
                id_to_append = word_id
        np_array[no] = id_to_append
    # Place the end marker immediately after the last word; for an empty sentence that
    # is index 0, so no padding zero ends up in front of the marker.
    np_array[len(words)] = end_marker_id
    return np_array

def generate_batch(sentences,batch_size,w2v_model,maxlen,vocabulary_size,first_index=1000):
    """Build `batch_size` sliding (previous, current, next) sentence triples.

    sentences: list of sentence strings.
    batch_size: number of triples to produce; `batch_size + 2` consecutive sentences
        are consumed.
    w2v_model, maxlen, vocabulary_size: forwarded to `sequence` for encoding.
    first_index: index of the first sentence of the window (default 1000, the
        value that used to be hard-coded).

    Returns three arrays of shape [batch_size, maxlen]: the encoded previous,
    current and next sentence of every triple.
    Raises ValueError when fewer than `batch_size + 2` sentences are available
    from `first_index`.
    """
    window_size = batch_size + 2
    batch_sentences = sentences[first_index:first_index+window_size]
    if len(batch_sentences) < window_size:
        # Without this check the windows below would be ragged and fail with an
        # obscure indexing error.
        raise ValueError(
            'need %d sentences starting at index %d, only %d available'
            % (window_size, first_index, len(batch_sentences))
        )
    print(batch_sentences)
    batch_sequences = np.array([sequence(sentence,w2v_model,maxlen,vocabulary_size) for sentence in batch_sentences])
    # One window of three consecutive encoded sentences per batch row.
    window_shape = np.array([batch_sequences[i:i+3] for i in range(batch_size)])
    return window_shape[:,0], window_shape[:,1], window_shape[:,2]

In [11]:
import json
# Load the list of sentences; read the JSON as UTF-8 explicitly instead of relying on
# the platform default, and parse straight from the file object.
with open('news-bm.json','r', encoding = 'utf-8') as fopen:
    sentences = json.load(fopen)

In [14]:
# Encode one (previous, current, next) sentence triple, 50 token ids per sentence.
bw_input, current_input, fw_input = generate_batch(
    sentences,
    batch_size = 1,
    w2v_model = dictionary,
    maxlen = 50,
    vocabulary_size = len(dictionary),
)

['pahang diwakili pemangku raja pahang tengku abdullah sultan ahmad shah manakala kelantan diwakili pemangku raja kelantan dr', 'tengku muhammad faiz petra', 'pada hari kedua mesyuarat yang bermula kira pukul pagi itu raja-raja melayu diiringi menteri besar masing-masing manakala yang dipertua negeri pulau pinang sabah dan melaka diiringi ketua menteri masing-masing']


In [15]:
# Run the encoder on one batch and fetch its thought vector.
# NOTE(review): the batch fed to INPUT is `fw_input` (third element of the triple),
# not `current_input` — confirm this is intended.
encoded = sess.run(model.get_thought,feed_dict={model.INPUT:fw_input})

In [16]:
# Display the thought vector fetched from the live (non-frozen) graph.
encoded

array([[ 0.07066324,  0.13310698, -0.62426007, -0.4613824 , -0.17707539,
        -0.3925364 ,  1.1155262 ,  1.1873002 ,  0.48969495,  0.81452906,
        -0.1577659 , -0.17734857, -0.37914753, -0.7942437 ,  0.56107384,
         0.29675886, -0.7340232 , -0.07755096,  0.29897642, -0.0737358 ,
         0.6024291 ,  0.95485014, -0.95064414, -0.63884234,  0.03552189,
        -0.40762448, -0.25227717, -0.24423571,  0.37850273, -0.11428429,
        -0.8386208 , -0.2072649 , -0.9640392 , -0.63121736, -0.5339436 ,
         0.96501446, -0.12163527,  0.31738836,  0.9421329 , -0.51436657,
         0.6444553 , -0.2436821 , -0.4731561 , -0.00128211, -0.05046922,
         0.5482205 ,  0.85903156,  0.681826  ,  0.02734087,  0.5048841 ,
         0.08036114,  0.00166782,  0.5863657 ,  0.37902188, -0.14853519,
         0.11486635,  0.03344561,  1.1854374 , -0.07733421, -0.8486209 ,
         0.9942196 ,  0.9136265 , -0.10116772, -0.21602613]],
      dtype=float32)

In [17]:
# Comma-separated names of the graph nodes to keep when freezing: every variable op plus
# any node whose name mentions one of the markers below; optimizer ('Adam') nodes are skipped.
strings = ','.join(
    node.name
    for node in tf.get_default_graph().as_graph_def().node
    if 'Adam' not in node.name
    and (
        'Variable' in node.op
        or any(
            marker in node.name
            for marker in ('Placeholder', 'add_1', 'attention', 'logits')
        )
    )
)

In [18]:
# Show the selected node names as a list.
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'output_layer/kernel',
 'output_layer/bias',
 'Variable',
 'conv1d/kernel',
 'conv1d/bias',
 'block_0_1/dense/kernel',
 'block_0_1/dense/bias',
 'block_0_1/dense/Tensordot/add_1',
 'block_0_1/conv1d/kernel',
 'block_0_1/conv1d/bias',
 'block_0_1/conv1d_1/kernel',
 'block_0_1/conv1d_1/bias',
 'block_0_1/conv1d_2/kernel',
 'block_0_1/conv1d_2/bias',
 'block_0_1/dense/Tensordot_1/add_1',
 'block_0_2/conv1d/kernel',
 'block_0_2/conv1d/bias',
 'block_0_2/conv1d_1/kernel',
 'block_0_2/conv1d_1/bias',
 'block_0_2/conv1d_2/kernel',
 'block_0_2/conv1d_2/bias',
 'block_0_1/dense/Tensordot_2/add_1',
 'block_0_4/conv1d/kernel',
 'block_0_4/conv1d/bias',
 'block_0_4/conv1d_1/kernel',
 'block_0_4/conv1d_1/bias',
 'block_0_4/conv1d_2/kernel',
 'block_0_4/conv1d_2/bias',
 'block_0_1/dense/Tensordot_3/add_1',
 'block_0_8/conv1d/kernel',
 'block_0_8/conv1d/bias',
 'block_0_8/conv1d_1/kernel',
 'block_0_8/conv1d_1/bias',
 'block_0_8/conv1d_2/kernel',
 

In [19]:
def freeze_graph(model_dir, output_node_names):
    """Freeze the latest checkpoint of `model_dir` into `frozen_model.pb`.

    model_dir: directory containing the checkpoint state and its `.meta` graph.
    output_node_names: comma-separated names of the graph nodes to keep.

    The frozen GraphDef (variables converted to constants) is written next to the
    checkpoint file and the resulting op count is printed.
    Raises AssertionError when the directory does not exist, holds no checkpoint
    state, or when no output node name is given.
    """
    import os

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            "directory: %s" % model_dir)

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    # get_checkpoint_state returns None when the directory has no checkpoint file;
    # fail with a clear message instead of an AttributeError on the next line.
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            "No checkpoint state found in directory: %s" % model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    # Drop empty entries (e.g. from an empty string or a trailing comma).
    node_names = [name for name in output_node_names.split(",") if name]
    if not node_names:
        raise AssertionError("output_node_names must contain at least one node name")

    # dirname/join also behaves for a checkpoint path without a directory part, where
    # manual '/' splitting would produce '/frozen_model.pb'.
    output_graph = os.path.join(os.path.dirname(input_checkpoint), "frozen_model.pb")
    clear_devices = True
    # Entering the session also makes its fresh graph the default one, so the meta graph
    # is imported into that graph rather than into the notebook's current graph.
    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            sess.graph.as_graph_def(),
            node_names
        )
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

In [20]:
# Freeze the checkpoint in 'skip-wiki', keeping the nodes named in `strings`.
freeze_graph('skip-wiki', strings)

INFO:tensorflow:Restoring parameters from skip-wiki/model.ckpt
INFO:tensorflow:Froze 127 variables.
Converted 127 variables to const ops.
2031 ops in the final graph.


In [21]:
def load_graph(frozen_graph_filename):
    """Read a serialized GraphDef from disk and import it into a new tf.Graph.

    frozen_graph_filename: path of the `.pb` file to read.

    Returns the new graph. tf.import_graph_def is called without a name argument,
    and the notebook looks the imported nodes up under the 'import/' prefix.
    """
    with tf.gfile.GFile(frozen_graph_filename, "rb") as pb_file:
        serialized = pb_file.read()

    frozen_def = tf.GraphDef()
    frozen_def.ParseFromString(serialized)

    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(frozen_def)
    return graph

In [22]:
# Load the frozen model written by freeze_graph into its own graph.
g=load_graph('skip-wiki/frozen_model.pb')

In [23]:
# Look up the input placeholder and the two outputs of the imported graph, then run
# them in a session bound to that graph.
x, logits, attention = [
    g.get_tensor_by_name(tensor_name)
    for tensor_name in (
        'import/Placeholder_1:0',
        'import/logits:0',
        'import/attention:0',
    )
]
test_sess = tf.InteractiveSession(graph = g)
out, att = test_sess.run(
    [logits, attention],
    feed_dict = {x: fw_input},
)



In [24]:
# Shape of the fetched 'attention' output: one row per sentence, one column per vocabulary id.
att.shape

(1, 200004)

In [25]:
# Inverse vocabulary: integer id -> word.
rev_dict = dict((word_id, word) for word, word_id in dictionary.items())

In [26]:
# Print the ten highest-scoring vocabulary ids, best first, each followed by its word.
ranked_ids = att[0].argsort()[::-1]
for word_id in ranked_ids[:10]:
    print(word_id)
    print(rev_dict[word_id])

38799
jagaannya
4035
zulkifli
101993
ferdy
11445
hoe
165827
sharidake
325
televisyen
1681
kawan
124186
diimbau
34683
luteum
636
brunei
