In [1]:
import tensorflow as tf
import numpy as np
import pickle

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
with open('dataset.pkl', 'rb') as fopen:
    dataset = pickle.load(fopen)
dataset.keys()

dict_keys(['train_texts', 'test_texts', 'train_clss', 'test_clss', 'train_labels', 'test_labels'])

In [3]:
len(dataset['train_texts'])

73967

In [4]:
with open('dictionary.pkl', 'rb') as fopen:
    dictionary = pickle.load(fopen)

In [5]:
rev_dictionary = dictionary['rev_dictionary']
dictionary = dictionary['dictionary']

In [6]:
def position_encoding(inputs):
    T = tf.shape(inputs)[1]
    repr_dim = inputs.get_shape()[-1].value
    pos = tf.reshape(tf.range(0.0, tf.to_float(T), dtype=tf.float32), [-1, 1])
    i = np.arange(0, repr_dim, 2, np.float32)
    denom = np.reshape(np.power(10000.0, i / repr_dim), [1, -1])
    enc = tf.expand_dims(tf.concat([tf.sin(pos / denom), tf.cos(pos / denom)], 1), 0)
    return tf.tile(enc, [tf.shape(inputs)[0], 1, 1])

def layer_norm(inputs, epsilon=1e-8):
    mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
    normalized = (inputs - mean) / (tf.sqrt(variance + epsilon))
    params_shape = inputs.get_shape()[-1:]
    gamma = tf.get_variable('gamma', params_shape, tf.float32, tf.ones_initializer())
    beta = tf.get_variable('beta', params_shape, tf.float32, tf.zeros_initializer())
    return gamma * normalized + beta


def cnn_block(x, dilation_rate, pad_sz, hidden_dim, kernel_size):
    x = layer_norm(x)
    pad = tf.zeros([tf.shape(x)[0], pad_sz, hidden_dim])
    x =  tf.layers.conv1d(inputs = tf.concat([pad, x, pad], 1),
                          filters = hidden_dim,
                          kernel_size = kernel_size,
                          dilation_rate = dilation_rate)
    x = x[:, :-pad_sz, :]
    x = tf.nn.relu(x)
    return x

class Model:
    def __init__(self, size_layer, num_layers, embedded_size,
                 dict_size, learning_rate, kernel_size = 3):
        
        def cells(reuse=False):
            return tf.nn.rnn_cell.LSTMCell(size_layer,initializer=tf.orthogonal_initializer(),reuse=reuse)
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.float32, [None, None])
        self.mask = tf.placeholder(tf.int32, [None, None])
        self.clss = tf.placeholder(tf.int32, [None, None])
        mask = tf.cast(self.mask, tf.float32)
        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        encoder_embedded += position_encoding(encoder_embedded)
        
        for i in range(num_layers): 
            dilation_rate = 2 ** i
            pad_sz = (kernel_size - 1) * dilation_rate 
            with tf.variable_scope('block_%d'%i,reuse=False):
                encoder_embedded += cnn_block(encoder_embedded, dilation_rate, 
                                              pad_sz, size_layer, kernel_size)
                        
        outputs = tf.gather(encoder_embedded, self.clss, axis = 1, batch_dims = 1)
        self.logits = tf.layers.dense(outputs, 1)
        self.logits = tf.squeeze(self.logits, axis=-1)
        self.logits = self.logits * mask
        crossent = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.Y)
        crossent = crossent * mask
        crossent = tf.reduce_sum(crossent)
        total_size = tf.reduce_sum(mask)
        self.cost = tf.div_no_nan(crossent, total_size)
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        
        l = tf.round(tf.sigmoid(self.logits))
        self.accuracy = tf.reduce_mean(tf.cast(tf.boolean_mask(l, tf.equal(self.Y, 1)), tf.float32))

In [7]:
size_layer = 256
num_layers = 4
embedded_size = 256
learning_rate = 1e-3
batch_size = 128
epoch = 20

In [8]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(size_layer,num_layers,embedded_size,len(dictionary),learning_rate)
sess.run(tf.global_variables_initializer())

Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use `tf.keras.layers.Conv1D` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [9]:
UNK = 3

def str_idx(corpus, dic):
    X = []
    for i in corpus:
        ints = []
        for k in i.split():
            ints.append(dic.get(k,UNK))
        X.append(ints)
    return X

def pad_sentence_batch(sentence_batch, pad_int):
    padded_seqs = []
    seq_lens = []
    max_sentence_len = max([len(sentence) for sentence in sentence_batch])
    for sentence in sentence_batch:
        padded_seqs.append(sentence + [pad_int] * (max_sentence_len - len(sentence)))
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

In [10]:
train_X = str_idx(dataset['train_texts'], dictionary)
test_X = str_idx(dataset['test_texts'], dictionary)

In [11]:
train_clss = dataset['train_clss']
test_clss = dataset['test_clss']
train_Y = dataset['train_labels']
test_Y = dataset['test_labels']

In [12]:
batch_x, _ = pad_sentence_batch(train_X[:5], 0)
batch_y, _ = pad_sentence_batch(train_Y[:5], 0)
batch_clss, _ = pad_sentence_batch(train_clss[:5], -1)
batch_clss = np.array(batch_clss)
batch_mask = 1 - (batch_clss == -1)
batch_clss[batch_clss == -1] = 0

feed = {model.X: batch_x,
        model.Y: batch_y,
        model.mask: batch_mask,
        model.clss: batch_clss}
acc, loss, _ = sess.run([model.accuracy, model.cost,model.optimizer], feed_dict = feed)
acc, loss

(0.36363637, 0.80718136)

In [13]:
import tqdm

for e in range(epoch):
    pbar = tqdm.tqdm(
        range(0, len(train_X), batch_size), desc = 'minibatch loop')
    train_loss, train_acc, test_loss, test_acc = [], [], [], []
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x, _ = pad_sentence_batch(train_X[i : index], 0)
        batch_y, _ = pad_sentence_batch(train_Y[i : index], 0)
        batch_clss, _ = pad_sentence_batch(train_clss[i : index], -1)
        batch_clss = np.array(batch_clss)
        batch_mask = 1 - (batch_clss == -1)
        batch_clss[batch_clss == -1] = 0
        feed = {model.X: batch_x,
                model.Y: batch_y,
                model.mask: batch_mask,
                model.clss: batch_clss}
        accuracy, loss, _ = sess.run([model.accuracy,model.cost,model.optimizer],
                                    feed_dict = feed)
        train_loss.append(loss)
        train_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    
    pbar = tqdm.tqdm(
        range(0, len(test_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x, _ = pad_sentence_batch(test_X[i : index], 0)
        batch_y, _ = pad_sentence_batch(test_Y[i : index], 0)
        batch_clss, _ = pad_sentence_batch(test_clss[i : index], -1)
        batch_clss = np.array(batch_clss)
        batch_mask = 1 - (batch_clss == -1)
        batch_clss[batch_clss == -1] = 0
        feed = {model.X: batch_x,
                model.Y: batch_y,
                model.mask: batch_mask,
                model.clss: batch_clss}
        accuracy, loss = sess.run([model.accuracy,model.cost],
                                    feed_dict = feed)

        test_loss.append(loss)
        test_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    
    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 np.mean(train_loss),np.mean(train_acc)))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              np.mean(test_loss),np.mean(test_acc)))

minibatch loop: 100%|██████████| 578/578 [05:11<00:00,  1.86it/s, accuracy=0, cost=0.268]      
minibatch loop: 100%|██████████| 145/145 [00:21<00:00,  6.74it/s, accuracy=0, cost=0.221]     
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 1, training avg loss 0.267856, training avg acc 0.003014
epoch 1, testing avg loss 0.253723, testing avg acc 0.000193


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.42it/s, accuracy=0.0106, cost=0.265] 
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.66it/s, accuracy=0, cost=0.222]      
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 2, training avg loss 0.252464, training avg acc 0.001037
epoch 2, testing avg loss 0.253672, testing avg acc 0.001260


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.0142, cost=0.261] 
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.67it/s, accuracy=0, cost=0.224]      
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 3, training avg loss 0.248327, training avg acc 0.005151
epoch 3, testing avg loss 0.255450, testing avg acc 0.003196


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.44it/s, accuracy=0.0319, cost=0.253] 
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.65it/s, accuracy=0.0129, cost=0.232] 
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 4, training avg loss 0.240196, training avg acc 0.020250
epoch 4, testing avg loss 0.260763, testing avg acc 0.007725


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.44it/s, accuracy=0.078, cost=0.238] 
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.61it/s, accuracy=0.0129, cost=0.24]  
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 5, training avg loss 0.225537, training avg acc 0.067153
epoch 5, testing avg loss 0.272367, testing avg acc 0.018334


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.195, cost=0.211] 
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.61it/s, accuracy=0.0452, cost=0.258]
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 6, training avg loss 0.203671, training avg acc 0.162199
epoch 6, testing avg loss 0.290515, testing avg acc 0.036583


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.365, cost=0.182]
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.66it/s, accuracy=0.0516, cost=0.282]
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 7, training avg loss 0.177396, training avg acc 0.287097
epoch 7, testing avg loss 0.317477, testing avg acc 0.072228


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.443, cost=0.148]
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.65it/s, accuracy=0.103, cost=0.321] 
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 8, training avg loss 0.151854, training avg acc 0.402537
epoch 8, testing avg loss 0.356769, testing avg acc 0.102667


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.42it/s, accuracy=0.571, cost=0.122]
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.58it/s, accuracy=0.11, cost=0.361]  
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 9, training avg loss 0.128955, training avg acc 0.500623
epoch 9, testing avg loss 0.398493, testing avg acc 0.111438


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.42it/s, accuracy=0.582, cost=0.102] 
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.65it/s, accuracy=0.135, cost=0.421] 
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 10, training avg loss 0.110343, training avg acc 0.574596
epoch 10, testing avg loss 0.457165, testing avg acc 0.138099


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.532, cost=0.101] 
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.65it/s, accuracy=0.0839, cost=0.452]
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 11, training avg loss 0.094866, training avg acc 0.633989
epoch 11, testing avg loss 0.505708, testing avg acc 0.099099


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.645, cost=0.0773]
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.64it/s, accuracy=0.0839, cost=0.495]
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 12, training avg loss 0.083911, training avg acc 0.674366
epoch 12, testing avg loss 0.558003, testing avg acc 0.077336


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.798, cost=0.06]  
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.66it/s, accuracy=0.116, cost=0.541] 
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 13, training avg loss 0.072917, training avg acc 0.712523
epoch 13, testing avg loss 0.596416, testing avg acc 0.117849


minibatch loop: 100%|██████████| 578/578 [02:49<00:00,  3.42it/s, accuracy=0.794, cost=0.0595]
minibatch loop: 100%|██████████| 145/145 [00:17<00:00,  8.49it/s, accuracy=0.161, cost=0.595]
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 14, training avg loss 0.065610, training avg acc 0.737632
epoch 14, testing avg loss 0.634937, testing avg acc 0.159761


minibatch loop: 100%|██████████| 578/578 [02:56<00:00,  3.27it/s, accuracy=0.599, cost=0.0775]
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.60it/s, accuracy=0.11, cost=0.584]  
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 15, training avg loss 0.060682, training avg acc 0.755263
epoch 15, testing avg loss 0.646741, testing avg acc 0.103789


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.43it/s, accuracy=0.741, cost=0.0466]
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.59it/s, accuracy=0.071, cost=0.63]  
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 16, training avg loss 0.054130, training avg acc 0.777972
epoch 16, testing avg loss 0.707160, testing avg acc 0.068564


minibatch loop: 100%|██████████| 578/578 [02:49<00:00,  3.42it/s, accuracy=0.858, cost=0.0818]
minibatch loop: 100%|██████████| 145/145 [00:17<00:00,  8.35it/s, accuracy=0.161, cost=0.701]
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 17, training avg loss 0.048324, training avg acc 0.796035
epoch 17, testing avg loss 0.731096, testing avg acc 0.179190


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.42it/s, accuracy=0.833, cost=0.0343]
minibatch loop: 100%|██████████| 145/145 [00:19<00:00,  7.42it/s, accuracy=0.181, cost=0.732]
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 18, training avg loss 0.046448, training avg acc 0.802126
epoch 18, testing avg loss 0.783001, testing avg acc 0.167646


minibatch loop: 100%|██████████| 578/578 [02:55<00:00,  3.28it/s, accuracy=0.791, cost=0.0392]
minibatch loop: 100%|██████████| 145/145 [00:17<00:00,  8.52it/s, accuracy=0.135, cost=0.772] 
minibatch loop:   0%|          | 0/578 [00:00<?, ?it/s]

epoch 19, training avg loss 0.043719, training avg acc 0.812243
epoch 19, testing avg loss 0.852649, testing avg acc 0.110365


minibatch loop: 100%|██████████| 578/578 [02:48<00:00,  3.42it/s, accuracy=0.837, cost=0.0352]
minibatch loop: 100%|██████████| 145/145 [00:16<00:00,  8.68it/s, accuracy=0.135, cost=0.8]   

epoch 20, training avg loss 0.042226, training avg acc 0.816797
epoch 20, testing avg loss 0.871126, testing avg acc 0.131413





In [14]:
from tensor2tensor.utils import rouge
from tensorflow.keras.preprocessing import sequence

def calculate_rouges(predicted, batch_y):
    non = np.count_nonzero(batch_y, axis = 1)
    o = []
    for n in non:
        o.append([True for _ in range(n)])
    b = sequence.pad_sequences(o, dtype = np.bool, padding = 'post', value = False)
    batch_y = np.array(batch_y)
    rouges = []
    for i in range(predicted.shape[0]):
        a = batch_y[i][b[i]]
        p = predicted[i][b[i]]
        rouges.append(rouge.rouge_n([p], [a]))
    return np.mean(rouges)

In [15]:
batch_x, _ = pad_sentence_batch(test_X[: 5], 0)
batch_y, _ = pad_sentence_batch(test_Y[: 5], 0)
batch_clss, _ = pad_sentence_batch(test_clss[: 5], -1)
batch_clss = np.array(batch_clss)
batch_y = np.array(batch_y)
batch_x = np.array(batch_x)
cp_batch_clss = batch_clss.copy()
batch_mask = 1 - (batch_clss == -1)
batch_clss[batch_clss == -1] = 0

feed = {model.X: batch_x,
        model.mask: batch_mask,
        model.clss: batch_clss}
predicted = sess.run(tf.round(tf.sigmoid(model.logits)), feed_dict = feed)

In [16]:
from tensor2tensor.utils import rouge

def calculate_rouge(predicted, batch_y, cp_batch_clss, batch_x):
    f, y_, predicted_ = [], [], []
    for i in range(len(cp_batch_clss)):
        f.append(cp_batch_clss[i][cp_batch_clss[i] != -1])
        y_.append(batch_y[i][cp_batch_clss[i] != -1])
        predicted_.append(predicted[i][cp_batch_clss[i] != -1])
    
    actual, predict = [], []
    for i in range(len(f)):
        actual_, predict_ = [], []
        for k in range(len(f[i])):
            if k == (len(f[i]) - 1):
                s = batch_x[i][f[i][k]:]
                s = s[s != 0]
            else:
                s = batch_x[i][f[i][k]: f[i][k + 1]]
            s = [w for w in s if w not in [0, 1, 2, 3, 5, 6, 7, 8]]
            if y_[i][k]:
                actual_.extend(s)
            if predicted_[i][k]:
                predict_.extend(s)
        actual.append(actual_)
        predict.append(predict_)
    return rouge.rouge_n(predict, actual)

calculate_rouge(predicted, batch_y, cp_batch_clss, batch_x)

0.02314938

In [17]:
from tqdm import tqdm as tqdm_base
def tqdm(*args, **kwargs):
    if hasattr(tqdm_base, '_instances'):
        for instance in list(tqdm_base._instances):
            tqdm_base._decr_instances(instance)
    return tqdm_base(*args, **kwargs)

In [18]:
rouges = []

pbar = tqdm(
    range(0, len(test_X), 32), desc = 'minibatch loop')
for i in pbar:
    index = min(i + batch_size, len(test_X))
    batch_x, _ = pad_sentence_batch(test_X[i: index], 0)
    batch_y, _ = pad_sentence_batch(test_Y[i: index], 0)
    batch_clss, _ = pad_sentence_batch(test_clss[i: index], -1)
    batch_clss = np.array(batch_clss)
    batch_y = np.array(batch_y)
    batch_x = np.array(batch_x)
    cp_batch_clss = batch_clss.copy()
    batch_mask = 1 - (batch_clss == -1)
    batch_clss[batch_clss == -1] = 0

    feed = {model.X: batch_x,
            model.mask: batch_mask,
            model.clss: batch_clss}
    predicted = sess.run(tf.round(tf.sigmoid(model.logits)), feed_dict = feed)
    rouge_ = calculate_rouge(predicted, batch_y, cp_batch_clss, batch_x)
    rouges.append(rouge_)
    pbar.set_postfix(rouge = rouge_)

minibatch loop: 100%|██████████| 578/578 [03:33<00:00,  2.71it/s, rouge=0.186]


In [19]:
np.mean(rouges)

0.1554709