In [1]:
import tensorflow as tf
import numpy as np
import pickle

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
with open('dataset.pkl', 'rb') as fopen:
    dataset = pickle.load(fopen)
dataset.keys()

dict_keys(['train_texts', 'test_texts', 'train_clss', 'test_clss', 'train_labels', 'test_labels'])

In [3]:
len(dataset['train_texts'])

73967

In [4]:
with open('dictionary.pkl', 'rb') as fopen:
    dictionary = pickle.load(fopen)

In [5]:
rev_dictionary = dictionary['rev_dictionary']
dictionary = dictionary['dictionary']

In [6]:
def ln(inputs, epsilon = 1e-8, scope="ln"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        inputs_shape = inputs.get_shape()
        params_shape = inputs_shape[-1:]
    
        mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
        beta= tf.get_variable("beta", params_shape, initializer=tf.zeros_initializer())
        gamma = tf.get_variable("gamma", params_shape, initializer=tf.ones_initializer())
        normalized = (inputs - mean) / ( (variance + epsilon) ** (.5) )
        outputs = gamma * normalized + beta
        
    return outputs

def scaled_dot_product_attention(Q, K, V,
                                 causality=False, dropout_rate=0.,
                                 training=True,
                                 scope="scaled_dot_product_attention"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        d_k = Q.get_shape().as_list()[-1]

        outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1]))  # (N, T_q, T_k)
        outputs /= d_k ** 0.5
        outputs = mask(outputs, Q, K, type="key")
        if causality:
            outputs = mask(outputs, type="future")
        outputs = tf.nn.softmax(outputs)
        attention = tf.transpose(outputs, [0, 2, 1])
        #tf.summary.image("attention", tf.expand_dims(attention[:1], -1))
        outputs = mask(outputs, Q, K, type="query")
        outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)
        outputs = tf.matmul(outputs, V)
    return outputs

def mask(inputs, queries=None, keys=None, type=None):
    padding_num = -2 ** 32 + 1
    if type in ("k", "key", "keys"):
        masks = tf.sign(tf.reduce_sum(tf.abs(keys), axis=-1))  # (N, T_k)
        masks = tf.expand_dims(masks, 1) # (N, 1, T_k)
        masks = tf.tile(masks, [1, tf.shape(queries)[1], 1])  # (N, T_q, T_k)
        paddings = tf.ones_like(inputs) * padding_num
        outputs = tf.where(tf.equal(masks, 0), paddings, inputs)  # (N, T_q, T_k)
    elif type in ("q", "query", "queries"):
        masks = tf.sign(tf.reduce_sum(tf.abs(queries), axis=-1))  # (N, T_q)
        masks = tf.expand_dims(masks, -1)  # (N, T_q, 1)
        masks = tf.tile(masks, [1, 1, tf.shape(keys)[1]])  # (N, T_q, T_k)
        outputs = inputs*masks
    elif type in ("f", "future", "right"):
        diag_vals = tf.ones_like(inputs[0, :, :])  # (T_q, T_k)
        tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense()  # (T_q, T_k)
        masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(inputs)[0], 1, 1])  # (N, T_q, T_k)
        paddings = tf.ones_like(masks) * padding_num
        outputs = tf.where(tf.equal(masks, 0), paddings, inputs)
    else:
        print("Check if you entered type correctly!")


    return outputs

def multihead_attention(queries, keys, values,
                        num_heads=8, 
                        dropout_rate=0,
                        training=True,
                        causality=False,
                        scope="multihead_attention"):
    d_model = queries.get_shape().as_list()[-1]
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # Linear projections
        Q = tf.layers.dense(queries, d_model, use_bias=False) # (N, T_q, d_model)
        K = tf.layers.dense(keys, d_model, use_bias=False) # (N, T_k, d_model)
        V = tf.layers.dense(values, d_model, use_bias=False) # (N, T_k, d_model)
        
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, d_model/h)
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h)
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, d_model/h)

        outputs = scaled_dot_product_attention(Q_, K_, V_, causality, dropout_rate, training)
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2 ) # (N, T_q, d_model)
        outputs += queries
        outputs = ln(outputs)
 
    return outputs

def ff(inputs, num_units, scope="positionwise_feedforward"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        outputs = tf.layers.dense(inputs, num_units[0], activation=tf.nn.relu)
        outputs = tf.layers.dense(outputs, num_units[1])
        outputs += inputs
        outputs = ln(outputs)
    
    return outputs

def label_smoothing(inputs, epsilon=0.1):
    V = inputs.get_shape().as_list()[-1] # number of channels
    return ((1-epsilon) * inputs) + (epsilon / V)

def sinusoidal_position_encoding(inputs, mask, repr_dim):
    T = tf.shape(inputs)[1]
    pos = tf.reshape(tf.range(0.0, tf.to_float(T), dtype=tf.float32), [-1, 1])
    i = np.arange(0, repr_dim, 2, np.float32)
    denom = np.reshape(np.power(10000.0, i / repr_dim), [1, -1])
    enc = tf.expand_dims(tf.concat([tf.sin(pos / denom), tf.cos(pos / denom)], 1), 0)
    return tf.tile(enc, [tf.shape(inputs)[0], 1, 1]) * tf.expand_dims(tf.to_float(mask), -1)

class Model:
    def __init__(self, size_layer, embedded_size,
                 dict_size, learning_rate,
                 num_blocks = 4, num_heads = 8, ratio_hidden = 2):
        
        def cells(reuse=False):
            return tf.nn.rnn_cell.LSTMCell(size_layer,initializer=tf.orthogonal_initializer(),reuse=reuse)
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.float32, [None, None])
        self.mask = tf.placeholder(tf.int32, [None, None])
        self.clss = tf.placeholder(tf.int32, [None, None])
        mask = tf.cast(self.mask, tf.float32)
        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        en_masks = tf.sign(self.X)
        encoder_embedded += sinusoidal_position_encoding(self.X, en_masks, size_layer)
        enc = encoder_embedded
        
        for i in range(num_blocks):
            with tf.variable_scope('encoder_self_attn_%d'%i,reuse=False):
                enc = multihead_attention(queries=enc,
                                          keys=enc,
                                          values=enc,
                                          num_heads=num_heads,
                                          causality=False)
                enc = ff(enc, num_units=[size_layer * ratio_hidden, size_layer])
                        
        outputs = tf.gather(enc, self.clss, axis = 1, batch_dims = 1)
        self.logits = tf.layers.dense(outputs, 1)
        self.logits = tf.squeeze(self.logits, axis=-1)
        self.logits = self.logits * mask
        crossent = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.Y)
        crossent = crossent * mask
        crossent = tf.reduce_sum(crossent)
        total_size = tf.reduce_sum(mask)
        self.cost = tf.div_no_nan(crossent, total_size)
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        
        l = tf.round(tf.sigmoid(self.logits))
        self.accuracy = tf.reduce_mean(tf.cast(tf.boolean_mask(l, tf.equal(self.Y, 1)), tf.float32))

In [7]:
size_layer = 256
embedded_size = 256
learning_rate = 1e-3

In [8]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(size_layer,embedded_size,len(dictionary),learning_rate)
sess.run(tf.global_variables_initializer())

Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Use keras.layers.dropout instead.


In [9]:
UNK = 3

def str_idx(corpus, dic):
    X = []
    for i in corpus:
        ints = []
        for k in i.split():
            ints.append(dic.get(k,UNK))
        X.append(ints)
    return X

def pad_sentence_batch(sentence_batch, pad_int):
    padded_seqs = []
    seq_lens = []
    max_sentence_len = max([len(sentence) for sentence in sentence_batch])
    for sentence in sentence_batch:
        padded_seqs.append(sentence + [pad_int] * (max_sentence_len - len(sentence)))
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

In [10]:
train_X = str_idx(dataset['train_texts'], dictionary)
test_X = str_idx(dataset['test_texts'], dictionary)

In [11]:
train_clss = dataset['train_clss']
test_clss = dataset['test_clss']
train_Y = dataset['train_labels']
test_Y = dataset['test_labels']

In [12]:
batch_x, _ = pad_sentence_batch(train_X[:64], 0)
batch_y, _ = pad_sentence_batch(train_Y[:64], 0)
batch_clss, _ = pad_sentence_batch(train_clss[:64], -1)
batch_clss = np.array(batch_clss)
batch_mask = 1 - (batch_clss == -1)
batch_clss[batch_clss == -1] = 0

feed = {model.X: batch_x,
        model.Y: batch_y,
        model.mask: batch_mask,
        model.clss: batch_clss}
acc, loss, _ = sess.run([model.accuracy, model.cost,model.optimizer], feed_dict = feed)
acc, loss

(1.0, 1.4390177)

In [13]:
import tqdm

batch_size = 64
epoch = 20

for e in range(epoch):
    pbar = tqdm.tqdm(
        range(0, len(train_X), batch_size), desc = 'minibatch loop')
    train_loss, train_acc, test_loss, test_acc = [], [], [], []
    for i in pbar:
        index = min(i + batch_size, len(train_X))
        batch_x, _ = pad_sentence_batch(train_X[i : index], 0)
        batch_y, _ = pad_sentence_batch(train_Y[i : index], 0)
        batch_clss, _ = pad_sentence_batch(train_clss[i : index], -1)
        batch_clss = np.array(batch_clss)
        batch_mask = 1 - (batch_clss == -1)
        batch_clss[batch_clss == -1] = 0
        feed = {model.X: batch_x,
                model.Y: batch_y,
                model.mask: batch_mask,
                model.clss: batch_clss}
        accuracy, loss, _ = sess.run([model.accuracy,model.cost,model.optimizer],
                                    feed_dict = feed)
        train_loss.append(loss)
        train_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    
    pbar = tqdm.tqdm(
        range(0, len(test_X), batch_size), desc = 'minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_X))
        batch_x, _ = pad_sentence_batch(test_X[i : index], 0)
        batch_y, _ = pad_sentence_batch(test_Y[i : index], 0)
        batch_clss, _ = pad_sentence_batch(test_clss[i : index], -1)
        batch_clss = np.array(batch_clss)
        batch_mask = 1 - (batch_clss == -1)
        batch_clss[batch_clss == -1] = 0
        feed = {model.X: batch_x,
                model.Y: batch_y,
                model.mask: batch_mask,
                model.clss: batch_clss}
        accuracy, loss = sess.run([model.accuracy,model.cost],
                                    feed_dict = feed)

        test_loss.append(loss)
        test_acc.append(accuracy)
        pbar.set_postfix(cost = loss, accuracy = accuracy)
    
    print('epoch %d, training avg loss %f, training avg acc %f'%(e+1,
                                                                 np.mean(train_loss),np.mean(train_acc)))
    print('epoch %d, testing avg loss %f, testing avg acc %f'%(e+1,
                                                              np.mean(test_loss),np.mean(test_acc)))

minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.0167, cost=0.376] 
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.00641, cost=0.397]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 1, training avg loss 0.387910, training avg acc 0.008459
epoch 1, testing avg loss 0.378677, testing avg acc 0.002847


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.0167, cost=0.37]  
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.67it/s, accuracy=0.00641, cost=0.393]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 2, training avg loss 0.374503, training avg acc 0.029881
epoch 2, testing avg loss 0.375109, testing avg acc 0.003962


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.1, cost=0.349]   
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.68it/s, accuracy=0.0321, cost=0.397]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 3, training avg loss 0.365837, training avg acc 0.061967
epoch 3, testing avg loss 0.378205, testing avg acc 0.052813


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.225, cost=0.324] 
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.67it/s, accuracy=0.0705, cost=0.406]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 4, training avg loss 0.351095, training avg acc 0.128159
epoch 4, testing avg loss 0.392094, testing avg acc 0.101840


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.358, cost=0.299]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.67it/s, accuracy=0.154, cost=0.426]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 5, training avg loss 0.329346, training avg acc 0.222128
epoch 5, testing avg loss 0.413041, testing avg acc 0.174466


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.433, cost=0.262]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.67it/s, accuracy=0.135, cost=0.455]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 6, training avg loss 0.305964, training avg acc 0.309037
epoch 6, testing avg loss 0.436760, testing avg acc 0.183248


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.533, cost=0.219]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.141, cost=0.492]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 7, training avg loss 0.281421, training avg acc 0.392515
epoch 7, testing avg loss 0.472197, testing avg acc 0.184607


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.7, cost=0.186]  
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.173, cost=0.543]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 8, training avg loss 0.252382, training avg acc 0.477697
epoch 8, testing avg loss 0.522769, testing avg acc 0.221310


minibatch loop: 100%|██████████| 1156/1156 [06:43<00:00,  2.87it/s, accuracy=0.742, cost=0.142]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.173, cost=0.641]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 9, training avg loss 0.225019, training avg acc 0.549972
epoch 9, testing avg loss 0.597408, testing avg acc 0.246061


minibatch loop: 100%|██████████| 1156/1156 [06:42<00:00,  2.87it/s, accuracy=0.775, cost=0.121]
minibatch loop: 100%|██████████| 289/289 [00:38<00:00,  7.60it/s, accuracy=0.154, cost=0.644]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 10, training avg loss 0.203076, training avg acc 0.606897
epoch 10, testing avg loss 0.594545, testing avg acc 0.227027


minibatch loop: 100%|██████████| 1156/1156 [06:37<00:00,  2.91it/s, accuracy=0.792, cost=0.102]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.61it/s, accuracy=0.179, cost=0.728]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 11, training avg loss 0.189718, training avg acc 0.639519
epoch 11, testing avg loss 0.670387, testing avg acc 0.249209


minibatch loop: 100%|██████████| 1156/1156 [06:37<00:00,  2.91it/s, accuracy=0.883, cost=0.0795]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.61it/s, accuracy=0.167, cost=0.775]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 12, training avg loss 0.168331, training avg acc 0.688725
epoch 12, testing avg loss 0.721593, testing avg acc 0.229948


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.892, cost=0.0754]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.167, cost=0.728]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 13, training avg loss 0.147944, training avg acc 0.734141
epoch 13, testing avg loss 0.681439, testing avg acc 0.234368


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.9, cost=0.0825]  
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.64it/s, accuracy=0.192, cost=0.824]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 14, training avg loss 0.135995, training avg acc 0.758445
epoch 14, testing avg loss 0.749728, testing avg acc 0.255525


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.958, cost=0.0592]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.64it/s, accuracy=0.205, cost=0.873]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 15, training avg loss 0.127149, training avg acc 0.778638
epoch 15, testing avg loss 0.811870, testing avg acc 0.269463


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.9, cost=0.0623]  
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.66it/s, accuracy=0.186, cost=0.957]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 16, training avg loss 0.110549, training avg acc 0.812205
epoch 16, testing avg loss 0.862229, testing avg acc 0.248714


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.875, cost=0.0698]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.64it/s, accuracy=0.212, cost=0.879]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 17, training avg loss 0.099921, training avg acc 0.834709
epoch 17, testing avg loss 0.792213, testing avg acc 0.235911


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.933, cost=0.0742]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.205, cost=0.793]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 18, training avg loss 0.099067, training avg acc 0.837627
epoch 18, testing avg loss 0.752785, testing avg acc 0.243941


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.933, cost=0.0539]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.205, cost=0.932]
minibatch loop:   0%|          | 0/1156 [00:00<?, ?it/s]

epoch 19, training avg loss 0.087893, training avg acc 0.859114
epoch 19, testing avg loss 0.854014, testing avg acc 0.258605


minibatch loop: 100%|██████████| 1156/1156 [06:35<00:00,  2.92it/s, accuracy=0.933, cost=0.0515]
minibatch loop: 100%|██████████| 289/289 [00:37<00:00,  7.65it/s, accuracy=0.179, cost=0.983]

epoch 20, training avg loss 0.085529, training avg acc 0.863731
epoch 20, testing avg loss 0.869859, testing avg acc 0.236150





In [14]:
from tensor2tensor.utils import rouge
from tensorflow.keras.preprocessing import sequence

def calculate_rouges(predicted, batch_y):
    non = np.count_nonzero(batch_y, axis = 1)
    o = []
    for n in non:
        o.append([True for _ in range(n)])
    b = sequence.pad_sequences(o, dtype = np.bool, padding = 'post', value = False)
    batch_y = np.array(batch_y)
    rouges = []
    for i in range(predicted.shape[0]):
        a = batch_y[i][b[i]]
        p = predicted[i][b[i]]
        rouges.append(rouge.rouge_n([p], [a]))
    return np.mean(rouges)

In [15]:
batch_x, _ = pad_sentence_batch(test_X[: 5], 0)
batch_y, _ = pad_sentence_batch(test_Y[: 5], 0)
batch_clss, _ = pad_sentence_batch(test_clss[: 5], -1)
batch_clss = np.array(batch_clss)
batch_y = np.array(batch_y)
batch_x = np.array(batch_x)
cp_batch_clss = batch_clss.copy()
batch_mask = 1 - (batch_clss == -1)
batch_clss[batch_clss == -1] = 0

feed = {model.X: batch_x,
        model.mask: batch_mask,
        model.clss: batch_clss}
predicted = sess.run(tf.round(tf.sigmoid(model.logits)), feed_dict = feed)

In [16]:
from tensor2tensor.utils import rouge

def calculate_rouge(predicted, batch_y, cp_batch_clss, batch_x):
    f, y_, predicted_ = [], [], []
    for i in range(len(cp_batch_clss)):
        f.append(cp_batch_clss[i][cp_batch_clss[i] != -1])
        y_.append(batch_y[i][cp_batch_clss[i] != -1])
        predicted_.append(predicted[i][cp_batch_clss[i] != -1])
    
    actual, predict = [], []
    for i in range(len(f)):
        actual_, predict_ = [], []
        for k in range(len(f[i])):
            if k == (len(f[i]) - 1):
                s = batch_x[i][f[i][k]:]
                s = s[s != 0]
            else:
                s = batch_x[i][f[i][k]: f[i][k + 1]]
            s = [w for w in s if w not in [0, 1, 2, 3, 5, 6, 7, 8]]
            if y_[i][k]:
                actual_.extend(s)
            if predicted_[i][k]:
                predict_.extend(s)
        actual.append(actual_)
        predict.append(predict_)
    return rouge.rouge_n(predict, actual)

calculate_rouge(predicted, batch_y, cp_batch_clss, batch_x)

0.19125411

In [17]:
from tqdm import tqdm as tqdm_base
def tqdm(*args, **kwargs):
    if hasattr(tqdm_base, '_instances'):
        for instance in list(tqdm_base._instances):
            tqdm_base._decr_instances(instance)
    return tqdm_base(*args, **kwargs)

In [18]:
rouges = []

pbar = tqdm(
    range(0, len(test_X), 32), desc = 'minibatch loop')
for i in pbar:
    index = min(i + batch_size, len(test_X))
    batch_x, _ = pad_sentence_batch(test_X[i: index], 0)
    batch_y, _ = pad_sentence_batch(test_Y[i: index], 0)
    batch_clss, _ = pad_sentence_batch(test_clss[i: index], -1)
    batch_clss = np.array(batch_clss)
    batch_y = np.array(batch_y)
    batch_x = np.array(batch_x)
    cp_batch_clss = batch_clss.copy()
    batch_mask = 1 - (batch_clss == -1)
    batch_clss[batch_clss == -1] = 0

    feed = {model.X: batch_x,
            model.mask: batch_mask,
            model.clss: batch_clss}
    predicted = sess.run(tf.round(tf.sigmoid(model.logits)), feed_dict = feed)
    rouge_ = calculate_rouge(predicted, batch_y, cp_batch_clss, batch_x)
    rouges.append(rouge_)
    pbar.set_postfix(rouge = rouge_)

minibatch loop: 100%|██████████| 2312/2312 [14:40<00:00,  2.63it/s, rouge=0.211]


In [19]:
np.mean(rouges)

0.26330408