<a href="https://colab.research.google.com/github/nafabrar/AES/blob/master/ASAP_DM_BCA_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Notebook for training a pretrained DM-BCA model

The model stored in bca_dm_model is pretrained on the discourse marker prediction task. This notebook trains the model on ASAP essay data. Please pre-process the ASAP data using the script ASAP_dataParse.py

In [4]:
from google.colab import drive
drive.mount('/content/gdrive')
%cd /content/gdrive/My Drive/AES/AES

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/AES/AES


In [2]:
!pip install tensorflow==1.12
!pip install keras-attention


Collecting tensorflow==1.12
[?25l  Downloading https://files.pythonhosted.org/packages/22/cc/ca70b78087015d21c5f3f93694107f34ebccb3be9624385a911d4b52ecef/tensorflow-1.12.0-cp36-cp36m-manylinux1_x86_64.whl (83.1MB)
[K     |████████████████████████████████| 83.1MB 41kB/s 
Collecting tensorboard<1.13.0,>=1.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/07/53/8d32ce9471c18f8d99028b7cef2e5b39ea8765bd7ef250ca05b490880971/tensorboard-1.12.2-py3-none-any.whl (3.0MB)
[K     |████████████████████████████████| 3.1MB 33.2MB/s 
Installing collected packages: tensorboard, tensorflow
  Found existing installation: tensorboard 2.2.2
    Uninstalling tensorboard-2.2.2:
      Successfully uninstalled tensorboard-2.2.2
  Found existing installation: tensorflow 2.2.0
    Uninstalling tensorflow-2.2.0:
      Successfully uninstalled tensorflow-2.2.0
Successfully installed tensorboard-1.12.2 tensorflow-1.12.0
Collecting keras-attention
  Downloading https://files.pythonhosted.org/packag

In [12]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [5]:
from __future__ import print_function, division

import os
import os.path
import pandas as pd
from io import StringIO
import io
import unicodedata
import re
import random

import tensorflow as tf
import numpy as np
np.set_printoptions(threshold = 10000)
import collections
import random

from tensorflow.contrib.rnn import LSTMCell as Cell #for GRU: custom implementation with normalization
from tensorflow.python.ops.rnn import dynamic_rnn as rnn
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn
from tensorflow.contrib.rnn import DropoutWrapper

from attention import attention as attention
from bca_ import *
from ordloss import *
from utils import *
from datautilsbca import *


from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from scipy import stats
from sklearn.metrics import accuracy_score

In [6]:
#read data; SEQUENCE_LENGTH is maximum length of sentence in words, SEQUENCE_LENGTH_D is maximum length of document in sentences. 
SEQUENCE_LENGTH = 40
SEQUENCE_LENGTH_D = 25
max_vocab = 75000
train_split = 0.9
BATCH_SIZE = 20

# system parameters
HIDDEN_SIZE = 150
HIDDEN_SIZE_D = 150
ATTENTION_SIZE = 75
ATTENTION_SIZE_D = 50
LAYER_1 = 500
LAYER_2 = 250
LAYER_3 = 100
KEEP_PROB = 0.7
#NUM_EPOCHS = 1  # max val_acc at __
DELTA = 0.75

In [7]:
#the path specifies which set and which fold will be used
fpath = 'data/ASAP/fold_0/1'

In [8]:
#add dict name
dict_name = 'bca_dm_model/dict.csv'
# load the dictionary from the pre-trained model folder
import csv 
dictionary = {}
for key,val in csv.reader(open(dict_name)):
    dictionary[key] = val

# the test data set; the fformat is csv, with the text column labelled 'text'
df_test = pd.read_csv(os.path.join(fpath,'test.csv'))
df_train = pd.read_csv(os.path.join(fpath,'train.csv'))
df_val = pd.read_csv(os.path.join(fpath,'dev.csv'))


In [10]:
def read_test_set(df_test, dictionary, SEQUENCE_LEN_D = 40, SEQUENCE_LEN = 65, BATCH_SIZE = 10, min_= 0, max_ = 10):
    count_oov_train = 0
    count_iv_train = 0
    X_train = []

    for i in df_test['text1']:
        i = sent_tokenize(i)
        X_train.append([dictionary['START_SENT']])
        for j in i[:SEQUENCE_LEN_D-2]:
            j = str(j)
            #print(j)
            x = j.split()
            data = []
            #print(x)
            data.append(dictionary['START'])
            for word in x:
                if word in dictionary:
                    index = dictionary[word]
                    count_iv_train += 1

                else:
                    index = dictionary['UNK']
                    count_oov_train += 1
                data.append(index)
            data.append(dictionary['END'])
            X_train.append(data)
        X_train.append([dictionary['END_SENT']])
        for k in range(max(SEQUENCE_LEN_D -  (len(i)+2), 0)):
            X_train.append([0]) # pad token maps to 0

    print('len of test set: ', len(X_train)//BATCH_SIZE)

        
    rank_val = list(df_test['label'].values)
    rank_val.extend([i for i in range(min_,max_)])
    target_val = np.array(rank_val)
    onehot_encoder = OneHotEncoder(sparse=False)
    
    integer_encoded = target_val.reshape(len(target_val), 1)
    y_test = onehot_encoder.fit_transform(integer_encoded)
    y_test = y_test[:-len([i for i in range(min_,max_)])]

    return X_train, y_test

In [13]:
#min and max+1 score range for the ASAP set; this is to ensure that all labels are present in the one-hot encoding
mi = 2
ma = 13

X_test, y_test = read_test_set(df_test, dictionary, SEQUENCE_LEN_D = SEQUENCE_LENGTH_D, SEQUENCE_LEN = SEQUENCE_LENGTH, min_= mi, max_ = ma)
X_val, y_val = read_test_set(df_val, dictionary, SEQUENCE_LEN_D = SEQUENCE_LENGTH_D, SEQUENCE_LEN = SEQUENCE_LENGTH, min_= mi, max_ = ma)
X_train, y_train = read_test_set(df_train, dictionary, SEQUENCE_LEN_D = SEQUENCE_LENGTH_D, SEQUENCE_LEN = SEQUENCE_LENGTH, min_= mi, max_ = ma)

len of test set:  892
len of test set:  890
len of test set:  2675


In [14]:
doc_vocab_size = len(dictionary)
NUM_WORDS = doc_vocab_size
EMBEDDING_DIM = 300


print('Sentence length:',SEQUENCE_LENGTH)
print('Document length:',SEQUENCE_LENGTH_D)

print('Sentence length:',SEQUENCE_LENGTH)
print('Document length:',SEQUENCE_LENGTH_D)
print('Hidden size:',HIDDEN_SIZE)
print('Hidden size sentence level:',HIDDEN_SIZE_D)

y_test_len = len(y_test)

#use ordinal regression; logistic regression if False
ordinal = True

Sentence length: 40
Document length: 25
Sentence length: 40
Document length: 25
Hidden size: 150
Hidden size sentence level: 150


In [15]:
# Sequences preprocessing
vocabulary_size = doc_vocab_size 

X_train = zero_pad(X_train, SEQUENCE_LENGTH)
X_test = zero_pad(X_test, SEQUENCE_LENGTH)
X_val = zero_pad(X_val, SEQUENCE_LENGTH)

#batch size padding 
X_test = zero_pad_test(X_test, BATCH_SIZE*SEQUENCE_LENGTH_D)
y_test = zero_pad_test(y_test, BATCH_SIZE)

In [16]:
tf.reset_default_graph()
#Different placeholders
num_classes_asap = y_train.shape[1]
num_classes = 3
num_classes_s = 8
num_classes_s1 = 4
batch_ph = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH])
ind_list_ph = tf.placeholder(tf.int32, [None])
target_ph = tf.placeholder(tf.float32, [None,num_classes])
target_ph_s = tf.placeholder(tf.float32, [None,num_classes_s])
target_ph_s1 = tf.placeholder(tf.float32, [None,num_classes_s1])

seq_len_ph = tf.placeholder(tf.int32, [None])
seq_len_ph_d = tf.placeholder(tf.int32, [None])
keep_prob_ph = tf.placeholder(tf.float32)
doc_size_ph = tf.placeholder(tf.int32,[None])


# Embedding layer
embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)
batch_embedded = tf.nn.dropout(batch_embedded, keep_prob_ph)

W_omega = tf.Variable(tf.random_uniform([HIDDEN_SIZE*2, HIDDEN_SIZE*2], -1.0, 1.0))
# (Bi-)RNN layer(-s)
with tf.variable_scope('sentence'):
    fw_cell = Cell(HIDDEN_SIZE)
    bw_cell = Cell(HIDDEN_SIZE)
    
    fw_cell = DropoutWrapper(fw_cell, input_keep_prob=keep_prob_ph, 
                             output_keep_prob=keep_prob_ph,state_keep_prob=keep_prob_ph,
                             variational_recurrent=True, input_size=batch_embedded.get_shape()[-1], 
                             dtype = tf.float32)
    bw_cell = DropoutWrapper(bw_cell, input_keep_prob=keep_prob_ph, 
                             output_keep_prob=keep_prob_ph,state_keep_prob= keep_prob_ph,
                             variational_recurrent=True, input_size=batch_embedded.get_shape()[-1], 
                             dtype = tf.float32)
    rnn_output, _ = bi_rnn(fw_cell, bw_cell, inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)

    rnn_outputs = cross_attention(rnn_output, 2 , seq_len_ph, BATCH_SIZE, W_omega, time_major=False, return_alphas=False)
    attention_output, alphas = attention(rnn_outputs, ATTENTION_SIZE, seq_len_ph, return_alphas=True)
    rnn_outputs_ = cross_attention(rnn_output, SEQUENCE_LENGTH_D, seq_len_ph, BATCH_SIZE, W_omega)
    attention_output_, alphas_ = attention(rnn_outputs_ , ATTENTION_SIZE, seq_len_ph, return_alphas = True)
    attention_output_ = tf.reshape(attention_output_,[BATCH_SIZE, -1, HIDDEN_SIZE*2*3])
    
with tf.variable_scope('document'):
    fw_cell_d = Cell(HIDDEN_SIZE_D)
    bw_cell_d = Cell(HIDDEN_SIZE_D)
    
    fw_cell_d = DropoutWrapper(fw_cell_d, input_keep_prob=keep_prob_ph, 
                             output_keep_prob=keep_prob_ph,state_keep_prob=keep_prob_ph,
                             variational_recurrent=True, input_size=attention_output_.get_shape()[-1], 
                             dtype = tf.float32)
    bw_cell_d = DropoutWrapper(bw_cell_d, input_keep_prob=keep_prob_ph, 
                             output_keep_prob=keep_prob_ph,state_keep_prob= keep_prob_ph,
                             variational_recurrent=True, input_size=attention_output_.get_shape()[-1], 
                             dtype = tf.float32)
    rnn_outputs_d, _ = bi_rnn(fw_cell_d, bw_cell_d, inputs=attention_output_, 
                              sequence_length=seq_len_ph_d, dtype=tf.float32)
    
    #rnn_outputs_d, _ = bi_rnn(Cell(HIDDEN_SIZE_D), Cell(HIDDEN_SIZE_D), inputs=attention_output, sequence_length=seq_len_ph_d, dtype=tf.float32)
    attention_output_d, alphas_d = attention(rnn_outputs_d, ATTENTION_SIZE_D, seq_len_ph_d, return_alphas=True)

# Dropout
drop = tf.nn.dropout(attention_output_d, keep_prob_ph)



#first classifier for first task using the representation from attention_outputs
#adding more layers... 
attention_output_sentorder = tf.reshape(attention_output, [-1,HIDDEN_SIZE*2*2*3])
W_s1_ = tf.Variable(tf.truncated_normal([HIDDEN_SIZE*2*2*3, LAYER_1], stddev=0.1))  
b_s1_ = tf.Variable(tf.truncated_normal([LAYER_1]))
y_hat_s1_ = tf.nn.xw_plus_b(attention_output_sentorder, W_s1_, b_s1_)
W_s2 = tf.Variable(tf.truncated_normal([LAYER_1, LAYER_2], stddev=0.1))  
b_s2 = tf.Variable(tf.truncated_normal([LAYER_2]))
y_hat_s2 = tf.nn.xw_plus_b(y_hat_s1_, W_s2, b_s2)

W_s = tf.Variable(tf.truncated_normal([LAYER_2, num_classes_s], stddev=0.1))  
b_s = tf.Variable(tf.truncated_normal([num_classes_s]))
y_hat_s = tf.nn.xw_plus_b(y_hat_s2, W_s, b_s)
y_preds_s = tf.argmax(y_hat_s, axis = 1)
loss_s = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_hat_s, labels=target_ph_s))

#second classifier for second task using the representation from attention_outputs
W_s1__ = tf.Variable(tf.truncated_normal([HIDDEN_SIZE*2*2*3, LAYER_1], stddev=0.1))  
b_s1__ = tf.Variable(tf.truncated_normal([LAYER_1]))
y_hat_s1__ = tf.nn.xw_plus_b(attention_output_sentorder, W_s1__, b_s1__)
W_s2_ = tf.Variable(tf.truncated_normal([LAYER_1, LAYER_2], stddev=0.1))  
b_s2_ = tf.Variable(tf.truncated_normal([LAYER_2]))
y_hat_s2_ = tf.nn.xw_plus_b(y_hat_s1__, W_s2_, b_s2_)

W_s1 = tf.Variable(tf.truncated_normal([LAYER_2, num_classes_s1], stddev=0.1))  
b_s1 = tf.Variable(tf.truncated_normal([num_classes_s1]))
y_hat_s1 = tf.nn.xw_plus_b(y_hat_s2_, W_s1, b_s1)
y_preds_s1 = tf.argmax(y_hat_s1, axis = 1)
loss_s1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_hat_s1, labels=target_ph_s1))


if ordinal:
    # For ordinal regression, same weights for each class
    W = tf.Variable(tf.truncated_normal([drop.get_shape()[1].value], stddev=0.1))
    W_ = tf.transpose(tf.reshape(tf.tile(W,[num_classes - 1]),[num_classes - 1, drop.get_shape()[1].value]))
    b = tf.Variable(tf.cast(tf.range(num_classes - 1), dtype = tf.float32))
    y_hat_ = tf.nn.xw_plus_b(drop, tf.negative(W_), b)

    # Predicted labels and logits
    y_preds, logits = preds(y_hat_,BATCH_SIZE)
    y_true = tf.argmax(target_ph, axis = 1)

    # Ordinal loss
    loss = ordloss_m(y_hat_, target_ph, BATCH_SIZE)
    c = stats.spearmanr
    str_score = "Spearman rank:"
    
    
else:
    W = tf.Variable(tf.truncated_normal([drop.get_shape()[1].value, num_classes], stddev=0.1))  
    b = tf.Variable(tf.truncated_normal([num_classes]))
    y_hat_ = tf.nn.xw_plus_b(drop, W, b)
    # Cross-entropy loss and optimizer initialization
    y_preds = tf.argmax(y_hat_, axis = 1)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_hat_, labels=target_ph))
    c = accuracy_score
    str_score = "accucary:"
    
# Calculate and clip gradients
max_gradient_norm = 5
lr = 5e-4
params = tf.trainable_variables()
gradients = tf.gradients(loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
optimizer_ = tf.train.AdamOptimizer(learning_rate=lr)
optimizer = optimizer_.apply_gradients(
    zip(clipped_gradients, params))

#second optimizer for sentence order
gradients_s = tf.gradients(loss_s, params)
clipped_gradients_s, _ = tf.clip_by_global_norm(gradients_s, max_gradient_norm)
optimizer_s = optimizer_.apply_gradients(
    zip(clipped_gradients_s, params))

#third optimizer for sentence order
gradients_s1 = tf.gradients(loss_s1, params)
clipped_gradients_s1, _ = tf.clip_by_global_norm(gradients_s1, max_gradient_norm)
optimizer_s1 = optimizer_.apply_gradients(
    zip(clipped_gradients_s1, params))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [17]:
MODEL_PATH = 'bca_dm_model/model300-20500-1800-1800'
saver = tf.train.Saver()

In [18]:
sess = tf.Session()
saver.restore(sess, MODEL_PATH)

INFO:tensorflow:Restoring parameters from bca_dm_model/model300-20500-1800-1800


In [19]:
###ASAP 
target_ph_asap = tf.placeholder(tf.float32, [None,num_classes_asap])

W_asap = tf.Variable(tf.truncated_normal([drop.get_shape()[1].value], stddev=0.1))
W_asap_ = tf.transpose(tf.reshape(tf.tile(W_asap,[num_classes_asap - 1]),[num_classes_asap - 1, drop.get_shape()[1].value]))
b_asap = tf.Variable(tf.cast(tf.range(num_classes_asap - 1), dtype = tf.float32))
y_hat_asap_ = tf.nn.xw_plus_b(drop, tf.negative(W_asap_), b_asap)

# Predicted labels and logits
y_preds_asap, logits_asap = preds(y_hat_asap_,BATCH_SIZE)
y_true_asap = tf.argmax(target_ph_asap, axis = 1)

# Ordinal loss
loss_asap = ordloss_m(y_hat_asap_, target_ph_asap, BATCH_SIZE)

gradients_asap = tf.gradients(loss_asap, params)
clipped_gradients_asap, _ = tf.clip_by_global_norm(gradients_asap, max_gradient_norm)
optimizer_asap = optimizer_.apply_gradients(
    zip(clipped_gradients_asap, params))


In [20]:
uninitialized_vars = []
for var in tf.all_variables():
    try:
        sess.run(var)
    except tf.errors.FailedPreconditionError:
        uninitialized_vars.append(var)

init_new_vars_op = tf.initialize_variables(uninitialized_vars)
sess.run(init_new_vars_op)

Instructions for updating:
Please use tf.global_variables instead.
Instructions for updating:
Use `tf.variables_initializer` instead.


In [21]:
#Main training task
train_batch_generator = batch_generator(X_train, y_train, BATCH_SIZE, seq_len = SEQUENCE_LENGTH_D)
val_batch_generator = batch_generator(X_val, y_val, BATCH_SIZE, seq_len = SEQUENCE_LENGTH_D)
test_batch_generator = batch_generator(X_test, y_test, BATCH_SIZE, seq_len = SEQUENCE_LENGTH_D, shuffle = False)

In [22]:
train_accuracy = []
val_accuracy = []
val_counter = []
val_count = 50
loss_train = 0
NUM_EPOCHS = 25
doc_size_np = np.array([0]*SEQUENCE_LENGTH_D)
batch_counter = 0
KEEP_PROB = 0.75
print('Training on ASAP data')


for epoch in range(NUM_EPOCHS):
    print("epoch: {}\t".format(epoch), end="")

    # Training
    num_batches = X_train.shape[0] // (BATCH_SIZE*SEQUENCE_LENGTH_D)
    true = []
    ypreds = []
    #y_temp = np.zeros((BATCH_SIZE,num_classes))

    for bx in range(num_batches):
        batch_counter += 1
        x_batch, y_batch = next(train_batch_generator)
        seq_len = np.array([list(x).index(0) + 1 for x in x_batch])  # actual lengths of sequences
        seq_len_d = []               
        l = SEQUENCE_LENGTH_D
        for i in range(0,len(x_batch),l):
            for j in range(i,i+l):
                if list(x_batch[j]).index(0) == 0:
                    seq_len_d.append(j%l)
                    break
                elif j == i+l-1:
                    seq_len_d.append(l)

        seq_len_d = np.array(seq_len_d)

        y_preds_, loss_tr,  _  = sess.run([y_preds_asap, loss_asap,  optimizer_asap],
                                   feed_dict={batch_ph: x_batch,
                                              target_ph_asap: y_batch,
                                              seq_len_ph: seq_len,
                                              seq_len_ph_d: seq_len_d,
                                              doc_size_ph: doc_size_np,
                                              keep_prob_ph: KEEP_PROB})
        loss_train = loss_tr * DELTA + loss_train * (1 - DELTA)
        ypreds.extend(y_preds_)
        t = np.argmax(y_batch, axis = 1)
        true.extend(t)

        sp = c(y_preds_,t)
        if ordinal: 
            sp = sp[0]
        train_accuracy.append(sp)

        #testing on the validation set            
        if batch_counter%val_count == 0:
            val_counter.append(batch_counter)
            x_batch, y_batch = next(val_batch_generator)
            seq_len = np.array([list(x).index(0) + 1 for x in x_batch])  # actual lengths of sequences
            seq_len_d = []               
            l = SEQUENCE_LENGTH_D
            for i in range(0,len(x_batch),l):
                for j in range(i,i+l):
                    if list(x_batch[j]).index(0) == 0:
                        seq_len_d.append(j%l)
                        break
                    elif j == i+l-1:
                        seq_len_d.append(l)

            seq_len_d = np.array(seq_len_d)

            y_preds_,loss_t,att_test = sess.run([y_preds_asap,loss_asap,attention_output_],
                          feed_dict={batch_ph: x_batch,
                                target_ph_asap: y_batch,
                                seq_len_ph: seq_len,
                                seq_len_ph_d: seq_len_d,
                                doc_size_ph: doc_size_np,
                                keep_prob_ph: 1.0})
            ypreds.extend(y_preds_)
            t = np.argmax(y_batch, axis = 1)
            true.extend(t)

            sp = c(y_preds_,t)
            if ordinal: 
                sp = sp[0]
            val_accuracy.append(sp)
    print('training loss: ' + str(loss_train))
    spr = c(true, ypreds)
    if ordinal:
        spr = spr[0]
    print('Training '+ str_score + str(spr))
    print('Val ' + str(np.mean(val_accuracy)))


Training on ASAP data
epoch: 0	

  c /= stddev[:, None]
  c /= stddev[None, :]
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


training loss: 1.6957044600683702
Training Spearman rank:0.16855440700903063
Val 0.8059974857283168
epoch: 1	training loss: 1.6240664957238848
Training Spearman rank:0.555913659063807
Val 0.837723444886757
epoch: 2	

KeyboardInterrupt: ignored

In [23]:
df_test = pd.read_csv(os.path.join(fpath,'test.csv'))
X_test, y_test = read_test_set(df_test, dictionary, SEQUENCE_LEN_D = SEQUENCE_LENGTH_D, SEQUENCE_LEN = SEQUENCE_LENGTH, min_= mi, max_ = ma)
X_test = zero_pad(X_test, SEQUENCE_LENGTH)

len of test set:  892


In [24]:
#batch size padding 
X_test = zero_pad_test(X_test, BATCH_SIZE*SEQUENCE_LENGTH_D)
y_test = zero_pad_test(y_test, BATCH_SIZE)

In [25]:
#testing on the test set
test_batch_generator = batch_generator(X_test, y_test, BATCH_SIZE, seq_len = SEQUENCE_LENGTH_D, shuffle = False)

num_batches = X_test.shape[0] // (BATCH_SIZE*SEQUENCE_LENGTH_D)
true = []
ypreds = []

for bx in range(num_batches):
    x_batch, y_batch = next(test_batch_generator)
    seq_len = np.array([list(x).index(0) + 1 for x in x_batch])  # actual lengths of sequences
    seq_len_d = []               
    l = SEQUENCE_LENGTH_D
    for i in range(0,len(x_batch),l):
        for j in range(i,i+l):
            if list(x_batch[j]).index(0) == 0:
                seq_len_d.append(j%l)
                break
            elif j == i+l-1:
                seq_len_d.append(l)

    seq_len_d = np.array(seq_len_d)

    y_preds_,loss_t,att_test = sess.run([y_preds_asap,loss_asap,attention_output_],
                  feed_dict={batch_ph: x_batch,
                        target_ph_asap: y_batch,
                        seq_len_ph: seq_len,
                        seq_len_ph_d: seq_len_d,
                        doc_size_ph: doc_size_np,
                        keep_prob_ph: 1.0})
    ypreds.extend(y_preds_)
    t = np.argmax(y_batch, axis = 1)
    true.extend(t)

true = true[:y_test_len]
ypreds = ypreds[:y_test_len]

spr = c(true, ypreds)

if ordinal:
    spr = spr[0]
print('Test set '+ str_score + str(spr))

rank = stats.spearmanr
print('sp rho')
print(rank(true, ypreds))

from sklearn.metrics import cohen_kappa_score as kappa
print('qwk')
print(kappa(true, ypreds, weights="quadratic"))

from scipy.stats import pearsonr
print('pearson')
print(pearsonr(true,ypreds))

print('kappa')
print(kappa(true, ypreds, weights=None))


Test set Spearman rank:0.8139355823171375
sp rho
SpearmanrResult(correlation=0.8139355823171375, pvalue=9.68129220482734e-86)
qwk
0.765090909090909
pearson
(0.8324962498283603, 4.571985159788707e-93)
kappa
0.319013362825573
