In [None]:
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import pandas as pd
import os
import json
import pickle as pkl
import sys

sys.path.insert(0, "..")


# In[2]:


def vectorize_np(ex, args, word_dict, feature_dict, single_answer=False):
    """Turn one tokenized example into NumPy arrays.

    Args:
        ex: mapping with at least 'document', 'question' and 'id'; the keys
            'qlemma', 'lemma', 'pos', 'ner' and 'answers' are read only when
            the matching option / branch below needs them.
        args: object exposing the flags use_in_question, use_lemma, use_pos,
            use_ner and use_tf.
        word_dict: maps a token to its integer index.
        feature_dict: maps a feature name to its column in the feature matrix.
        single_answer: when True only the first answer span is returned.

    Returns:
        (document, features, question, id) when `ex` has no 'answers' key,
        otherwise (document, features, question, start, end, id).
        `features` is None when `feature_dict` is empty.
    """
    from collections import Counter
    from functools import partial

    to_int32 = partial(np.asarray, dtype=np.int32)

    # Token -> index lookup for document and question.
    doc_tokens = ex['document']
    document = to_int32([word_dict[tok] for tok in doc_tokens])
    q_tokens = ex['question']
    question = to_int32([word_dict[tok] for tok in q_tokens])

    # One feature row per document token (only if any feature is configured).
    features = None
    if len(feature_dict) > 0:
        features = np.zeros([len(doc_tokens), len(feature_dict)])

    def mark_tags(tags, template):
        # Set the one-hot column for every tag that has a known feature name.
        for pos, tag in enumerate(tags):
            key = template % tag
            if key in feature_dict:
                features[pos][feature_dict[key]] = 1.0

    # Exact-match features: cased, uncased and (optionally) lemma match.
    if args.use_in_question:
        cased = set(q_tokens)
        uncased = {tok.lower() for tok in q_tokens}
        lemmas = set(ex['qlemma']) if args.use_lemma else None
        for pos, tok in enumerate(doc_tokens):
            if tok in cased:
                features[pos][feature_dict['in_question']] = 1.0
            if tok.lower() in uncased:
                features[pos][feature_dict['in_question_uncased']] = 1.0
            if lemmas and ex['lemma'][pos] in lemmas:
                features[pos][feature_dict['in_question_lemma']] = 1.0

    # Token features: part-of-speech and named-entity tags.
    if args.use_pos:
        mark_tags(ex['pos'], 'pos=%s')
    if args.use_ner:
        mark_tags(ex['ner'], 'ner=%s')

    # Token feature: normalized (case-insensitive) term frequency.
    if args.use_tf:
        lowered = [tok.lower() for tok in doc_tokens]
        counts = Counter(lowered)
        total = len(doc_tokens)
        for pos, low in enumerate(lowered):
            features[pos][feature_dict['tf']] = counts[low] * 1.0 / total

    # Examples without targets are returned as a 4-tuple.
    if 'answers' not in ex:
        return document, features, question, ex['id']

    answers = ex['answers']
    if single_answer:
        assert(len(answers) > 0)
        first = answers[0]
        start = to_int32([first[0]])
        end = to_int32([first[1]])
    else:
        # May be empty arrays when the answer list is empty.
        start = to_int32([span[0] for span in answers])
        end = to_int32([span[1] for span in answers])

    return document, features, question, start, end, ex['id']


# In[3]:


#notebook_dumps_dir = "/home/svjack/temp_dir/DrQA/DrQA/notebook_dumps/"
#notebook_dumps_dir = "notebook_dumps"
notebook_dumps_dir = "../notebook_dumps"

def load_file(file_format, file_name):
    """Load ``<notebook_dumps_dir>/<file_name>.<file_format>`` and return its content.

    Args:
        file_format: "json" (read as UTF-8 text) or "pkl" (read with pickle).
        file_name: file name without extension, joined onto
            ``notebook_dumps_dir``.

    Returns:
        The deserialized object.

    Raises:
        AssertionError: if ``file_format`` is not "json" or "pkl".
        ValueError: same condition, when assertions are disabled (``-O``).

    Prints the resolved path after a successful load.
    """
    assert file_format in ["json", "pkl"]
    file_path = os.path.join(notebook_dumps_dir, "{}.{}".format(file_name, file_format))
    if file_format == "json":
        with open(file_path, "r", encoding = "utf-8") as f:
            obj = json.load(f)
    elif file_format == "pkl":
        # NOTE: unpickling can execute arbitrary code; only load dumps that
        # were produced by a trusted run of this project.
        with open(file_path, "rb") as f:
            obj = pkl.load(f)
    else:
        # Only reachable when the assert above is stripped; fail with a
        # meaningful error instead of a deliberate ZeroDivisionError.
        raise ValueError("unsupported file_format: {!r}".format(file_format))
    print("file load from {}".format(file_path))
    return obj


# In[4]:


# Option dictionary read from the "args.json" dump.
args_dict = load_file("json", "args")


# In[5]:


from collections import namedtuple
# The namedtuple *class* itself is used as a simple attribute container
# (it is never instantiated in this file); vectorize_np reads flags such as
# `use_pos` from it via attribute access.
args_tuple = namedtuple("args_tuple", list(args_dict.keys()))


# In[6]:


# Attach every option as a class attribute. setattr stores the loaded value
# as-is; building source text for exec() broke on strings containing quotes,
# backslashes or newlines and executed file content as code.
for k, v in args_dict.items():
    setattr(args_tuple, k, v)


# In[7]:


# Pre-computed vocab / feature mappings and example lists from the dump dir.
word_dict = load_file("pkl", "word_dict")
feature_dict = load_file("pkl", "feature_dict")
dev_exs = load_file("pkl", "dev_exs")
train_exs = load_file("pkl", "train_exs")


# In[8]:


def batchify_np(batch):
    """Collate per-example tuples (as produced by vectorize_np) into padded arrays.

    Each example is either (document, features, question, id) or
    (document, features, question, start, end, id).

    Returns:
        (x1, x1_f, x1_length, x2, x2_length, ids) for examples without
        targets, otherwise
        (x1, x1_f, x1_length, x2, x2_length, y_s, y_e, ids).
        x1 / x2 are zero-padded matrices, x1_f is the zero-padded feature
        tensor (None when the first example has no features), the *_length
        arrays are int32, and y_s / y_e are the concatenated targets.

    Raises:
        RuntimeError: if the examples have an unexpected number of fields.
    """
    NUM_INPUTS = 3
    NUM_TARGETS = 2
    NUM_EXTRA = 1

    ids = [ex[-1] for ex in batch]
    docs = [ex[0] for ex in batch]
    features = [ex[1] for ex in batch]
    questions = [ex[2] for ex in batch]

    def pad_rows(seqs):
        # Allocate a zero matrix wide enough for the longest sequence and
        # record the true length of every sequence.
        widest = max([s.size for s in seqs])
        padded = np.zeros([len(seqs), widest])
        lengths = np.asarray([s.size for s in seqs], dtype=np.int32)
        return padded, lengths, widest

    # Documents and their per-token features.
    x1, x1_length, doc_width = pad_rows(docs)
    x1_f = None
    if features[0] is not None:
        x1_f = np.zeros([len(docs), doc_width, features[0].shape[1]])
    for row, (doc, feat) in enumerate(zip(docs, features)):
        n_tokens = doc.shape[0]
        x1[row, :n_tokens] = doc
        if x1_f is not None:
            x1_f[row, :n_tokens] = feat

    # Questions.
    x2, x2_length, _ = pad_rows(questions)
    for row, quest in enumerate(questions):
        x2[row, :quest.shape[0]] = quest

    n_fields = len(batch[0])

    # Examples without targets.
    if n_fields == NUM_INPUTS + NUM_EXTRA:
        return x1, x1_f, x1_length, x2, x2_length, ids

    if n_fields != NUM_INPUTS + NUM_EXTRA + NUM_TARGETS:
        raise RuntimeError('Incorrect number of inputs per example.')

    # Examples with targets: concatenate the per-example start / end arrays.
    y_s = np.concatenate([ex[3] for ex in batch])
    y_e = np.concatenate([ex[4] for ex in batch])
    return x1, x1_f, x1_length, x2, x2_length, y_s, y_e, ids


# In[9]:


def batch_samples_gen(exs, epoch_num = 1, batch_size = 10, args_tuple = args_tuple, word_dict = word_dict,
                      feature_dict = feature_dict, single_answer=True):
    """Yield shuffled, padded training batches for `epoch_num` passes over `exs`.

    Every epoch draws a fresh random permutation of the example indices,
    consumes it from the back in groups of at most `batch_size`, vectorizes
    each group with vectorize_np and collates it with batchify_np.

    Yields:
        (x1, x1_f, x1_length, x2, x2_length, y_s, y_e) -- the example ids
        returned by batchify_np are dropped.

    Prints a marker line at the end of every epoch.
    """
    assert epoch_num >= 1

    def vectorize_one(ex_index):
        return vectorize_np(exs[ex_index], args_tuple, word_dict, feature_dict,
                            single_answer=single_answer)

    for epoch in range(epoch_num):
        remaining = np.random.permutation(list(range(len(exs)))).tolist()
        while remaining:
            # Take indices from the end of the permutation until the batch
            # is full or nothing is left.
            chosen = []
            while len(chosen) < batch_size and remaining:
                chosen.append(remaining.pop())

            exs_batch = [vectorize_one(ex_index) for ex_index in chosen]
            x1, x1_f, x1_length, x2, x2_length, y_s, y_e, ids = batchify_np(exs_batch)
            yield x1, x1_f, x1_length, x2, x2_length, y_s, y_e
        print("epoch {} end !".format(epoch))


# In[10]:


import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTMCell, RNN, LSTM


class RnnDocReader_tf(object):
    """Graph-mode TensorFlow reader that scores answer start / end positions.

    The whole graph is built in the constructor. Afterwards the instance
    exposes the placeholders (x1, x1_f, x2, x1_length, x2_length, target_s,
    target_e, dropout_pl, lr_pl), the logits (start_score_before_softmax,
    end_score_before_softmax), the predictions (labels_softmax_start,
    labels_softmax_end), `loss`, `train_op`, `global_step` and `init_op`.

    Args:
        embeddings: initial value of the word-embedding matrix.
        update_embedding: whether the embedding matrix is trainable.
        num_features: width of the per-token feature vector; x1_f is fed as
            [batch, doc_len * num_features] and reshaped inside the graph.
        summary_path: directory used by add_summary (optional).
        hidden_size: units per LSTM direction.
        attn_filters: Conv1D filters used by the attention blocks.

    Raises:
        ValueError: if num_features is None (the feature reshape needs it).
    """

    # Added to the logits of padded positions so that they receive
    # (numerically) zero probability and are never chosen by argmax.
    _MASKED_LOGIT = -1e9

    def __init__(self, embeddings, update_embedding = True, num_features = None,
                 summary_path = None, hidden_size = 128, attn_filters = 100):
        if num_features is None:
            raise ValueError("num_features is required to reshape the x1_f input")

        self.embeddings = embeddings

        self.update_embedding = update_embedding
        self.num_features = num_features
        self.summary_path = summary_path
        self.hidden_size = hidden_size
        self.attn_filters = attn_filters

        self.optimizer = "Adam"
        # Only referenced by the (currently disabled) gradient-clipping variant.
        self.clip_grad = 0.5

        self.build_graph()

    def build_graph(self):
        """Create all graph nodes, in dependency order."""
        self.add_placeholders()
        self.lookup_layer_op()
        self.RNN_layer_op(use_avg_weight = "attn")
        self.softmax_pred_op()
        self.loss_op()
        self.trainstep_op()
        self.init_op()

    def add_placeholders(self):
        """Declare the inputs fed on every session run."""
        self.x1 = tf.compat.v1.placeholder(tf.int32, shape = [None, None], name = "x1")
        self.x1_f = tf.compat.v1.placeholder(tf.float32, shape = [None, None], name = "x1_f")
        self.x2 = tf.compat.v1.placeholder(tf.int32, shape = [None, None], name = "x2")
        self.x1_length = tf.compat.v1.placeholder(tf.int32, shape = [None], name = "x1_length")
        self.x2_length = tf.compat.v1.placeholder(tf.int32, shape = [None], name = "x2_length")

        self.target_s = tf.compat.v1.placeholder(tf.int32, shape = [None], name = "target_s")
        self.target_e = tf.compat.v1.placeholder(tf.int32, shape = [None], name = "target_e")

        self.dropout_pl = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="dropout")
        self.lr_pl = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="lr")

    def lookup_layer_op(self):
        """Embed document and question ids and apply dropout."""
        with tf.compat.v1.variable_scope("words"):
            _word_embeddings = tf.Variable(self.embeddings,
                                           dtype=tf.float32,
                                           trainable=self.update_embedding,
                                           name="_word_embeddings")
            x1_emb = tf.nn.embedding_lookup(params=_word_embeddings,
                                            ids=self.x1,
                                            name="x1_emb")
            x2_emb = tf.nn.embedding_lookup(params=_word_embeddings,
                                            ids=self.x2,
                                            name="x2_emb")
        # dropout_pl is a drop *rate*; passing it by keyword keeps that
        # meaning regardless of the positional order of tf.nn.dropout.
        self.x1_emb = tf.nn.dropout(x1_emb, rate=self.dropout_pl)
        self.x2_emb = tf.nn.dropout(x2_emb, rate=self.dropout_pl)

    def RNN_layer_op(self, use_avg_weight = "attn"):
        """Build the encoders and the start / end logits.

        Args:
            use_avg_weight: "attn" pools the question states with learned
                attention weights, "unif" with uniform weights.

        Sets start_score_before_softmax and end_score_before_softmax, both
        [batch, doc_len] logits whose padded positions are masked out.
        """
        assert use_avg_weight in ["attn", "unif"]

        doc_len = tf.shape(self.x1_emb)[1]
        quest_len = tf.shape(self.x2_emb)[1]

        drnn_input = [self.x1_emb]
        print(self.x1_emb.shape)

        def produce_attn_layer(layer_name, query_input, value_input, filters = 100, kernel_size = 4):
            with tf.compat.v1.variable_scope(layer_name):
                cnn_layer = tf.keras.layers.Conv1D(
                    filters=filters,
                    kernel_size=kernel_size,
                    # Use 'same' padding so outputs have the same shape as inputs.
                    padding='same')
                # Query encoding of shape [batch_size, Tq, filters].
                query_seq_encoding = cnn_layer(query_input)
                # Value encoding of shape [batch_size, Tv, filters].
                value_seq_encoding = cnn_layer(value_input)

                # Query-value attention of shape [batch_size, Tq, filters].
                query_value_attention_seq = tf.keras.layers.Attention()(
                    [query_seq_encoding, value_seq_encoding])

                return query_value_attention_seq

        def project_to_scores(seq, width):
            # [B, T, width] -> [B, T]: one scalar score per time step.
            flat_scores = tf.keras.layers.Dense(1)(tf.reshape(seq, [-1, width]))
            return tf.reshape(flat_scores, [tf.shape(seq)[0], tf.shape(seq)[1]])

        def mask_padding_logits(logits, lengths, max_len):
            # Keep logits of real tokens; give padded positions a large
            # negative value. Multiplying by the mask alone would leave them
            # at 0, i.e. still competing in softmax / argmax.
            keep = tf.sequence_mask(lengths, max_len, dtype = tf.float32)
            return logits * keep + (1.0 - keep) * self._MASKED_LOGIT

        x2_weighted_emb = produce_attn_layer("x2_weighted_emb", self.x1_emb, self.x2_emb,
                                             self.attn_filters)
        drnn_input.append(x2_weighted_emb)

        x1_f_reshape = tf.reshape(self.x1_f, [-1, doc_len, self.num_features])
        drnn_input.append(x1_f_reshape)

        #### [B, N1, emb_dim + filters + feat_dim]
        before_rnn_input = tf.concat(drnn_input, axis = -1)

        def produce_birnn_layer(layer_name, hidden_size = 128):
            with tf.compat.v1.variable_scope(layer_name):
                forward_layer = LSTM(hidden_size, return_sequences=True)
                backward_layer = LSTM(hidden_size, return_sequences=True, go_backwards=True)
                return Bidirectional(forward_layer, backward_layer=backward_layer)

        # Padded time steps are zeroed so that the Masking layer skips them.
        bi_stack_lstm_with_mask_doc = tf.keras.Sequential([
                tf.keras.layers.Masking(mask_value=0.),
                produce_birnn_layer("doc-bi-stack-lstm", self.hidden_size)
                ])
        doc_mask = tf.expand_dims(tf.sequence_mask(self.x1_length, doc_len, dtype = tf.float32), axis = -1)
        #### [B, N1, 2 * hidden_size]
        doc_hiddens = bi_stack_lstm_with_mask_doc(before_rnn_input * doc_mask)

        bi_stack_lstm_with_mask_quest = tf.keras.Sequential([
                tf.keras.layers.Masking(mask_value=0.),
                produce_birnn_layer("quest-bi-stack-lstm", self.hidden_size)
                ])
        quest_mask = tf.expand_dims(tf.sequence_mask(self.x2_length, quest_len, dtype = tf.float32), axis = -1)
        #### [B, N2, 2 * hidden_size]
        quest_hiddens = bi_stack_lstm_with_mask_quest(self.x2_emb * quest_mask)

        def uniform_weights(x, x_length):
            inputs = tf.ones(tf.shape(x)[:2])
            mask = tf.sequence_mask(x_length, tf.shape(inputs)[1], dtype = tf.float32)
            mask_output = tf.keras.layers.Masking(mask_value=0.)(inputs * mask)
            mask_output = mask_output / tf.expand_dims(tf.reduce_sum(mask_output, axis = 1),
                                                       len(mask_output.shape) - 1)

            return mask_output

        #### question_hidden: [B, 2 * hidden_size]
        if use_avg_weight == "unif":
            #### [B, N2]
            uniform_q_merged_weights = uniform_weights(quest_hiddens, self.x2_length)
            question_hidden = tf.reduce_sum(quest_hiddens * tf.expand_dims(uniform_q_merged_weights, axis = 2), axis = 1)
        else:
            before_softmax = project_to_scores(quest_hiddens, self.hidden_size * 2)
            #### [B, N2], padded question positions get ~0 weight
            attn_q_merged_weights = tf.nn.softmax(
                mask_padding_logits(before_softmax, self.x2_length, quest_len), axis = 1)
            question_hidden = tf.reduce_sum(quest_hiddens * tf.expand_dims(attn_q_merged_weights, axis = 2), axis = 1)

        question_hidden_tiled = tf.tile(tf.expand_dims(question_hidden, axis = 1),
                                        [1, tf.shape(doc_hiddens)[1], 1])

        #### [B, N1]
        start_score_before = produce_attn_layer("start_score_before", doc_hiddens,
                                                question_hidden_tiled, self.attn_filters)
        start_score_before = project_to_scores(start_score_before, self.attn_filters)
        start_score_before_softmax = mask_padding_logits(start_score_before, self.x1_length, doc_len)

        #### [B, N1]
        end_score_before = produce_attn_layer("end_score_before", doc_hiddens,
                                              question_hidden_tiled, self.attn_filters)
        end_score_before = project_to_scores(end_score_before, self.attn_filters)
        end_score_before_softmax = mask_padding_logits(end_score_before, self.x1_length, doc_len)

        self.start_score_before_softmax, self.end_score_before_softmax = start_score_before_softmax, end_score_before_softmax

    def loss_op(self):
        """Sum of the mean cross-entropy losses of the start and end positions."""
        self.start_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.target_s, logits=self.start_score_before_softmax)
        self.end_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.target_e, logits=self.end_score_before_softmax)
        self.loss = tf.reduce_mean(self.start_loss) + tf.reduce_mean(self.end_loss)

    def softmax_pred_op(self):
        """Predicted start / end index per example (argmax over the logits)."""
        self.labels_softmax_start = tf.argmax(self.start_score_before_softmax, axis=-1)
        self.labels_softmax_start = tf.cast(self.labels_softmax_start, tf.int32)

        self.labels_softmax_end = tf.argmax(self.end_score_before_softmax, axis=-1)
        self.labels_softmax_end = tf.cast(self.labels_softmax_end, tf.int32)

    def trainstep_op(self):
        """Create the optimizer named by self.optimizer and the training op.

        Unknown optimizer names fall back to plain gradient descent.
        """
        with tf.compat.v1.variable_scope("train_step"):
            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            if self.optimizer == 'Adam':
                optim = tf.compat.v1.train.AdamOptimizer(learning_rate=self.lr_pl)
            elif self.optimizer == 'Adadelta':
                optim = tf.compat.v1.train.AdadeltaOptimizer(learning_rate=self.lr_pl)
            elif self.optimizer == 'Adagrad':
                optim = tf.compat.v1.train.AdagradOptimizer(learning_rate=self.lr_pl)
            elif self.optimizer == 'RMSProp':
                optim = tf.compat.v1.train.RMSPropOptimizer(learning_rate=self.lr_pl)
            elif self.optimizer == 'Momentum':
                optim = tf.compat.v1.train.MomentumOptimizer(learning_rate=self.lr_pl, momentum=0.9)
            elif self.optimizer == 'SGD':
                optim = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=self.lr_pl)
            else:
                optim = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=self.lr_pl)

            # Passing global_step makes every training step increment it;
            # without it the counter stayed at 0 forever.
            self.train_op = optim.minimize(self.loss, global_step=self.global_step)

    def init_op(self):
        """Create the variable initializer.

        NOTE: the assignment below replaces this bound method on the
        instance, so after construction `self.init_op` is the initializer op.
        """
        self.init_op = tf.compat.v1.global_variables_initializer()

    def add_summary(self, sess):
        """Merge all summaries and open a FileWriter on `summary_path`.

        :param sess: session whose graph is written to the summary directory
        :return: None
        :raises ValueError: if no summary_path was given to the constructor
        """
        if self.summary_path is None:
            raise ValueError("summary_path was not provided to the constructor")
        self.merged = tf.compat.v1.summary.merge_all()
        self.file_writer = tf.compat.v1.summary.FileWriter(self.summary_path, sess.graph)


# In[11]:


# Notebook display cell: vocabulary size.
len(word_dict)


# In[12]:


# Embedding width, vocabulary size and feature-vector width used below.
EMB_DIM, VOC_SIZE, F_DIM = 300, len(word_dict), len(feature_dict)
# Pickled embedding matrix, resolved relative to the working directory
# (presumably GloVe vectors aligned with word_dict -- verify against the
# script that produced the file).
embedding_path = "glove_embedding.pkl"
assert os.path.exists(embedding_path)
import pickle as pkl
# NOTE(review): pickle.load executes arbitrary code; only use trusted files.
with open(embedding_path, "rb") as f:
    embeddings_np = pkl.load(f)
#embeddings_np = np.random.random([VOC_SIZE, EMB_DIM])
# The matrix must provide exactly one EMB_DIM-wide row per vocabulary entry.
assert embeddings_np.shape == (VOC_SIZE, EMB_DIM)


# In[13]:


# Notebook display cell: loaded options.
args_dict


# In[14]:


# Must run before the model is constructed: RnnDocReader_tf builds its graph
# from tf.compat.v1 placeholders.
tf.compat.v1.disable_eager_execution()


# In[15]:


# Builds the complete graph (the constructor calls build_graph()).
rnnReader_ext = RnnDocReader_tf(embeddings_np, num_features=F_DIM)


# In[16]:


# Notebook display cell: number of dev examples loaded from the pickle dump.
len(dev_exs)


# In[17]:


#train_gen = batch_samples_gen(train_exs[:1000], epoch_num=1000)
# Generator over shuffled training batches of 32 for up to 1000 epochs.
train_gen = batch_samples_gen(train_exs, epoch_num=1000, batch_size=32)
from drqa.reader.utils import load_data
from collections import namedtuple
# As with args_tuple, the namedtuple class itself serves as an attribute
# container for the two options passed to load_data.
load_data_args = namedtuple("load_data_args", ["uncased_question", "uncased_doc"])
load_data_args.uncased_doc = False
load_data_args.uncased_question = False
#json_file_name = "/home/svjack/temp_dir/DrQA/DrQA/data/datasets/SQuAD-v1.1-dev-processed-corenlp.txt" 
#json_file_name = "notebook_dumps/SQuAD-v1.1-dev-processed-corenlp.txt"
json_file_name = "../data/datasets/SQuAD-v1.1-dev-processed-corenlp.txt"
# Rebinds dev_exs: the pickled dev examples loaded earlier are replaced by
# the examples read from the processed dev file.
dev_exs = load_data(load_data_args ,json_file_name, skip_no_answer=True)


# In[18]:


# Notebook display cell: number of dev examples after reloading.
len(dev_exs)


# In[ ]:

# "train": initialize variables and start with a fresh "save" directory.
# "re-train": restore the latest checkpoint found under "save".
#mode = "re-train"
mode = "train"
assert mode in ["train", "re-train"]

sess = tf.compat.v1.Session()

if mode == "train":
    sess.run(tf.compat.v1.global_variables_initializer())

    import shutil
    # Remove checkpoints of earlier runs, then recreate "save" and "save/model".
    if os.path.exists("save"):
        shutil.rmtree("save")
    os.makedirs("save/model")

    saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables(), max_to_keep = 5)
else:
    assert mode == "re-train"
    assert os.path.exists("save")
    latest_path = tf.compat.v1.train.latest_checkpoint("save")
    # latest_checkpoint returns None when the directory holds no checkpoint;
    # fail with a clear message instead of handing None to saver.restore.
    if latest_path is None:
        raise FileNotFoundError("no checkpoint found under 'save'; run with mode = \"train\" first")

    saver = tf.compat.v1.train.Saver()
    saver.restore(sess, latest_path)
# Checkpoint prefix handed to saver.save() by the training loop.
model_path  = "save/model"

def _run_batch(model, ele, with_train):
    """Feed one batch to `model` in the module-level `sess`.

    Shared implementation of retrieve_conclusion and
    retrieve_conclusion_with_output.

    Args:
        model: object exposing the placeholders and ops of RnnDocReader_tf.
        ele: (x1, x1_f, x1_length, x2, x2_length, target_s, target_e).
        with_train: when True the training op is run too and dropout is on.

    Returns:
        (loss, start_pred, end_pred, start_hit, end_hit) where the *_hit
        arrays are 0/1 indicators of a correct start / end prediction.
    """
    x1, x1_f, x1_length, x2, x2_length, target_s, target_e = ele
    # Dropout rate: active only while training.
    dropout_pl_np = 0.6 if with_train else 0.0
    lr_pl_np = 0.0001

    feed_dict = {
        model.x1: x1,
        model.x2: x2,
        # The x1_f placeholder is 2-D; flatten (tokens, features) per example.
        model.x1_f: x1_f.reshape([len(x1), -1]),
        model.x1_length: x1_length,
        model.x2_length: x2_length,
        model.target_s: target_s,
        model.target_e: target_e,
        model.dropout_pl: dropout_pl_np,
        model.lr_pl: lr_pl_np,
    }

    fetches = [model.loss, model.labels_softmax_start, model.labels_softmax_end]
    if with_train:
        _, loss, start_pred, end_pred = sess.run([model.train_op] + fetches,
                                                 feed_dict = feed_dict)
    else:
        loss, start_pred, end_pred = sess.run(fetches, feed_dict = feed_dict)

    start_hit = np.asarray(start_pred == target_s, dtype = int)
    end_hit = np.asarray(end_pred == target_e, dtype = int)
    return loss, start_pred, end_pred, start_hit, end_hit


def retrieve_conclusion(model ,ele, with_train = True):
    """Run one batch and report the loss and the start / end accuracies.

    Args:
        model: the reader model (returned unchanged as first element).
        ele: one batch as yielded by batch_samples_gen.
        with_train: also apply a training step when True.

    Returns:
        (model, loss, start_score, end_score); the scores are the fractions
        of examples in the batch whose start / end index was predicted
        correctly.
    """
    loss, start_pred, end_pred, start_hit, end_hit = _run_batch(model, ele, with_train)

    start_score = start_hit.sum() / len(start_pred)
    end_score = end_hit.sum() / len(end_pred)

    return model ,loss, start_score, end_score

def retrieve_conclusion_with_output(model ,ele, with_train = True):
    """Like retrieve_conclusion, but also return exact-match score and predictions.

    Returns:
        (model, loss, start_score, end_score, score, labels_softmax_start,
        labels_softmax_end); `score` is the fraction of examples for which
        both the start and the end index are correct.
    """
    loss, start_pred, end_pred, start_hit, end_hit = _run_batch(model, ele, with_train)

    start_score = start_hit.sum() / len(start_pred)
    end_score = end_hit.sum() / len(end_pred)
    score = (start_hit * end_hit).sum() / len(end_pred)

    return model ,loss, start_score, end_score, score, start_pred, end_pred

from shutil import copyfile
log_file = "log.txt"
# Start every run with an empty log file.
with open(log_file, "w"):
    pass

best_dev_loss = np.inf
best_start, best_end = 0, 0
pred_save_format = "pred_start_{}_end_{}.pkl"


def _append_log(message):
    """Append `message` to the log file and refresh its "_cp" backup copy.

    Appending yields the same file content as re-reading and rewriting the
    whole log (each entry preceded by a newline) without the quadratic cost.
    """
    with open(log_file, "a") as log_f:
        log_f.write("\n" + message)
    copyfile(log_file, log_file.replace(".txt", "_cp.txt"))


for idx ,ele in enumerate(train_gen):
    rnnReader_ext ,train_loss, train_start_score, train_end_score = retrieve_conclusion(rnnReader_ext, ele, with_train=True)

    # Every 100 steps: log the training metrics of the current batch.
    if (idx) % 100 == 0:
        message = "idx {} train_loss: {}, start: {}, end: {}\n dev_loss: {} start: {}, end: {}".format(idx,
                train_loss, train_start_score, train_end_score, None, None, None)
        _append_log(message)
        print(message)

    # Every 3000 steps: evaluate on the dev examples (no training step).
    if (idx) % 3000 == 0:
        dev_gen = batch_samples_gen(dev_exs[:100000], epoch_num=1, batch_size=32)
        dev_loss_list, dev_s_list, dev_e_list = [], [], []
        start_pred_list, end_pred_list = [], []
        dev_list = []
        for d_ele in dev_gen:
            rnnReader_ext ,dev_loss, dev_start_score, dev_end_score, dev_score, start_pred, end_pred = retrieve_conclusion_with_output(rnnReader_ext, d_ele, with_train=False)
            dev_loss_list.append(dev_loss)
            dev_s_list.append(dev_start_score)
            dev_e_list.append(dev_end_score)

            dev_list.append(dev_score)

            start_pred_list.extend(start_pred.tolist())
            end_pred_list.extend(end_pred.tolist())

        # Unweighted mean over the per-batch values.
        dev_loss, dev_s, dev_e, dev_ = map(lambda l: pd.Series(l).mean(), [dev_loss_list, dev_s_list, dev_e_list, dev_list])

        message = "idx {} train_loss: {}, start: {}, end: {}\n dev_loss: {} start: {}, end: {}, score: {}".format(idx,
                train_loss, train_start_score, train_end_score, dev_loss, dev_s, dev_e, dev_)

        # Keep the dev predictions of this evaluation; the file name carries
        # the start / end accuracies.
        with open(pred_save_format.format(dev_s, dev_e), "wb") as pred_f:
            pkl.dump({
                    "start":start_pred_list,
                    "end":end_pred_list
                    }, pred_f)

        _append_log(message)

        # Save a checkpoint whenever either accuracy beats the values of the
        # last saved checkpoint.
        if best_start < dev_s or best_end < dev_e:
            best_start = dev_s
            best_end = dev_e
            saver.save(sess, model_path, idx)
        print(message)







file load from ../notebook_dumps/args.json
file load from ../notebook_dumps/word_dict.pkl
file load from ../notebook_dumps/feature_dict.pkl
file load from ../notebook_dumps/dev_exs.pkl
