In [1]:
# encoding = utf8
'''
    @Author: King
    @Date: 2019.05.16
    @Purpose: Attention-Based-BiLSTM-relation-extraction
    @Introduction:  Attention-Based-BiLSTM-relation-extraction
    @Datasets: Chinese relation extration datasets
    @Link : 论文研读笔记作业-https://mp.weixin.qq.com/s?__biz=MzAxMTU5Njg4NQ==&mid=100001827&idx=2&sn=27cd33fa69eaf376a92352f65b293e90
    @Reference : https://github.com/SeoSangwoo/Attention-Based-BiLSTM-relation-extraction
    @paper ： https://www.aclweb.org/anthology/P16-2034
'''

'\n    @Author: King\n    @Date: 2019.05.16\n    @Purpose: Attention-Based-BiLSTM-relation-extraction\n    @Introduction:  Attention-Based-BiLSTM-relation-extraction\n    @Datasets: Chinese relation extration datasets\n    @Link : \n    @Reference : https://github.com/SeoSangwoo/Attention-Based-BiLSTM-relation-extraction\n'

## Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification

Tensorflow Implementation of Deep Learning Approach for Relation Extraction Challenge(SemEval-2010 Task #8: Multi-Way Classification of Semantic Relations Between Pairs of Nominals) via Attention-based BiLSTM.

Original paper [Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification](http://anthology.aclweb.org/P16-2034) 

<p align="center">
	<img width="700" height="400" src="img/Attention-Based-BiLSTM-relation-extraction.png">
</p>

### Requrements

* Python (>=3.5)

* TensorFlow (>=r1.0)

* scikit-learn (>=0.18)

### 1、Settings Class

In [3]:
class Settings(object):
    def __init__(self):
        
        '''
            Data loading params 
        '''
        ## Path of train data
        self.train_path = "E:/pythonWp/game/CCKS2019/RelationshipExtraction/origin_data/sample10_test_sent.txt"
        # Path of test data
        self.test_path = "E:/pythonWp/game/CCKS2019/RelationshipExtraction/origin_data/sample10_test_sent.txt"
        # Path of relation2id data
        self.relation2id_path = "E:/pythonWp/nlp/relation_extraction/Information-Extraction-Chinese_suss/RE_BGRU_2ATT/origin_data/relation2id.txt"
        # Max sentence length in data
        self.max_sentence_length = 90
        # Percentage of the training data to use for validation
        self.dev_sample_percentage = 0.1
        
        '''
            Model Hyper-parameters 
        '''
        '''
            1、Embeddings
        '''
        # Path of pre-trained word embeddings 
        self.embedding_path = "E:/pythonWp/nlp/relation_extraction/Information-Extraction-Chinese_suss/RE_BGRU_2ATT/origin_data/vec_char.txt"
        # Dimensionality of word embedding (default: 100)
        self.embedding_dim = 100
        # Dropout keep probability of embedding layer (default: 0.7)
        self.emb_dropout_keep_prob = 0.7
        
        '''
            2、AttLSTM
        '''
        # Dimensionality of RNN hidden (default: 100)
        self.hidden_size = 100
        # Dropout keep probability of RNN (default: 0.7)
        self.rnn_dropout_keep_prob = 0.7
        
        '''
            3、Misc
        '''
        # Description for model
        self.desc = ""
        # Dropout keep probability of RNN (default: 0.7)
        self.dropout_keep_prob = 0.5
        # L2 regularization lambda (default: 1e-5)
        self.l2_reg_lambda = 1e-5
        
        '''
            4、Training parameters
        '''
        # Description for model
        self.batch_size = 10
        # Number of training epochs (Default: 100)
        self.num_epochs = 5
        # Number of iterations to display training information
        self.display_every = 5
        # Evaluate model on dev set after this many steps (default: 100)
        self.evaluate_every = 100
        # Number of checkpoints to store (default: 5)
        self.num_checkpoints = 5
        # Which learning rate to start with (Default: 1.0)
        self.learning_rate = 1.0
        # Decay rate for learning rate (Default: 0.9)
        self.decay_rate = 0.9
        
        '''
            5、Testing parameters
        '''
        # Checkpoint directory from training run
        self.checkpoint_dir = ""
        
        '''
            6、Misc Parameters
        '''
        # Allow device soft device placement
        self.allow_soft_placement = True
        # Log placement of ops on devices
        self.log_device_placement = False
        # Allow gpu memory growth
        self.gpu_allow_growth = True
        

### 2、数据处理模型

In [4]:
import numpy as np
import pandas as pd 
'''
    工具包 begin
'''
import sys
if sys.version_info[0] > 2:
    is_py3 = True
else:
    reload(sys)
    sys.setdefaultencoding("utf-8")
    is_py3 = False

def native_word(word, encoding='utf-8'):
    """如果在python2下面使用python3训练的模型，可考虑调用此函数转化一下字符编码"""
    if not is_py3:
        return word.encode(encoding)
    else:
        return word

def native_content(content):
    if not is_py3:
        return content.decode('utf-8')
    else:
        return content

def open_file(filename, mode='r'):
    """
    常用文件操作，可在python2和python3间切换.
    mode: 'r' or 'w' for read or write
    """
    if is_py3:
        return open(filename, mode, encoding='utf-8', errors='ignore')
    else:
        return open(filename, mode)

'''
    工具包 end
'''


'\n    工具包 end\n'

In [5]:
#读取 relation2id 文件数据
def load_relation2id_file_cn(filename,demo_flag = False):
    '''
    读取 data 文件数据
    :param filename:    String 文件名称包含路径
    :param demo_flag:   String True 只读取 1000 样本数据，Fasle 读取全部数据
    :return:
        relation2id:   dict    relation to id
        id2relation:   list    id to relation 
    '''
    contents_num = 0
    relation2id = {}
    id2relation = []
    with open_file(filename) as f:
        for line in f:
            try:
                data_list = line.replace("\n","").split("\t")
                relation2id[data_list[0]] = int(data_list[1])
                id2relation.append(data_list[0])
                contents_num = contents_num + 1
                if demo_flag and contents_num == 500:
                    break
            except:
                pass
    return relation2id,id2relation

In [28]:
# 读取 训练集 文件数据
def load_data_and_labels_cn(path,settings):
    relation2id,id2relation=load_relation2id_file_cn(filename=settings.relation2id_path)   
    data = []

    df = pd.read_csv(path,quoting = 3, sep='\t',names=['e1','e2','r','s'])
    ''' print("df:{0}".format(df.iloc[0:2]))
        output:
            df:    
            e1  e2    r                                                  s
            0  李烈钧  王侃  NaN              李烈钧加入同盟会:光绪三十三年（1907年），经张断、王侃介绍加入同盟会。
            1   陈尸  孔子  NaN  子服景伯把这件事告诉给孔子，并且说：“季孙氏已经被公伯寮迷惑了，我的力量能够把公伯寮杀了，把...
    '''
    max_sentence_length = 0
    sentence_list = df['s'].tolist()
    for i in range(0,len(sentence_list)):
        tokens = str(sentence_list[i])
        if max_sentence_length < len(tokens):
            max_sentence_length = len(tokens)
        sentence_list[i] = " ".join(tokens)
    ''' print("sentence_list:{0}".format(sentence_list[0:2]))
        print("max sentence length = {}\n".format(max_sentence_length))
        print("df:{0}".format(df.iloc[0:2]))
        output
            sentence_list:['李 烈 钧 加 入 同 盟 会 : 光 绪 三 十 三 年 （ 1 9 0 7 年 ） ， 经 张 断 、 王 侃 介 绍 加 入 同 盟 会  。', '子 服 景 伯 把 这 件 事 告 诉 给 孔 子 ， 并 且 说 ： “ 季 孙 氏 已 经 被 公 伯 寮 迷 惑 了 ， 我 的 力 量 能 够 把 公 伯 寮 杀 了 ， 把 他 陈 尸 于 市 。 ”']
            max sentence length = 19751

            df:    e1  e2    r                                                  s
            0  李烈钧  王侃  NaN              李烈钧加入同盟会:光绪三十三年（1907年），经张断、王侃介绍加入同盟会。
            1   陈尸  孔子  NaN  子服景伯把这件事告诉给孔子，并且说：“季孙氏已经被公伯寮迷惑了，我的力量能够把公伯寮杀了，把...
    '''

    df = df.fillna('NA')            # 将省缺值用 ‘NAN’ 代替

    df['label'] = [relation2id[str(r)] for r in df['r']]
    ''' print("df:{0}".format(df.iloc[0:2]))
        output:
            df:    
                    e1  e2   r                                                  s  label
            0  李烈钧  王侃  NA              李烈钧加入同盟会:光绪三十三年（1907年），经张断、王侃介绍加入同盟会。      0
            1   陈尸  孔子  NA  子服景伯把这件事告诉给孔子，并且说：“季孙氏已经被公伯寮迷惑了，我的力量能够把公伯寮杀了，把...      0
    '''

    # Text Data
    x_text = sentence_list
    ''' print("x_text:{0}".format(x_text[0:1]))
        sys.exit(0)
        output:
            x_text:['李 烈 钧 加 入 同 盟 会 : 光 绪 三 十 三 年 （ 1 9 0 7 年 ） ， 经 张 断 、 王 侃 介 绍 加 入 同 盟 会 。', '子 服 景 伯 把 这 件 事 告 诉 给 孔 子 ， 并 且 说 ： “ 季 孙 氏 已 经 被 公 伯 寮 迷 惑 了 ， 我 的 力 量 能 够 把 公 伯 寮 杀 了 ， 把 他 陈 尸 于 市 。 ”', '剪 辑 ： 朱 小 勤 、 苏 鸿 文', '区 域 创 新 体 系 的 若 干 文 献 综 述 （ 陈 广 胜 许 小 忠 徐 燕 椿 ）', '在 拍 摄 间 隙 的 时 候 ， 谭 松 韵 与 郭 俊 辰 经 常 一 起 吃 辣 条 。']
    '''
    # Label Data
    y = df['label']
    labels_flat = y.values.ravel()
    labels_count = np.unique(labels_flat).shape[0]
    print("labels_flat:{0}".format(labels_flat))
    print("labels_count:{0}".format(labels_count))
    ''' print("labels_flat:{0}".format(labels_flat))
        print("labels_count:{0}".format(labels_count))
        output:
            labels_flat:[ 0  0  0 ...  0 29 29]
            labels_count:35
    '''

    # convert class labels from scalars to one-hot vectors
    # 0  => [1 0 0 0 0 ... 0 0 0 0 0]
    # 1  => [0 1 0 0 0 ... 0 0 0 0 0]
    # ...
    # 18 => [0 0 0 0 0 ... 0 0 0 0 1]
    def dense_to_one_hot(labels_dense, num_classes):
        num_labels = labels_dense.shape[0]
        index_offset = np.arange(num_labels) * num_classes
        labels_one_hot = np.zeros((num_labels, num_classes))
        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
        return labels_one_hot

    labels = dense_to_one_hot(labels_flat, labels_count)
    labels = labels.astype(np.uint8)
    ''' print("x_text:{0}".format(x_text[0:1]))
        print("labels:{0}".format(labels[0:1]))
        print("len(x_text):{0}".format(len(x_text)))
        print("len(labels):{0}".format(len(labels)))
        sys.exit(0)
        output:
            x_text:['李 烈 钧 加 入 同 盟 会 : 光 绪 三 十 三 年 （ 1 9 0 7 年 ） ， 经 张 断 、 王 侃 介 绍 加 入 同 盟 会 。', '子 服 景 伯 把 这 件 事 告 诉 给 孔 子 ， 并 且 说 ： “ 季 孙 氏 已 经 被 公 伯 寮 迷 惑 了 ， 我 的 力 量 能 够 把 公 伯 寮 杀 了 ， 把 他 陈 尸 于 市 。 ”', '剪 辑 ： 朱 小 勤 、 苏 鸿 文', '区 域 创 新 体 系 的 若 干 文 献 综 述 （ 陈 广 胜 许 小 忠 徐 燕 椿 ）', '在 拍 摄 间 隙 的 时 候 ， 谭 松 韵 与 郭 俊 辰 经 常 一 起 吃 辣 条 。']
            labels:[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
            len(x_text):37637
            len(labels):37637
    '''
    return x_text, labels

In [7]:
settings =Settings()
load_data_and_labels_cn(path=settings.train_path,settings=settings)
print("")

labels_flat:[10  0  0 ...  0  0  0]
labels_count:35



In [8]:
def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.
    """
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((len(data) - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]

### 3、Attention  Based BiLSTM relation extraction 模型

In [9]:
# 导入库
import tensorflow as tf
import numpy as np
import os
import datetime
import time
from sklearn.metrics import f1_score
import warnings
import sklearn.exceptions

  from ._conv import register_converters as _register_converters


####  3.1 attention模块

In [10]:
def attention(inputs):
    # Trainable parameters
    hidden_size = inputs.shape[2].value
    u_omega = tf.get_variable("u_omega", [hidden_size], initializer=tf.keras.initializers.glorot_normal())

    with tf.name_scope('v'):
        v = tf.tanh(inputs)

    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = tf.nn.softmax(vu, name='alphas')  # (B,T) shape

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)

    # Final output with tanh
    output = tf.tanh(output)

    return output, alphas

#### 3.2 AttLSTM 模块

In [11]:
class AttLSTM:
    def __init__(self, sequence_length, num_classes, vocab_size, embedding_size,
                 hidden_size, l2_reg_lambda=0.0):
        # Placeholders for input, output and dropout
        self.input_text = tf.placeholder(tf.int32, shape=[None, sequence_length], name='input_text')
        self.input_y = tf.placeholder(tf.float32, shape=[None, num_classes], name='input_y')
        self.emb_dropout_keep_prob = tf.placeholder(tf.float32, name='emb_dropout_keep_prob')
        self.rnn_dropout_keep_prob = tf.placeholder(tf.float32, name='rnn_dropout_keep_prob')
        self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

        initializer = tf.keras.initializers.glorot_normal

        # Word Embedding Layer
        with tf.device('/cpu:0'), tf.variable_scope("word-embeddings"):
            self.W_text = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -0.25, 0.25), name="W_text")
            self.embedded_chars = tf.nn.embedding_lookup(self.W_text, self.input_text)

        # Dropout for Word Embedding
        with tf.variable_scope('dropout-embeddings'):
            self.embedded_chars = tf.nn.dropout(self.embedded_chars, self.emb_dropout_keep_prob)

        # Bidirectional LSTM
        with tf.variable_scope("bi-lstm"):
            _fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, initializer=initializer())
            fw_cell = tf.nn.rnn_cell.DropoutWrapper(_fw_cell, self.rnn_dropout_keep_prob)
            _bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, initializer=initializer())
            bw_cell = tf.nn.rnn_cell.DropoutWrapper(_bw_cell, self.rnn_dropout_keep_prob)
            self.rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell,
                                                                  cell_bw=bw_cell,
                                                                  inputs=self.embedded_chars,
                                                                  sequence_length=self._length(self.input_text),
                                                                  dtype=tf.float32)
            self.rnn_outputs = tf.add(self.rnn_outputs[0], self.rnn_outputs[1])

        # Attention
        with tf.variable_scope('attention'):
            self.attn, self.alphas = attention(self.rnn_outputs)

        # Dropout
        with tf.variable_scope('dropout'):
            self.h_drop = tf.nn.dropout(self.attn, self.dropout_keep_prob)

        # Fully connected layer
        with tf.variable_scope('output'):
            self.logits = tf.layers.dense(self.h_drop, num_classes, kernel_initializer=initializer())
            self.predictions = tf.argmax(self.logits, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.variable_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.input_y)
            self.l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * self.l2

        # Accuracy
        with tf.variable_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name="accuracy")

    # Length of the sequence data
    @staticmethod
    def _length(seq):
        relevant = tf.sign(tf.abs(seq))
        length = tf.reduce_sum(relevant, reduction_indices=1)
        length = tf.cast(length, tf.int32)
        return length


### 4、词嵌入加载

In [12]:
def load_word2vec(embedding_path, embedding_dim, vocab):
    # initial matrix with random uniform
    initW = np.random.randn(len(vocab.vocabulary_), embedding_dim).astype(np.float32) / np.sqrt(len(vocab.vocabulary_))
    # load any vectors from the word2vec
    print("Load glove file {0}".format(embedding_path))
    f = open(embedding_path, 'r', encoding='utf8')
    for line in f:
        splitLine = line.split(' ')
        word = splitLine[0]
        embedding = np.asarray(splitLine[1:], dtype='float32')
        idx = vocab.vocabulary_.get(word)
        if idx != 0:
            initW[idx] = embedding
    return initW

### 5、模型训练

In [13]:
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)
settings =Settings()

def train(settings):
    with tf.device('/cpu:0'):
        x_text, y = load_data_and_labels_cn(path=settings.train_path,settings=settings)


    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(settings.max_sentence_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    
    # Randomly shuffle data to split into train and test(dev)
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(settings.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    
    
    with tf.Graph().as_default():
        # GPU 配置项
        session_conf = tf.ConfigProto(
            allow_soft_placement=settings.allow_soft_placement,
            log_device_placement=settings.log_device_placement)
        session_conf.gpu_options.allow_growth = settings.gpu_allow_growth

        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = AttLSTM(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=settings.embedding_dim,
                hidden_size=settings.hidden_size,
                l2_reg_lambda=settings.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(settings.learning_rate, settings.decay_rate, 1e-6)
            gvs = optimizer.compute_gradients(model.loss)
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var) for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            # 用于保存模型
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=settings.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if settings.embedding_path:
                pretrain_W = load_word2vec(settings.embedding_path, settings.embedding_dim, vocab_processor)
                sess.run(model.W_text.assign(pretrain_W))
                print("Success to load pre-trained word2vec model!\n")

            # Generate batches
            batches = batch_iter(list(zip(x_train, y_train)), settings.batch_size, settings.num_epochs)
            # Training loop. For each batch...
            best_f1 = 0.0  # For save checkpoint(model)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                # Train
                feed_dict = {
                    model.input_text: x_batch,
                    model.input_y: y_batch,
                    model.emb_dropout_keep_prob: settings.emb_dropout_keep_prob,
                    model.rnn_dropout_keep_prob: settings.rnn_dropout_keep_prob,
                    model.dropout_keep_prob: settings.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, model.loss, model.accuracy], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % settings.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

                # Evaluation
                if step % settings.evaluate_every == 0:
                    print("\nEvaluation:")
                    feed_dict = {
                        model.input_text: x_dev,
                        model.input_y: y_dev,
                        model.emb_dropout_keep_prob: 1.0,
                        model.rnn_dropout_keep_prob: 1.0,
                        model.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, predictions = sess.run(
                        [dev_summary_op, model.loss, model.accuracy, model.predictions], feed_dict)
                    dev_summary_writer.add_summary(summaries, step)

                    time_str = datetime.datetime.now().isoformat()
                    f1 = f1_score(np.argmax(y_dev, axis=1), predictions, labels=np.array(range(1, 35)), average="macro")
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                    print("[UNOFFICIAL] (2*9+1)-Way Macro-Average F1 Score (excluding Other): {:g}\n".format(f1))

                    # Model checkpoint
                    if best_f1 < f1:
                        best_f1 = f1
                        path = saver.save(sess, checkpoint_prefix + "-{:.3g}".format(best_f1), global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
    


In [14]:
train(settings)

labels_flat:[10  0  0 ...  0  0  0]
labels_count:35
Instructions for updating:
Please use tensorflow/transform or tf.data.
Instructions for updating:
Please use tensorflow/transform or tf.data.
Instructions for updating:
Please use tensorflow/transform or tf.data.
Text Vocabulary Size: 4022
x = (4818, 90)
y = (4818, 35)
Writing to E:\pythonWp\nlp\relation_extraction\relation_extraction_study\Attention-Based-BiLSTM-relation-extraction\runs\1558165255

Load glove file E:/pythonWp/nlp/relation_extraction/Information-Extraction-Chinese_suss/RE_BGRU_2ATT/origin_data/vec_char.txt
Success to load pre-trained word2vec model!

2019-05-18T15:40:59.368118: step 5, loss 3.28395, acc 0.8
2019-05-18T15:40:59.631307: step 10, loss 1.65106, acc 0.8
2019-05-18T15:40:59.911558: step 15, loss 2.82888, acc 0.5
2019-05-18T15:41:00.192807: step 20, loss 1.62322, acc 0.7
2019-05-18T15:41:00.483292: step 25, loss 1.9708, acc 0.6
2019-05-18T15:41:00.754233: step 30, loss 1.45214, acc 0.8
2019-05-18T15:41:01.05

2019-05-18T15:41:26.458361: step 500, loss 0.998784, acc 0.796258
[UNOFFICIAL] (2*9+1)-Way Macro-Average F1 Score (excluding Other): 0.0151579

Saved model checkpoint to E:\pythonWp\nlp\relation_extraction\relation_extraction_study\Attention-Based-BiLSTM-relation-extraction\runs\1558165255\checkpoints\model-0.0152-500

2019-05-18T15:41:27.354337: step 505, loss 0.442802, acc 0.9
2019-05-18T15:41:27.598686: step 510, loss 1.12604, acc 0.8
2019-05-18T15:41:27.830575: step 515, loss 0.604998, acc 0.9
2019-05-18T15:41:28.073789: step 520, loss 0.742013, acc 0.8
2019-05-18T15:41:28.329289: step 525, loss 1.47512, acc 0.8
2019-05-18T15:41:28.582612: step 530, loss 1.89703, acc 0.6
2019-05-18T15:41:28.873830: step 535, loss 0.558553, acc 0.8
2019-05-18T15:41:29.107570: step 540, loss 1.62561, acc 0.5
2019-05-18T15:41:29.347449: step 545, loss 0.395527, acc 1
2019-05-18T15:41:29.586104: step 550, loss 1.71424, acc 0.6
2019-05-18T15:41:29.850418: step 555, loss 1.036, acc 0.7
2019-05-18T15:41:3

2019-05-18T15:41:58.499165: step 1065, loss 0.675149, acc 0.9
2019-05-18T15:41:58.755537: step 1070, loss 1.48391, acc 0.7
2019-05-18T15:41:59.002900: step 1075, loss 1.71185, acc 0.6
2019-05-18T15:41:59.213773: step 1080, loss 1.20818, acc 0.7
2019-05-18T15:41:59.462109: step 1085, loss 1.48186, acc 0.4
2019-05-18T15:41:59.707453: step 1090, loss 0.505844, acc 0.9
2019-05-18T15:41:59.956304: step 1095, loss 1.43047, acc 0.8
2019-05-18T15:42:00.222419: step 1100, loss 1.04392, acc 0.7

Evaluation:
2019-05-18T15:42:00.685862: step 1100, loss 0.930688, acc 0.798337
[UNOFFICIAL] (2*9+1)-Way Macro-Average F1 Score (excluding Other): 0.0203169

2019-05-18T15:42:00.923663: step 1105, loss 0.328514, acc 1
2019-05-18T15:42:01.171002: step 1110, loss 0.909372, acc 0.8
2019-05-18T15:42:01.410686: step 1115, loss 0.333004, acc 0.9
2019-05-18T15:42:01.650719: step 1120, loss 0.825487, acc 0.9
2019-05-18T15:42:01.894860: step 1125, loss 1.10445, acc 0.7
2019-05-18T15:42:02.143119: step 1130, loss 1

Saved model checkpoint to E:\pythonWp\nlp\relation_extraction\relation_extraction_study\Attention-Based-BiLSTM-relation-extraction\runs\1558165255\checkpoints\model-0.0333-1600

2019-05-18T15:42:29.263627: step 1605, loss 0.592621, acc 0.8
2019-05-18T15:42:29.514274: step 1610, loss 0.491172, acc 0.9
2019-05-18T15:42:29.768738: step 1615, loss 0.817733, acc 0.9
2019-05-18T15:42:30.042006: step 1620, loss 1.40799, acc 0.7
2019-05-18T15:42:30.292440: step 1625, loss 0.811716, acc 0.8
2019-05-18T15:42:30.535800: step 1630, loss 0.246832, acc 1
2019-05-18T15:42:30.763183: step 1635, loss 0.840492, acc 0.7
2019-05-18T15:42:30.998782: step 1640, loss 0.558978, acc 0.8
2019-05-18T15:42:31.239130: step 1645, loss 0.969361, acc 0.8
2019-05-18T15:42:31.543254: step 1650, loss 0.691575, acc 0.8
2019-05-18T15:42:31.830141: step 1655, loss 0.572422, acc 0.7
2019-05-18T15:42:32.087050: step 1660, loss 0.953631, acc 0.8
2019-05-18T15:42:32.343556: step 1665, loss 0.291146, acc 1
2019-05-18T15:42:32.6

2019-05-18T15:42:59.608096: step 2150, loss 0.324538, acc 0.9
2019-05-18T15:42:59.859538: step 2155, loss 0.408016, acc 1
2019-05-18T15:43:00.115736: step 2160, loss 0.789831, acc 0.7
2019-05-18T15:43:00.360092: step 2165, loss 0.867749, acc 0.8
2019-05-18T15:43:00.619250: step 2170, loss 0.467613, acc 0.857143


### 6、模型评估

In [34]:
def eval(settings):
    print("---eval---")
    with tf.device('/cpu:0'):
        x_text, y = load_data_and_labels_cn(path=settings.test_path,settings=settings)

    print("len(x_text):{0}".format(len(x_text)))
    print("len(y):{0}".format(len(y)))
    
    
    # Map data into vocabulary
    text_path = os.path.join(settings.checkpoint_dir, "..", "vocab")
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(text_path)
    x = np.array(list(text_vocab_processor.transform(x_text)))

    checkpoint_file = tf.train.latest_checkpoint(settings.checkpoint_dir)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=settings.allow_soft_placement,
            log_device_placement=settings.log_device_placement)
        session_conf.gpu_options.allow_growth = settings.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_text = graph.get_operation_by_name("input_text").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            emb_dropout_keep_prob = graph.get_operation_by_name("emb_dropout_keep_prob").outputs[0]
            rnn_dropout_keep_prob = graph.get_operation_by_name("rnn_dropout_keep_prob").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]

            # Generate batches for one epoch
            batches = batch_iter(list(x), settings.batch_size, 1, shuffle=False)

            # Collect the predictions here
            preds = []
            for x_batch in batches:
                pred = sess.run(predictions, {input_text: x_batch,
                                              emb_dropout_keep_prob: 1.0,
                                              rnn_dropout_keep_prob: 1.0,
                                              dropout_keep_prob: 1.0})
                preds.append(pred)
            preds = np.concatenate(preds)
            truths = np.argmax(y, axis=1)

            relation2id,id2relation=load_relation2id_file_cn(filename=settings.relation2id_path)   
            prediction_path = os.path.join(settings.checkpoint_dir, "..", "predictions.txt")
            predictionClass_path = os.path.join(settings.checkpoint_dir, "..", "predictionsClass.txt")
            truth_path = os.path.join(settings.checkpoint_dir, "..", "ground_truths.txt")
            prediction_file = open_file(prediction_path, 'w')
            predictionClass_file = open_file(predictionClass_path, 'w')
            truth_file = open_file(truth_path, 'w')
            for i in range(len(preds)):
                prediction_file.write("{}\t{}\n".format(i, id2relation[preds[i]]))
                predictionClass_file.write("{0}\t{1}\n".format(i, preds[i]))
                truth_file.write("{}\t{}\n".format(i, id2relation[truths[i]]))
            prediction_file.close()
            truth_file.close()

#             perl_path = os.path.join(os.path.curdir,
#                                      "SemEval2010_task8_all_data",
#                                      "SemEval2010_task8_scorer-v1.2",
#                                      "semeval2010_task8_scorer-v1.2.pl")
#             process = subprocess.Popen(["perl", perl_path, prediction_path, truth_path], stdout=subprocess.PIPE)
#             for line in str(process.communicate()[0].decode("utf-8")).split("\\n"):
#                 print(line)

In [35]:
settings.checkpoint_dir = "runs/1558165255/checkpoints/"
settings.test_path="E:/pythonWp/game/CCKS2019/RelationshipExtraction/origin_data/test_f_char.txt"
eval(settings=settings)

---eval---
labels_flat:[0 0 0 ... 0 0 0]
labels_count:1
len(x_text):77092
len(y):77092
INFO:tensorflow:Restoring parameters from E:\pythonWp\nlp\relation_extraction\relation_extraction_study\Attention-Based-BiLSTM-relation-extraction\runs\1558165255\checkpoints\model-0.0911-1900
