In [1]:
"""
    Named Entity Recognition 

        Author : Sangkeun Jung (2017)
        - using Tensorflow
"""

import sys, os, inspect

# add common to path
from pathlib import Path
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
common_path = str(Path(currentdir).parent.parent)
sys.path.append( common_path )

from common.nlp.vocab import Vocab
from common.nlp.data_loader import N2NTextData
from common.nlp.converter import N2NConverter

from dataset import NERDataset
from dataset import load_data
from common.ml.hparams import HParams

import numpy as np
import copy 
import time 
import tensorflow as tf
from tensorflow.python.ops import rnn
from tensorflow.contrib.layers.python.layers import linear
from tensorflow.python.ops import variable_scope
from tensorflow.contrib.seq2seq import sequence_loss#sequence의 loss의 평균값을 구할 수있음

from common.ml.tf.deploy import freeze_graph



print( "Tensorflow Version : ", tf.__version__)

class NER():
    def __init__(self, hps, mode="train"):
        self.hps = hps
        self.x = tf.placeholder(tf.int32,   [None, hps.num_steps], name="pl_tokens")
        self.y = tf.placeholder(tf.int32,   [None, hps.num_steps], name="pl_target")
        self.w = tf.placeholder(tf.float32, [None, hps.num_steps], name="pl_weight")
        self.keep_prob = tf.placeholder(tf.float32, [], name="pl_keep_prob")

        ### 4 blocks ###
        # 1) embedding
        # 2) dropout on input embedding
        # 3) sentence encoding using rnn
        # 4) bidirectional rnn's output to target classes
        # 5) loss calcaulation

        def _embedding(x):
            # character embedding 
            shape       = [hps.vocab_size, hps.emb_size]
            initializer = tf.initializers.variance_scaling(distribution="uniform", dtype=tf.float32)
            emb_mat     = tf.get_variable("emb", shape, initializer=initializer, dtype=tf.float32)
            input_emb   = tf.nn.embedding_lookup(emb_mat, x)   # [batch_size, sent_len, emb_dim]

            # split input_emb -> num_steps
            step_inputs = tf.unstack(input_emb, axis=1)
            return step_inputs

        def _sequence_dropout(step_inputs, keep_prob):
            # apply dropout to each input
            # input : a list of input tensor which shape is [None, input_dim]
            with tf.name_scope('sequence_dropout') as scope:
                step_outputs = []
                for t, input in enumerate(step_inputs):
                    step_outputs.append( tf.nn.dropout(input, keep_prob) )
            return step_outputs

        def sequence_encoding_n2n(step_inputs, seq_length, cell_size):
            # birnn based N2N encoding and output
            f_rnn_cell = tf.contrib.rnn.GRUCell(cell_size, reuse=False)
            b_rnn_cell = tf.contrib.rnn.GRUCell(cell_size, reuse=False)
            _inputs    = tf.stack(step_inputs, axis=1)

            # step_inputs = a list of [batch_size, emb_dim]
            # input = [batch_size, num_step, emb_dim]
            # np.stack( [a,b,c,] )
            outputs, states, = tf.nn.bidirectional_dynamic_rnn( f_rnn_cell,
                                                                b_rnn_cell,
                                                                _inputs,
                                                                sequence_length=tf.cast(seq_length, tf.int64),
                                                                time_major=False,
                                                                dtype=tf.float32,
                                                                scope='birnn',
                                                            )
            output_fw, output_bw = outputs
            states_fw, states_bw = states 

            output       = tf.concat([output_fw, output_bw], 2)
            step_outputs = tf.unstack(output, axis=1)

            final_state  = tf.concat([states_fw, states_bw], 1)
            return step_outputs # a list of [batch_size, enc_dim]

        def _to_class_n2n(step_inputs, num_class):
            T = len(step_inputs)
            step_output_logits = []
            for t in range(T):
                # encoder to linear(map)
                out = step_inputs[t]
                if t==0: out = linear(out, num_class, scope="Rnn2Target")
                else:    out = linear(out, num_class, scope="Rnn2Target", reuse=True)
                step_output_logits.append(out)
            return step_output_logits

        def _loss(step_outputs, step_refs, weights):
            # step_outputs : a list of [batch_size, num_class] float32 - unscaled logits
            # step_refs    : [batch_size, num_steps] int32
            # weights      : [batch_size, num_steps] float32
            # calculate sequence wise loss function using cross-entropy
            _batch_output_logits = tf.stack(step_outputs, axis=1)
            loss = sequence_loss(
                                    logits=_batch_output_logits,        
                                    targets=step_refs,
                                    weights=weights
                                )
            return loss
        
        seq_length    = tf.reduce_sum(self.w, 1) # [batch_size]

        step_inputs       = _embedding(self.x)
        step_inputs       = _sequence_dropout(step_inputs, self.keep_prob)
        step_enc_outputs  = sequence_encoding_n2n(step_inputs, seq_length, hps.enc_dim)
        step_outputs      = _to_class_n2n(step_enc_outputs, hps.num_target_class)

        self.loss = _loss(step_outputs, self.y, self.w)

        # step_preds and step_out_probs
        step_out_probs = []
        step_out_preds = []
        for _output in step_outputs:
            _out_probs  = tf.nn.softmax(_output)
            _out_pred   = tf.argmax(_out_probs, 1)

            step_out_probs.append(_out_probs)
            step_out_preds.append(_out_pred)

        # stack for interface
        self.step_out_probs = tf.stack(step_out_probs, axis=1, name="step_out_probs")
        self.step_out_preds = tf.stack(step_out_preds, axis=1, name="step_out_preds")

        self.global_step = tf.get_variable("global_step", [], tf.int32, initializer=tf.zeros_initializer, trainable=False)

        if mode == "train":
            optimizer       = tf.train.AdamOptimizer(hps.learning_rate)
            self.train_op   = optimizer.minimize(self.loss, global_step=self.global_step)
        else:
            self.train_op = tf.no_op()

        for v in tf.trainable_variables(): print(v.name)

    @staticmethod
    def get_default_hparams():
        return HParams(
            learning_rate     = 0.01,
            keep_prob         = 0.5,
        )


def train(train_id_data, num_vocabs, num_taget_class):
    #
    # train sentiment analysis using given train_id_data
    #
    max_epoch = 1000
    model_dir = "./trained_models"
    hps = NER.get_default_hparams()
    hps.update(
                    batch_size= 50,
                    num_steps = 85,
                    emb_size  = 40,
                    enc_dim   = 50,
                    vocab_size=num_vocabs,
                    num_target_class=num_taget_class
               )

    with tf.variable_scope("model"):
        model = NER(hps, "train")

    sv = tf.train.Supervisor(is_chief=True,
                             logdir=model_dir,
                             summary_op=None,  
                             global_step=model.global_step)

    # tf assign compatible operators for gpu and cpu 
    tf_config = tf.ConfigProto(allow_soft_placement=True)

    with sv.managed_session(config=tf_config) as sess:
        local_step       = 0
        prev_global_step = sess.run(model.global_step)

        train_data_set = NERDataset(train_id_data, hps.batch_size, hps.num_steps)
        losses = []
        while not sv.should_stop():
            fetches = [model.global_step, model.loss, model.train_op]
            a_batch_data = next( train_data_set.iterator )
            y, x, w = a_batch_data
            fetched = sess.run(fetches, {
                                            model.x: x, 
                                            model.y: y, 
                                            model.w: w,

                                            model.keep_prob: hps.keep_prob,
                                        }
                              )

            local_step += 1

            _global_step = fetched[0]
            _loss        = fetched[1]
            losses.append( _loss )
            if local_step < 10 or local_step % 10 == 0:
                epoch = train_data_set.get_epoch_num()
                print("Epoch = {:3d} Step = {:7d} loss = {:5.3f}".format(epoch, _global_step, np.mean(losses)) )
                _loss = []                
                if epoch >= max_epoch : break 

        print("Training is done.")
    sv.stop()

    # model.out_pred, model.out_probs
    freeze_graph(model_dir, "model/step_out_preds,model/step_out_probs", "frozen_graph.tf.pb") ## freeze graph with params to probobuf format
    
from tensorflow.core.framework import graph_pb2
def predict(token_vocab, target_vocab, sent):
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force to use cpu only (prediction)
    model_dir = "./trained_models"

    # prepare sentence converting
    # to make raw sentence to id data easily
    pred_data     = N2NTextData(sent, mode='sentence')
    pred_id_data  = N2NConverter.convert(pred_data, target_vocab, token_vocab)
    pred_data_set = NERDataset(pred_id_data, 1, 85)
    #
    a_batch_data = next(pred_data_set.predict_iterator) # a result
    b_nes_id, b_token_ids, b_weight = a_batch_data

    # Restore graph
    # note that frozen_graph.tf.pb contains graph definition with parameter values in binary format
    _graph_fn =  os.path.join(model_dir, 'frozen_graph.tf.pb')
    with tf.gfile.GFile(_graph_fn, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)

    with tf.Session(graph=graph) as sess:
        # to check load graph
        #for n in tf.get_default_graph().as_graph_def().node: print(n.name)

        # make interface for input
        pl_token     = graph.get_tensor_by_name('import/model/pl_tokens:0')
        pl_weight    = graph.get_tensor_by_name('import/model/pl_weight:0')
        pl_keep_prob = graph.get_tensor_by_name('import/model/pl_keep_prob:0')

        # make interface for output
        step_out_preds = graph.get_tensor_by_name('import/model/step_out_preds:0')
        step_out_probs = graph.get_tensor_by_name('import/model/step_out_probs:0')
        

        # predict sentence 
        b_best_step_pred_indexs, b_step_pred_probs = sess.run([step_out_preds, step_out_probs], 
                                                              feed_dict={
                                                                            pl_token  : b_token_ids,
                                                                            pl_weight : b_weight,
                                                                            pl_keep_prob : 1.0,
                                                                        }
                                                             )
        best_step_pred_indexs = b_best_step_pred_indexs[0]
        step_pred_probs = b_step_pred_probs[0]

        step_best_targets = []
        step_best_target_probs = []
        for time_step, best_pred_index in enumerate(best_step_pred_indexs):
            _target_class = target_vocab.get_symbol(best_pred_index)
            step_best_targets.append( _target_class )
            _prob = step_pred_probs[time_step][best_pred_index]
            step_best_target_probs.append( _prob ) 

        for idx, char in enumerate(list(sent)):
            print('{}\t{}\t{}'.format(char, step_best_targets[idx], step_best_target_probs[idx]) ) 
        #return list(sent)



if __name__ == '__main__':
    train_id_data, token_vocab, target_vocab = load_data()
    num_vocabs       = token_vocab.get_num_tokens()
    num_target_class = target_vocab.get_num_targets()

    train_data_set = NERDataset(train_id_data, 5, 85)
    train(train_id_data, num_vocabs, num_target_class)
    
    #predict(token_vocab, target_vocab, '아프가니스탄의 장래를 더욱 불투명하게 하는 것은 강경파 헤즈비 이슬라미와 우즈베크 민병대의 대립이다.')


  from ._conv import register_converters as _register_converters


Tensorflow Version :  1.10.0
<common.nlp.data_loader.N2NTextData object at 0x00000221A4039F28>
Instructions for updating:
seq_dim is deprecated, use seq_axis instead
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
model/emb:0
model/birnn/fw/gru_cell/gates/kernel:0
model/birnn/fw/gru_cell/gates/bias:0
model/birnn/fw/gru_cell/candidate/kernel:0
model/birnn/fw/gru_cell/candidate/bias:0
model/birnn/bw/gru_cell/gates/kernel:0
model/birnn/bw/gru_cell/gates/bias:0
model/birnn/bw/gru_cell/candidate/kernel:0
model/birnn/bw/gru_cell/candidate/bias:0
model/Rnn2Target/weights:0
model/Rnn2Target/biases:0
Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
INFO:tensorflow:Restoring parameters from ./trained_models\model.ckpt-0
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path ./trained_models\model.ckpt
INFO:tensorflow:Starting qu

Epoch = 146 Step =    1530 loss = 0.062
Epoch = 147 Step =    1540 loss = 0.062
Epoch = 148 Step =    1550 loss = 0.061
Epoch = 149 Step =    1560 loss = 0.061
Epoch = 150 Step =    1570 loss = 0.061
Epoch = 151 Step =    1580 loss = 0.060
Epoch = 152 Step =    1590 loss = 0.060
Epoch = 153 Step =    1600 loss = 0.060
Epoch = 154 Step =    1610 loss = 0.060
Epoch = 155 Step =    1620 loss = 0.059
Epoch = 156 Step =    1630 loss = 0.059
Epoch = 157 Step =    1640 loss = 0.059
Epoch = 158 Step =    1650 loss = 0.058
Epoch = 159 Step =    1660 loss = 0.058
Epoch = 160 Step =    1670 loss = 0.058
Epoch = 160 Step =    1680 loss = 0.058
Epoch = 161 Step =    1690 loss = 0.057
Epoch = 162 Step =    1700 loss = 0.057
Epoch = 163 Step =    1710 loss = 0.057
Epoch = 164 Step =    1720 loss = 0.057
Epoch = 165 Step =    1730 loss = 0.056
Epoch = 166 Step =    1740 loss = 0.056
Epoch = 167 Step =    1750 loss = 0.056
Epoch = 168 Step =    1760 loss = 0.056
Epoch = 169 Step =    1770 loss = 0.055


Epoch = 340 Step =    3570 loss = 0.036
Epoch = 341 Step =    3580 loss = 0.036
Epoch = 342 Step =    3590 loss = 0.036
Epoch = 343 Step =    3600 loss = 0.036
Epoch = 344 Step =    3610 loss = 0.036
Epoch = 345 Step =    3620 loss = 0.036
Epoch = 346 Step =    3630 loss = 0.036
Epoch = 347 Step =    3640 loss = 0.036
Epoch = 348 Step =    3650 loss = 0.036
Epoch = 349 Step =    3660 loss = 0.035
Epoch = 350 Step =    3670 loss = 0.035
Epoch = 351 Step =    3680 loss = 0.035
Epoch = 352 Step =    3690 loss = 0.035
Epoch = 353 Step =    3700 loss = 0.035
Epoch = 354 Step =    3710 loss = 0.035
Epoch = 355 Step =    3720 loss = 0.035
Epoch = 356 Step =    3730 loss = 0.035
Epoch = 357 Step =    3740 loss = 0.035
Epoch = 358 Step =    3750 loss = 0.035
Epoch = 359 Step =    3760 loss = 0.035
Epoch = 360 Step =    3770 loss = 0.035
Epoch = 360 Step =    3780 loss = 0.035
Epoch = 361 Step =    3790 loss = 0.035
Epoch = 362 Step =    3800 loss = 0.035
Epoch = 363 Step =    3810 loss = 0.035


Epoch = 535 Step =    5610 loss = 0.031
Epoch = 536 Step =    5620 loss = 0.031
Epoch = 537 Step =    5630 loss = 0.031
Epoch = 538 Step =    5640 loss = 0.031
Epoch = 539 Step =    5650 loss = 0.031
Epoch = 540 Step =    5660 loss = 0.031
Epoch = 540 Step =    5670 loss = 0.030
Epoch = 541 Step =    5680 loss = 0.030
Epoch = 542 Step =    5690 loss = 0.030
Epoch = 543 Step =    5700 loss = 0.030
Epoch = 544 Step =    5710 loss = 0.030
Epoch = 545 Step =    5720 loss = 0.030
Epoch = 546 Step =    5730 loss = 0.030
Epoch = 547 Step =    5740 loss = 0.030
Epoch = 548 Step =    5750 loss = 0.030
Epoch = 549 Step =    5760 loss = 0.030
Epoch = 550 Step =    5770 loss = 0.030
Epoch = 551 Step =    5780 loss = 0.030
INFO:tensorflow:model/global_step/sec: 16.2322
Epoch = 552 Step =    5790 loss = 0.030
Epoch = 553 Step =    5800 loss = 0.030
Epoch = 554 Step =    5810 loss = 0.030
Epoch = 555 Step =    5820 loss = 0.030
Epoch = 556 Step =    5830 loss = 0.030
Epoch = 557 Step =    5840 loss =

Epoch = 729 Step =    7650 loss = 0.036
Epoch = 730 Step =    7660 loss = 0.036
Epoch = 731 Step =    7670 loss = 0.036
Epoch = 732 Step =    7680 loss = 0.036
Epoch = 733 Step =    7690 loss = 0.036
Epoch = 734 Step =    7700 loss = 0.036
Epoch = 735 Step =    7710 loss = 0.037
Epoch = 736 Step =    7720 loss = 0.037
Epoch = 737 Step =    7730 loss = 0.037
INFO:tensorflow:model/global_step/sec: 16.258
Epoch = 738 Step =    7740 loss = 0.037
Epoch = 739 Step =    7750 loss = 0.037
Epoch = 740 Step =    7760 loss = 0.037
Epoch = 740 Step =    7770 loss = 0.038
Epoch = 741 Step =    7780 loss = 0.038
Epoch = 742 Step =    7790 loss = 0.038
Epoch = 743 Step =    7800 loss = 0.038
Epoch = 744 Step =    7810 loss = 0.038
Epoch = 745 Step =    7820 loss = 0.039
Epoch = 746 Step =    7830 loss = 0.039
Epoch = 747 Step =    7840 loss = 0.039
Epoch = 748 Step =    7850 loss = 0.039
Epoch = 749 Step =    7860 loss = 0.039
Epoch = 750 Step =    7870 loss = 0.040
Epoch = 751 Step =    7880 loss = 

Epoch = 923 Step =    9690 loss = 0.098
Epoch = 924 Step =    9700 loss = 0.099
Epoch = 925 Step =    9710 loss = 0.099
Epoch = 926 Step =    9720 loss = 0.099
INFO:tensorflow:model/global_step/sec: 16.618
INFO:tensorflow:Saving checkpoint to path ./trained_models\model.ckpt
Epoch = 927 Step =    9730 loss = 0.100
Epoch = 928 Step =    9740 loss = 0.100
Epoch = 929 Step =    9750 loss = 0.101
Epoch = 930 Step =    9760 loss = 0.101
Epoch = 931 Step =    9770 loss = 0.101
Epoch = 932 Step =    9780 loss = 0.102
Epoch = 933 Step =    9790 loss = 0.102
Epoch = 934 Step =    9800 loss = 0.102
Epoch = 935 Step =    9810 loss = 0.103
Epoch = 936 Step =    9820 loss = 0.103
Epoch = 937 Step =    9830 loss = 0.103
Epoch = 938 Step =    9840 loss = 0.104
Epoch = 939 Step =    9850 loss = 0.104
Epoch = 940 Step =    9860 loss = 0.104
Epoch = 940 Step =    9870 loss = 0.105
Epoch = 941 Step =    9880 loss = 0.105
Epoch = 942 Step =    9890 loss = 0.105
Epoch = 943 Step =    9900 loss = 0.106
Epoc

In [3]:
predict(token_vocab, target_vocab, '가족이랑 겨울에 3박4일 여행')

가	B-AC	0.8060729503631592
족	I-AC	0.9727397561073303
이	O	0.9209614992141724
랑	O	0.9622595310211182
 	O	0.969157874584198
겨	B-DT	0.8904657959938049
울	I-DT	0.9489538669586182
에	O	0.8773403167724609
 	O	0.9541662335395813
3	B-PR	0.7125526070594788
박	I-PR	0.9858400821685791
4	I-PR	0.9686891436576843
일	I-PR	0.9736447930335999
 	O	0.958454430103302
여	O	0.9833940863609314
행	O	0.9892008304595947


In [7]:
predict(token_vocab, target_vocab, '여자친구랑 휴양지 가서 힐링하고 싶어요! 9월15일부터 18일까지')

여	O	0.8624535799026489
자	I-AC	0.9172770977020264
친	I-AC	0.6635997891426086
구	I-AC	0.9188944697380066
랑	O	0.9151795506477356
 	O	0.9791615009307861
휴	B-PU	0.8528434038162231
양	I-PU	0.8842093348503113
지	O	0.5214310884475708
 	O	0.9751706123352051
가	O	0.9164026975631714
서	O	0.9763439893722534
 	O	0.9904068112373352
힐	B-PU	0.9648532867431641
링	I-PU	0.7901679873466492
하	O	0.9669514298439026
고	O	0.8655789494514465
 	O	0.9966482520103455
싶	O	0.8834100365638733
어	O	0.9749166369438171
요	O	0.9904847145080566
!	O	0.9736834168434143
 	O	0.9930007457733154
9	B-DT	0.8449887633323669
월	I-DT	0.9530041813850403
1	O	0.44898349046707153
5	I-DT	0.37916913628578186
일	I-DT	0.47819021344184875
부	O	0.5371277928352356
터	O	0.9179531931877136
 	O	0.903346836566925
1	O	0.4468589723110199
8	O	0.681887686252594
일	I-PR	0.9567570686340332
까	O	0.8890085220336914
지	O	0.9437020421028137
