In [1]:
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM
from keras_contrib.layers import CRF
import pickle
from keras.preprocessing.sequence import pad_sequences
import numpy as np

Using TensorFlow backend.


In [8]:
def loadmodel(config_path='model/config.pkl', weights_path='model/crf_company.h5',
              embed_dim=20, birnn_units=100):
    """Rebuild the Embedding -> BiLSTM -> CRF tagger and load its weights.

    The architecture is re-created in code and only the weights are read from
    disk via ``load_weights``, so the layer sizes passed here have to agree
    with the ones stored in the weights file.

    Args:
        config_path: Pickle file holding the ``(vocab, chunk_tags)`` tuple.
        weights_path: Weights file handed to ``model.load_weights``.
        embed_dim: Output dimension of the embedding layer.
        birnn_units: Units of the LSTM in each direction of the BiLSTM.

    Returns:
        The compiled ``Sequential`` model with the weights loaded.
    """
    # NOTE: pickle.load can execute arbitrary code; only point this at a
    # config file that comes from a trusted source.
    with open(config_path, 'rb') as inp:
        (vocab, chunk_tags) = pickle.load(inp)

    model = Sequential()
    # Randomly initialised embedding; index 0 is treated as padding (mask_zero).
    model.add(Embedding(len(vocab), embed_dim, mask_zero=True))
    model.add(Bidirectional(LSTM(birnn_units, return_sequences=True)))
    crf = CRF(len(chunk_tags), sparse_target=True)
    model.add(crf)
    model.summary()
    model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
    # Import the trained parameters through load_weights.
    model.load_weights(weights_path)
    return model

In [9]:
def process_data(data, vocab, word2idx, maxlen=100):
    """Turn one token sequence into a padded index array for the model.

    Args:
        data: Iterable of indexable items; the first element of each item is
            lower-cased and looked up in ``word2idx`` (for a plain string each
            character is its own item).
        vocab: Unused here; kept so existing callers keep working.
        word2idx: Mapping from token to integer index.
        maxlen: Width of the padded array. ``None`` lets ``pad_sequences``
            use the sequence's own length.

    Returns:
        ``(x, length)`` where ``x`` is the ``pad_sequences`` result for the
        single sequence and ``length`` is the number of real (non-padding)
        tokens present in ``x``.
    """
    # Tokens missing from the lookup fall back to index 1.
    indices = [word2idx.get(w[0].lower(), 1) for w in data]
    length = len(indices)
    if maxlen is not None:
        # pad_sequences cuts sequences longer than maxlen, so the count of
        # tokens that survive can never exceed maxlen.
        length = min(length, maxlen)
    x = pad_sequences([indices], maxlen)  # left padding
    return x, length

In [10]:
def predicttext(vocab, word2idx, model, predict_text, tags=None):
    """Tag every character of ``predict_text`` and join those not tagged 'O'.

    Args:
        vocab: Passed through to ``process_data``.
        word2idx: Mapping from token to integer index.
        model: Object whose ``predict`` returns per-position tag scores.
        predict_text: Input to analyse; it is converted with ``str()``.
        tags: Optional sequence mapping a tag index to its name. When omitted
            the notebook-level ``chunk_tags`` is used, as before.

    Returns:
        The characters whose predicted tag differs from ``'O'``, concatenated
        in their original order. If the text is longer than the padded model
        input, only the trailing characters that were scored are considered.
    """
    tag_names = chunk_tags if tags is None else tags
    # Use the same string for encoding and for picking characters afterwards.
    text = str(predict_text)
    if not text:
        return ''

    xdata, length = process_data(text, vocab, word2idx)
    scores = model.predict(xdata)[0]
    # The input is left-padded, so the real tokens sit at the end of the
    # window; never take more rows than the window or the text provides.
    n_scored = min(length, len(scores), len(text))
    if n_scored == 0:
        return ''
    labels = [tag_names[np.argmax(row)] for row in scores[-n_scored:]]
    # Pair tags with the tail of the text so positions stay aligned even when
    # the front of a long text was cut off by padding/truncation.
    scored_text = text[-n_scored:]
    keyword = ''.join(ch for ch, label in zip(scored_text, labels) if label != 'O')
    return keyword

In [11]:
# Read the vocabulary and the tag list from the pickled config.
with open('model/config.pkl', 'rb') as inp:
    vocab, chunk_tags = pickle.load(inp)

# Token -> index lookup, numbered in vocabulary order.
word2idx = {token: index for index, token in enumerate(vocab)}
model = loadmodel()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 20)          13300     
_________________________________________________________________
bidirectional_2 (Bidirection (None, None, 200)         96800     
_________________________________________________________________
crf_2 (CRF)                  (None, None, 4)           828       
Total params: 110,928
Trainable params: 110,928
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Sample input string to run through the tagger.
predict_text = '北京人寿保险公司'
keyword = predicttext(
    vocab=vocab,
    word2idx=word2idx,
    model=model,
    predict_text=predict_text,
)

In [13]:
# Show the extracted keyword as this cell's output.
keyword

'北京人寿'