In [1]:
import logging
import os

import pytest
from rasa_nlu import data_router, config
from rasa_nlu.components import ComponentBuilder
from rasa_nlu.model import Trainer
from rasa_nlu.utils import zip_folder
from rasa_nlu import training_data

from sagas.provider.hanlp_utils import Hanlp

# Root logger runs at INFO; swap in the commented-out call below for DEBUG output.
# logging.basicConfig(level="DEBUG")
logging.basicConfig(level="INFO")
# Configuration file read by default_config().
CONFIG_DEFAULTS_PATH = "sample_configs/config_defaults.yml"
# NOTE(review): the two paths below are not referenced in the cells visible here —
# confirm they are still needed.
DEFAULT_DATA_PATH = "data/examples/rasa/demo-rasa.json"
TEST_MODEL_PATH = "test_models/test_model_spacy_sklearn"

def component_builder():
    """Create and return a new ``ComponentBuilder`` instance."""
    builder = ComponentBuilder()
    return builder
def hanlp(component_builder, default_config):
    """Ask ``component_builder`` to create the Hanlp component.

    Args:
        component_builder: object exposing ``create_component(name, config)``.
        default_config: configuration passed straight through to
            ``create_component``.

    Returns:
        Whatever ``create_component`` returns for the Hanlp component name.
    """
    component_name = "sagas.provider.hanlp_utils.Hanlp"
    return component_builder.create_component(component_name, default_config)
def timenlp(component_builder, default_config):
    """Ask ``component_builder`` to create the TimeExtractor component.

    Args:
        component_builder: object exposing ``create_component(name, config)``.
        default_config: configuration passed straight through to
            ``create_component``.

    Returns:
        Whatever ``create_component`` returns for the TimeExtractor component name.
    """
    component_name = "sagas.provider.time_extractor.TimeExtractor"
    return component_builder.create_component(component_name, default_config)

def default_config():
    """Load and return the configuration stored at ``CONFIG_DEFAULTS_PATH``."""
    loaded = config.load(CONFIG_DEFAULTS_PATH)
    return loaded

# Build the component builder and load the default configuration once, then
# reuse both for every component created in this cell (previously each
# component got its own builder and its own re-read of the config file).
_shared_builder = component_builder()
_shared_config = default_config()

# NOTE: these assignments rebind the factory names `hanlp` / `timenlp` to the
# created components; the later cells use `hanlp` as the component instance.
hanlp = hanlp(_shared_builder, _shared_config)
timenlp = timenlp(_shared_builder, _shared_config)

  from ._conv import register_converters as _register_converters
INFO:sagas.provider.hanlp_utils:Trying to connect hanlp rpc with address 'localhost:10052'


In [18]:
import importlib

import sagas.provider.hanlp_entity_extractor

# Re-execute the module so on-disk edits are picked up without restarting the
# kernel. importlib.reload replaces imp.reload: the `imp` module is deprecated
# and was removed in Python 3.12. Names previously pulled in with
# `from ... import ...` must be re-imported to see the reloaded definitions.
importlib.reload(sagas.provider.hanlp_entity_extractor)

<module 'sagas.provider.hanlp_entity_extractor' from '/Users/xiaofeiwu/jcloud/assets/langs/workspace/rasa/stack/sagas/provider/hanlp_entity_extractor.py'>

In [24]:
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.extractors.spacy_entity_extractor import SpacyEntityExtractor
from rasa_nlu.training_data import TrainingData, Message
from sagas.provider.hanlp_entity_extractor import HanlpEntityExtractor

def test_hanlp_ner_extractor(text, hanlp, hanlp_doc):
    ext = HanlpEntityExtractor()
    
    example = Message(text, {
        "intent": "wish",
        "entities": [],
        "hanlp_doc": hanlp_doc})

    ext.process(example, hanlp=hanlp.nlp)

    print("total entities", len(example.get("entities", [])))
    for ent in example.get("entities"):
        print(ent)

# Sample sentences pushed through the HanLP entity extractor, in order.
for text in (
    "我的希望是希望张晚霞的背影被晚霞映红",
    "蓝翔给宁夏固原市彭阳县红河镇黑牛沟村捐赠了挖掘机",
):
    test_hanlp_ner_extractor(text, hanlp, hanlp.doc_for_text(text))

total entities 1
{'entity': 'person', 'value': '张晚霞', 'start': 7, 'confidence': None, 'end': 10, 'extractor': 'ner_hanlp'}
total entities 6
{'entity': 'person', 'value': '蓝翔', 'start': 0, 'confidence': None, 'end': 2, 'extractor': 'ner_hanlp'}
{'entity': 'location', 'value': '宁夏', 'start': 3, 'confidence': None, 'end': 5, 'extractor': 'ner_hanlp'}
{'entity': 'location', 'value': '固原市', 'start': 5, 'confidence': None, 'end': 8, 'extractor': 'ner_hanlp'}
{'entity': 'location', 'value': '彭阳县', 'start': 8, 'confidence': None, 'end': 11, 'extractor': 'ner_hanlp'}
{'entity': 'location', 'value': '红河镇', 'start': 11, 'confidence': None, 'end': 14, 'extractor': 'ner_hanlp'}
{'entity': 'location', 'value': '黑牛沟村', 'start': 14, 'confidence': None, 'end': 18, 'extractor': 'ner_hanlp'}


In [26]:
from sagas.provider.amount_extractor import AmountExtractor

def test_amount_ner_extractor(text, hanlp, hanlp_doc):
    ext = AmountExtractor()
    
    example = Message(text, {
        "intent": "wish",
        "entities": [],
        "hanlp_doc": hanlp_doc})

    ext.process(example, hanlp=hanlp.nlp)

    print("total entities", len(example.get("entities", [])))
    for ent in example.get("entities"):
        print(ent)

# Sample sentences pushed through the amount extractor, in order.
for text in (
    "十九元套餐包括什么",
    "牛奶三〇〇克*2",
):
    test_amount_ner_extractor(text, hanlp, hanlp.doc_for_text(text))

total entities 1
{'entity': 'amount', 'value': '19', 'start': 0, 'confidence': None, 'end': 2, 'extractor': 'ner_amount'}
total entities 2
{'entity': 'amount', 'value': '300', 'start': 2, 'confidence': None, 'end': 5, 'extractor': 'ner_amount'}
{'entity': 'amount', 'value': '2', 'start': 7, 'confidence': None, 'end': 8, 'extractor': 'ner_amount'}


In [22]:
from sagas.provider.hanlp_tokenizer import HanlpTokenizer
def test_hanlp_tokenizer(text, hanlp, hanlp_doc):
    ext = HanlpTokenizer()
    
    example = Message(text, {
        "intent": "wish",
        "entities": [],
        "hanlp_doc": hanlp_doc})

    ext.process(example, hanlp=hanlp)
    for token in example.get("tokens"):
        print(token.text, token.offset)

# Alternative sample sentence, kept commented out:
# text="我的希望是希望张晚霞的背影被晚霞映红"
# Sentence actually tokenized in this cell.
text="我想去吃兰州拉面"
test_hanlp_tokenizer(text, hanlp, hanlp.doc_for_text(text))

我 0
想 1
去 2
吃 3
兰州 4
拉面 6


In [17]:
import importlib

import sagas.provider.time_extractor

# Re-execute the module so on-disk edits are picked up without restarting the
# kernel. importlib.reload replaces imp.reload: the `imp` module is deprecated
# and was removed in Python 3.12.
importlib.reload(sagas.provider.time_extractor)

<module 'sagas.provider.time_extractor' from '/Users/xiaofeiwu/jcloud/assets/langs/workspace/rasa/stack/sagas/provider/time_extractor.py'>

In [19]:
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.training_data import TrainingData, Message

# Configuration file loaded by test_time_entity_extractor for the TimeExtractor run.
CONFIG_ZH_PATH = "sample_configs/config_zh.yml"
def test_time_entity_extractor(component_builder):
    # _config = RasaNLUModelConfig({"pipeline": [{"name": "sagas.provider.time_extractor.TimeExtractor"}]})
    # _config.set_component_attr("ner_time", dimensions=["time"], host="unknown")
    _config=config.load(CONFIG_ZH_PATH)
    c = component_builder.create_component("sagas.provider.time_extractor.TimeExtractor", _config)
    message = Message("周五下午7点到8点")
    c.process(message)
    entities = message.get("entities")
    print("total entities", len(entities))

    # Test with a defined date
    # 1381536182000 == 2013/10/12 02:03:02
    message = Message("本周日到下周日出差", time="1381536182000")
    c.process(message)
    entities = message.get("entities")
    print("total entities", len(entities))
    for ent in entities:
        print(ent)

# Run the time-extractor check with a newly created ComponentBuilder.
test_time_entity_extractor(component_builder())

INFO:root:time-nlp gateway host/port: 127.0.0.1 25333


total entities 2
total entities 2
{'entity': 'time', 'value': '2019-01-06 00:00:00', 'start': 0, 'confidence': None, 'end': 3, 'additional_info': 'True', 'extractor': 'sagas.provider.time_extractor.TimeExtractor'}
{'entity': 'time', 'value': '2019-01-13 00:00:00', 'start': 4, 'confidence': None, 'end': 7, 'additional_info': 'True', 'extractor': 'sagas.provider.time_extractor.TimeExtractor'}
