In [1]:
import logging
import os

import pytest
from rasa_nlu import data_router, config
from rasa_nlu.components import ComponentBuilder
from rasa_nlu.model import Trainer
from rasa_nlu.utils import zip_folder
from rasa_nlu import training_data

logging.basicConfig(level="DEBUG")

CONFIG_DEFAULTS_PATH = "sample_configs/config_defaults.yml"

DEFAULT_DATA_PATH = "data/examples/rasa/demo-rasa.json"

TEST_MODEL_PATH = "test_models/test_model_spacy_sklearn"


@pytest.fixture(scope="session")
def component_builder():
    return ComponentBuilder()

In [20]:
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.extractors.spacy_entity_extractor import SpacyEntityExtractor
from rasa_nlu.training_data import TrainingData, Message

CONFIG_ZH_PATH = "sample_configs/config_duckling.yml"
def test_duckling_entity_extractor(component_builder):
    # _config = RasaNLUModelConfig({"pipeline": [{"name": "ner_duckling_http", "url":"http://localhost:8000"}]})
    # _config.set_component_attr("ner_duckling_http", dimensions=["time"])
    _config=config.load(CONFIG_ZH_PATH)
    duckling = component_builder.create_component("ner_duckling_http", _config)
    message = Message("Today is the 5th of May. Let us meet tomorrow.")
    duckling.process(message)
    entities = message.get("entities")
    print(len(entities))
    for ent in entities:
        print(ent)

    # Test duckling with a defined date

    # 1381536182000 == 2013/10/12 02:03:02
    # message = Message("Let us meet tomorrow.", time="1381536182000")
    # 因为在DucklingHTTPExtractor._reference_time_from_message的方法里对time字段乘上了1000, 
    # 所以这里不再是<unix epoch in millisecond>, 而是秒
    message = Message("Let us meet tomorrow.", time="1381536182")
    duckling.process(message)
    entities = message.get("entities")
    print(len(entities))
    print(entities[0]["text"])
    print(entities[0]["value"])

test_duckling_entity_extractor(component_builder())

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.0.1
DEBUG:urllib3.connectionpool:http://127.0.0.1:8000 "POST /parse HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 127.0.0.1
DEBUG:urllib3.connectionpool:http://127.0.0.1:8000 "POST /parse HTTP/1.1" 200 None


3
{'start': 0, 'end': 5, 'text': 'Today', 'value': '2019-01-04T00:00:00.000-08:00', 'confidence': 1.0, 'additional_info': {'values': [{'value': '2019-01-04T00:00:00.000-08:00', 'grain': 'day', 'type': 'value'}], 'value': '2019-01-04T00:00:00.000-08:00', 'grain': 'day', 'type': 'value'}, 'entity': 'time', 'extractor': 'ner_duckling_http'}
{'start': 9, 'end': 23, 'text': 'the 5th of May', 'value': '2019-05-05T00:00:00.000-07:00', 'confidence': 1.0, 'additional_info': {'values': [{'value': '2019-05-05T00:00:00.000-07:00', 'grain': 'day', 'type': 'value'}, {'value': '2020-05-05T00:00:00.000-07:00', 'grain': 'day', 'type': 'value'}, {'value': '2021-05-05T00:00:00.000-07:00', 'grain': 'day', 'type': 'value'}], 'value': '2019-05-05T00:00:00.000-07:00', 'grain': 'day', 'type': 'value'}, 'entity': 'time', 'extractor': 'ner_duckling_http'}
{'start': 37, 'end': 45, 'text': 'tomorrow', 'value': '2019-01-05T00:00:00.000-08:00', 'confidence': 1.0, 'additional_info': {'values': [{'value': '2019-01-05

In [24]:
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.extractors.spacy_entity_extractor import SpacyEntityExtractor
from rasa_nlu.training_data import TrainingData, Message

CONFIG_FR_PATH = "sample_configs/config_fr.yml"
def test_duckling_fr(component_builder):
    # _config = RasaNLUModelConfig({"pipeline": [{"name": "ner_duckling_http", "url":"http://localhost:8000"}]})
    # _config.set_component_attr("ner_duckling_http", dimensions=["time"])
    _config=config.load(CONFIG_FR_PATH)
    duckling = component_builder.create_component("ner_duckling_http", _config)
    message = Message("Demain à midi")
    duckling.process(message)
    entities = message.get("entities")
    print(len(entities))
    for ent in entities:
        print(ent)

test_duckling_fr(component_builder())        

DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): localhost
DEBUG:urllib3.connectionpool:http://localhost:8000 "POST /parse HTTP/1.1" 200 None
DEBUG:chardet.charsetprober:EUC-JP Japanese prober hit error at byte 18
DEBUG:chardet.charsetprober:EUC-KR Korean prober hit error at byte 18
DEBUG:chardet.charsetprober:EUC-TW Taiwan prober hit error at byte 17
DEBUG:chardet.charsetprober:utf-8  confidence = 0.505
DEBUG:chardet.charsetprober:SHIFT_JIS Japanese confidence = 0.01
DEBUG:chardet.charsetprober:EUC-JP not active
DEBUG:chardet.charsetprober:GB2312 Chinese confidence = 0.01
DEBUG:chardet.charsetprober:EUC-KR not active
DEBUG:chardet.charsetprober:CP949 Korean confidence = 0.01
DEBUG:chardet.charsetprober:Big5 Chinese confidence = 0.01
DEBUG:chardet.charsetprober:EUC-TW not active
DEBUG:chardet.charsetprober:windows-1251 Russian confidence = 0.01
DEBUG:chardet.charsetprober:KOI8-R Russian confidence = 0.01
DEBUG:chardet.charsetprober:ISO-8859-5 Russian confidence = 0.01
DEB

1
{'start': 0, 'end': 13, 'text': 'Demain Ã\xa0 midi', 'value': '2019-01-05T12:00:00.000-08:00', 'confidence': 1.0, 'additional_info': {'values': [{'value': '2019-01-05T12:00:00.000-08:00', 'grain': 'hour', 'type': 'value'}], 'value': '2019-01-05T12:00:00.000-08:00', 'grain': 'hour', 'type': 'value'}, 'entity': 'time', 'extractor': 'ner_duckling_http'}


In [11]:
from duckling import DucklingWrapper
d = DucklingWrapper()
print(d.parse_time(u'Let\'s meet at 11:45am'))
# [{u'dim': u'time', u'end': 21, u'start': 11, u'value': {u'value': u'2016-10-14T11:45:00.000-07:00', u'others': [u'2016-10-14T11:45:00.000-07:00', u'2016-10-15T11:45:00.000-07:00', u'2016-10-16T11:45:00.000-07:00']}, u'text': u'at 11:45am'}]
print(d.parse_temperature(u'Let\'s change the temperatur from thirty two celsius to 65 degrees'))

[{'dim': 'time', 'text': 'at 11:45am', 'start': 11, 'end': 21, 'value': {'value': '2019-01-05T11:45:00.000+08:00', 'grain': 'minute', 'others': [{'grain': 'minute', 'value': '2019-01-05T11:45:00.000+08:00'}, {'grain': 'minute', 'value': '2019-01-06T11:45:00.000+08:00'}, {'grain': 'minute', 'value': '2019-01-07T11:45:00.000+08:00'}]}}]
[{'dim': 'temperature', 'text': '65 degrees', 'start': 55, 'end': 65, 'value': {'value': 65.0, 'unit': 'degree'}}, {'dim': 'temperature', 'text': 'thirty two celsius', 'start': 33, 'end': 51, 'value': {'value': 32.0, 'unit': 'celsius'}}]


In [13]:
from duckling import Duckling
d = Duckling()
d.load() # always load the model first
print(d.parse('tomorrow'))

[{'dim': 'time', 'body': 'tomorrow', 'value': {'type': 'value', 'value': '2019-01-05T00:00:00.000+08:00', 'grain': 'day', 'values': [{'type': 'value', 'value': '2019-01-05T00:00:00.000+08:00', 'grain': 'day'}]}, 'start': 0, 'end': 8}]


In [14]:
help(Duckling)

Help on class Duckling in module duckling.duckling:

class Duckling(builtins.object)
 |  Python wrapper for Duckling by wit.ai.
 |  
 |  Attributes:
 |      jvm_started: Optional attribute to specify if the JVM has already been
 |          started (with all Java dependencies loaded).
 |      parse_datetime: Optional attribute to specify if datetime string should
 |          be parsed with datetime.strptime(). Default is False.
 |      minimum_heap_size: Optional attribute to set initial and minimum heap
 |          size. Default is 128m.
 |      maximum_heap_size: Optional attribute to set maximum heap size. Default
 |          is 2048m.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, jvm_started=False, parse_datetime=False, minimum_heap_size='128m', maximum_heap_size='2048m')
 |      Initializes Duckling.
 |  
 |  load(self, languages=[])
 |      Loads the Duckling corpus.
 |      
 |      Languages can be specified, defaults to all.
 |      
 |      Args:
 |          languages

In [15]:
print(d.parse('tomorrow'))

[{'dim': 'time', 'body': 'tomorrow', 'value': {'type': 'value', 'value': '2019-01-05T00:00:00.000+08:00', 'grain': 'day', 'values': [{'type': 'value', 'value': '2019-01-05T00:00:00.000+08:00', 'grain': 'day'}]}, 'start': 0, 'end': 8}]


In [23]:
d = Duckling()
# d.load(languages=["en", "fr", "de"]) # always load the model first
d.load(languages=['fr$core'])
print(d.parse('Demain à midi'))

[]
