Skip to content

Commit

Permalink
feature: lookup intent parser (#121)
Browse files Browse the repository at this point in the history
* feature: lookup intent parser

A new hashmap-based intent parser.

The normalized/canonical form of an utterance serves as the key, and the value is a tuple of `(intent_id, [vec_of_slots_ids])`.

Once a lookup is done at inference time, the intent and slots are retrieved by matching their ids to a vec of intent names and a vec of slot names, respectively.

This is the Rust implementation of snipsco/snips-nlu#759.
  • Loading branch information
mattgathu authored and adrienball committed Jul 11, 2019
1 parent b5cd19b commit 774a20b
Show file tree
Hide file tree
Showing 48 changed files with 26,880 additions and 241,426 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ members = [

[dependencies]
crfsuite = { git = "https://github.com/snipsco/crfsuite-rs", tag = "0.3.1" }
snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.64.7" }
snips-nlu-parsers = { git = "https://github.com/snipsco/snips-nlu-parsers", tag = "0.2.2" }
snips-nlu-utils = { git = "https://github.com/snipsco/snips-nlu-utils", tag = "0.8.0" }
snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.64.8" }
snips-nlu-parsers = { git = "https://github.com/snipsco/snips-nlu-parsers", tag = "0.2.3" }
snips-nlu-utils = { git = "https://github.com/snipsco/snips-nlu-utils", tag = "0.9.0" }
failure = "0.1"
base64 = "0.10"
itertools = { version = "0.8", default-features = false }
Expand Down
4 changes: 4 additions & 0 deletions data/tests/configs/beverage_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
{
"unit_name": "nlu_engine",
"intent_parsers_configs": [
{
"unit_name": "lookup_intent_parser",
"ignore_stop_words": true
},
{
"unit_name": "deterministic_intent_parser",
"max_pattern_length": 1000,
Expand Down
Binary file modified data/tests/models/nlu_engine.zip
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@
"beverage_temperature": "Temperature",
"number_of_cups": "snips/number"
}
}
},
"stop_words_whitelist": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"config": {
"ignore_stop_words": true,
"unit_name": "lookup_intent_parser"
},
"entity_scopes": [
{
"entity_scope": {
"builtin": [
"snips/number"
],
"custom": []
},
"intent_group": [
"MakeCoffee"
]
},
{
"entity_scope": {
"builtin": [
"snips/number"
],
"custom": [
"Temperature"
]
},
"intent_group": [
"MakeTea"
]
}
],
"intents_names": [
"MakeCoffee",
"MakeTea"
],
"language_code": "en",
"map": {
"-1658454006": [
1,
[
0
]
],
"-1533083481": [
0,
[
0
]
],
"-1416877420": [
0,
[
0
]
],
"-1362288387": [
1,
[
0,
1
]
],
"-687749971": [
0,
[
0
]
],
"1085718744": [
1,
[
1
]
],
"1413162768": [
1,
[
0,
1
]
]
},
"slots_names": [
"number_of_cups",
"beverage_temperature"
],
"stop_words_whitelist": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"unit_name": "lookup_intent_parser"
}
28 changes: 10 additions & 18 deletions data/tests/models/nlu_engine/nlu_engine.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
"builtin_entity_parser": "builtin_entity_parser",
"config": {
"intent_parsers_configs": [
{
"ignore_stop_words": true,
"unit_name": "lookup_intent_parser"
},
{
"ignore_stop_words": true,
"max_pattern_length": 1000,
Expand All @@ -11,21 +15,21 @@
{
"intent_classifier_config": {
"data_augmentation_config": {
"add_builtin_entities_examples": true,
"max_unknown_words": null,
"add_builtin_entities_examples": false,
"max_unknown_words": 0,
"min_utterances": 20,
"noise_factor": 5,
"unknown_word_prob": 0,
"unknown_words_replacement_string": null
},
"featurizer_config": {
"added_cooccurrence_feature_ratio": 0.0,
"added_cooccurrence_feature_ratio": 0.25,
"cooccurrence_vectorizer_config": {
"filter_stop_words": true,
"keep_order": true,
"unit_name": "cooccurrence_vectorizer",
"unknown_words_replacement_string": null,
"window_size": null
"window_size": 5
},
"pvalue_threshold": 0.4,
"tfidf_vectorizer_config": {
Expand Down Expand Up @@ -155,19 +159,6 @@
-1,
0
]
},
{
"args": {
"cluster_name": "brown_clusters",
"use_stemming": false
},
"factory_name": "word_cluster",
"offsets": [
-2,
-1,
0,
1
]
}
],
"random_seed": null,
Expand Down Expand Up @@ -198,10 +189,11 @@
}
},
"intent_parsers": [
"lookup_intent_parser",
"deterministic_intent_parser",
"probabilistic_intent_parser"
],
"model_version": "0.19.0",
"training_package_version": "0.18.0",
"training_package_version": "0.19.6",
"unit_name": "nlu_engine"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"unit_name": "cooccurrence_vectorizer"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"builtin_entity_scope": [
"snips/number"
],
"config": {
"filter_stop_words": true,
"keep_order": true,
"unit_name": "cooccurrence_vectorizer",
"unknown_words_replacement_string": null,
"window_size": 5
},
"language_code": "en",
"word_pairs": {
"0": [
"SNIPSNUMBER",
"coffee"
],
"1": [
"SNIPSNUMBER",
"tea"
],
"2": [
"TEMPERATURE",
"tea"
],
"3": [
"of",
"coffee"
],
"4": [
"of",
"tea"
]
}
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"config": {
"added_cooccurrence_feature_ratio": 0.0,
"added_cooccurrence_feature_ratio": 0.25,
"cooccurrence_vectorizer_config": {
"filter_stop_words": true,
"keep_order": true,
"unit_name": "cooccurrence_vectorizer",
"unknown_words_replacement_string": null,
"window_size": null
"window_size": 5
},
"pvalue_threshold": 0.4,
"tfidf_vectorizer_config": {
Expand All @@ -16,7 +16,7 @@
},
"unit_name": "featurizer"
},
"cooccurrence_vectorizer": null,
"cooccurrence_vectorizer": "cooccurrence_vectorizer",
"language_code": "en",
"tfidf_vectorizer": "tfidf_vectorizer"
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
"idf_diag": [
4.157000421150114,
3.176171168138387,
4.339321977944068,
4.157000421150114,
3.8693183486983322,
2.3652409519220585,
2.2110902720948,
3.3838105329166317,
2.904237452654745,
4.157000421150114,
Expand All @@ -25,12 +25,12 @@
3.3838105329166317,
3.1155465463219523,
3.0583881324820035,
1.9057086225436182,
1.923408199643019,
4.002849741322855,
4.157000421150114,
3.3838105329166317,
2.904237452654745,
2.514772685893022,
2.547562508716013,
4.157000421150114,
3.176171168138387
],
Expand Down
Loading

0 comments on commit 774a20b

Please sign in to comment.