Skip to content

Commit

Permalink
fix(custom_multiword_synonyms): re-enable support for custom multi-wo…
Browse files Browse the repository at this point in the history
…rd synonyms
  • Loading branch information
missinglink committed Aug 4, 2020
1 parent e1ce956 commit 19e5650
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 2 deletions.
21 changes: 19 additions & 2 deletions settings.js
Expand Up @@ -34,6 +34,7 @@ function generate(){
"filter": [
"lowercase",
"trim",
"synonyms/custom_admin/multiword",
"admin_synonyms_multiplexer",
"icu_folding",
"word_delimiter",
Expand All @@ -49,6 +50,7 @@ function generate(){
"filter": [
"lowercase",
"trim",
"synonyms/custom_name/multiword",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -81,6 +83,7 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_name/multiword",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -126,6 +129,7 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_street/multiword",
"street_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -227,10 +231,23 @@ function generate(){
// dynamically create filters for all synonym files in the ./synonyms directory.
// each filter is given the same name as the file, path separators are replaced with
// underscores and the file extension is removed.
_.each(synonyms, (synonym, name) => {
// note: if no synonym entries are present in the list we use an array
// containing an empty string to avoid elasticsearch schema parsing errors.
_.each(synonyms, (entries, name) => {

const singleWordEntries = entries.filter(e => !/\s/.test(e))
const multiWordEntries = entries.filter(e => /\s/.test(e))

// generate a filter containing single-word synonyms
settings.analysis.filter[`synonyms/${name}`] = {
"type": "synonym",
"synonyms": !_.isEmpty(synonym) ? synonym : ['']
"synonyms": !_.isEmpty(singleWordEntries) ? singleWordEntries : ['']
};

// generate a filter containing multi-word synonyms
settings.analysis.filter[`synonyms/${name}/multiword`] = {
"type": "synonym",
"synonyms": !_.isEmpty(multiWordEntries) ? multiWordEntries : ['']
};
});

Expand Down
7 changes: 7 additions & 0 deletions synonyms/linter.js
Expand Up @@ -41,6 +41,7 @@ function linter(synonyms) {
letterCasing(line, logprefix, tokens);
tokensSanityCheck(line, logprefix, tokens);
multiWordCheck(line, logprefix, tokens);
tokenReplacementCheck(line, logprefix);
// tokenLengthCheck(line, logprefix, tokens);
})
})
Expand Down Expand Up @@ -74,6 +75,12 @@ function multiWordCheck(line, logprefix, tokens) {
});
}

/**
 * Emit a linter warning when a synonym line contains the '=>' rule syntax.
 *
 * Lines without '=>' produce no output.
 *
 * @param {string} line - a single line taken from a synonyms file
 * @param {string} logprefix - text prepended to the warning message
 */
function tokenReplacementCheck(line, logprefix) {
  const containsReplacementRule = /=>/.test(line);

  // nothing to report for lines which do not contain the '=>' operator
  if (!containsReplacementRule) {
    return;
  }

  logger.warn(`${logprefix} synonym rule '=>' is not supported, use ',' instead`);
}

function tokenLengthCheck(line, logprefix, tokens) {
_.each(tokens, token => {
if (token.length <= 1) {
Expand Down
52 changes: 52 additions & 0 deletions test/fixtures/expected.json
Expand Up @@ -30,6 +30,7 @@
"filter": [
"lowercase",
"trim",
"synonyms/custom_admin/multiword",
"admin_synonyms_multiplexer",
"icu_folding",
"word_delimiter",
Expand All @@ -48,6 +49,7 @@
"filter": [
"lowercase",
"trim",
"synonyms/custom_name/multiword",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -86,6 +88,7 @@
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_name/multiword",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -142,6 +145,7 @@
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_street/multiword",
"street_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -218,18 +222,36 @@
""
]
},
"synonyms/custom_admin/multiword": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/custom_name": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/custom_name/multiword": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/custom_street": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/custom_street/multiword": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/directionals": {
"type": "synonym",
"synonyms": [
Expand Down Expand Up @@ -304,6 +326,12 @@
"sud,s"
]
},
"synonyms/directionals/multiword": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/personal_titles": {
"type": "synonym",
"synonyms": [
Expand Down Expand Up @@ -500,6 +528,12 @@
"veuve,vve"
]
},
"synonyms/personal_titles/multiword": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/place_names": {
"type": "synonym",
"synonyms": [
Expand Down Expand Up @@ -819,13 +853,25 @@
"étang,etang"
]
},
"synonyms/place_names/multiword": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/punctuation": {
"type": "synonym",
"synonyms": [
"&,and",
"&,und"
]
},
"synonyms/punctuation/multiword": {
"type": "synonym",
"synonyms": [
""
]
},
"synonyms/streets": {
"type": "synonym",
"synonyms": [
Expand Down Expand Up @@ -1639,6 +1685,12 @@
"wl,well",
"wls,wells"
]
},
"synonyms/streets/multiword": {
"type": "synonym",
"synonyms": [
""
]
}
},
"char_filter": {
Expand Down
4 changes: 4 additions & 0 deletions test/settings.js
Expand Up @@ -83,6 +83,7 @@ module.exports.tests.peliasAdminAnalyzer = function(test, common) {
t.deepEqual(analyzer.filter, [
"lowercase",
"trim",
"synonyms/custom_admin/multiword",
"admin_synonyms_multiplexer",
"icu_folding",
"word_delimiter",
Expand Down Expand Up @@ -130,6 +131,7 @@ module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) {
t.deepEqual( analyzer.filter, [
"lowercase",
"trim",
"synonyms/custom_name/multiword",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -186,6 +188,7 @@ module.exports.tests.peliasPhraseAnalyzer = function(test, common) {
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_name/multiword",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down Expand Up @@ -293,6 +296,7 @@ module.exports.tests.peliasStreetAnalyzer = function(test, common) {
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_street/multiword",
"street_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
Expand Down

0 comments on commit 19e5650

Please sign in to comment.