From e83bf87c34bce9b3af69ba4f90ca6751b85ebfc1 Mon Sep 17 00:00:00 2001 From: Nathan Myles Date: Tue, 16 May 2023 19:04:53 -0300 Subject: [PATCH] Fix mapping char_filter when mapping a hashtag Signed-off-by: Nathan Myles Issue: https://github.com/opensearch-project/OpenSearch/issues/7308 --- CHANGELOG.md | 1 + .../common/MappingCharFilterFactory.java | 2 +- .../common/MappingCharFilterFactoryTests.java | 8 +++---- .../test/analysis-common/50_char_filters.yml | 18 +++++++++++++++ .../opensearch/index/analysis/Analysis.java | 23 ++++++++++++++++++- 5 files changed, 46 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b76adc925210..2522ce8dbbf45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support OpenSSL Provider with default Netty allocator ([#5460](https://github.com/opensearch-project/OpenSearch/pull/5460)) - Replaces ZipInputStream with ZipFile to fix Zip Slip vulnerability ([#7230](https://github.com/opensearch-project/OpenSearch/pull/7230)) - Add missing validation/parsing of SearchBackpressureMode of SearchBackpressureSettings ([#7541](https://github.com/opensearch-project/OpenSearch/pull/7541)) +- Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591)) ### Security diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java index d6d9f8975f2fc..bd241de749f11 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/MappingCharFilterFactory.java @@ -54,7 +54,7 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen MappingCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name); - List> rules = Analysis.parseWordList(env, settings, "mappings", this::parse); + List> rules = Analysis.parseWordList(env, settings, "mappings", this::parse, false); if (rules == null) { throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured"); } diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java index 7d059ff9ce1da..387eb4a377007 100644 --- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/MappingCharFilterFactoryTests.java @@ -37,11 +37,11 @@ public static CharFilterFactory create(String... rules) throws IOException { public void testRulesOk() throws IOException { MappingCharFilterFactory mappingCharFilterFactory = (MappingCharFilterFactory) create( - "# This is a comment", + "# => _hashtag_", ":) => _happy_", ":( => _sad_" ); - CharFilter inputReader = (CharFilter) mappingCharFilterFactory.create(new StringReader("I'm so :)")); + CharFilter inputReader = (CharFilter) mappingCharFilterFactory.create(new StringReader("I'm so :), I'm so :( #confused")); char[] tempBuff = new char[14]; StringBuilder output = new StringBuilder(); while (true) { @@ -49,7 +49,7 @@ public void testRulesOk() throws IOException { if (length == -1) break; output.append(tempBuff, 0, length); } - assertEquals("I'm so _happy_", output.toString()); + assertEquals("I'm so _happy_, I'm so _sad_ _hashtag_confused", output.toString()); } public void testRuleError() { @@ -64,7 +64,7 @@ public void testRuleError() { } public void testRulePartError() { - RuntimeException ex = expectThrows(RuntimeException.class, () -> create("# This is a comment", ":) => _happy_", "a:b")); + RuntimeException ex = expectThrows(RuntimeException.class, () -> create("# => _hashtag_", ":) => _happy_", "a:b")); assertEquals("Line [3]: Invalid mapping rule : [a:b]", ex.getMessage()); } } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml index 67e68428c07c7..0078575ae8e57 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml @@ -57,3 +57,21 @@ - match: { detail.tokenizer.tokens.0.start_offset: 0 } - match: { detail.tokenizer.tokens.0.end_offset: 15 } - match: { detail.tokenizer.tokens.0.position: 0 } +--- +"mapping_with_hashtag": + - do: + indices.analyze: + body: + text: 'test #test @test' + tokenizer: standard + filter: + - lowercase + char_filter: + - type: mapping + mappings: + - "# => _hashsign_" + - "@ => _atsign_" + - length: { tokens: 3 } + - match: { tokens.0.token: test } + - match: { tokens.1.token: _hashsign_test } + - match: { tokens.2.token: _atsign_test } diff --git a/server/src/main/java/org/opensearch/index/analysis/Analysis.java b/server/src/main/java/org/opensearch/index/analysis/Analysis.java index 52860fbf1dc3b..f4465c9dffac6 100644 --- a/server/src/main/java/org/opensearch/index/analysis/Analysis.java +++ b/server/src/main/java/org/opensearch/index/analysis/Analysis.java @@ -222,6 +222,16 @@ public static List parseWordList(Environment env, Settings settings, Stri return parseWordList(env, settings, settingPrefix + "_path", settingPrefix, parser); } + public static List parseWordList( + Environment env, + Settings settings, + String settingPrefix, + CustomMappingRuleParser parser, + boolean removeComments + ) { + return parseWordList(env, settings, settingPrefix + "_path", settingPrefix, parser, removeComments); + } + /** * Parses a list of words from the specified settings or from a file, with the given parser. * @@ -236,6 +246,17 @@ public static List parseWordList( String settingPath, String settingList, CustomMappingRuleParser parser + ) { + return parseWordList(env, settings, settingPath, settingList, parser, true); + } + + public static List parseWordList( + Environment env, + Settings settings, + String settingPath, + String settingList, + CustomMappingRuleParser parser, + boolean removeComments ) { List words = getWordList(env, settings, settingPath, settingList); if (words == null) { @@ -245,7 +266,7 @@ public static List parseWordList( int lineNum = 0; for (String word : words) { lineNum++; - if (word.startsWith("#") == false) { + if (removeComments == false || word.startsWith("#") == false) { try { rules.add(parser.apply(word)); } catch (RuntimeException ex) {