diff --git a/cpp/KeywordTagging/KeywordTagging.cpp b/cpp/KeywordTagging/KeywordTagging.cpp index 1d2cb79d..e964ec33 100644 --- a/cpp/KeywordTagging/KeywordTagging.cpp +++ b/cpp/KeywordTagging/KeywordTagging.cpp @@ -344,18 +344,6 @@ void KeywordTagging::load_tags_json(const MPFJob &job, map(prop)) LOG4CXX_DEBUG(hw_logger_, "Text is: " + - boost::locale::conv::utf_to_utf(text)) + boost::locale::conv::utf_to_utf(prop_text)) - if (is_only_ascii_whitespace(text)) { + if (is_only_ascii_whitespace(prop_text)) { LOG4CXX_WARN(hw_logger_, "No text to process for " + boost::locale::conv::utf_to_utf(prop)) continue; @@ -546,7 +533,7 @@ void KeywordTagging::process_text_tagging(Properties &detection_properties, cons bool full_regex = DetectionComponentUtils::GetProperty(job.job_properties, "FULL_REGEX_SEARCH", true); map>> trigger_tags_words_offset; - set found_tags_regex = search_regex(job, text, json_kvs_regex, trigger_tags_words_offset, full_regex); + set found_tags_regex = search_regex(job, prop_text, json_kvs_regex, trigger_tags_words_offset, full_regex); all_found_tags.insert(found_tags_regex.begin(), found_tags_regex.end()); wstring tag_string = boost::algorithm::join(found_tags_regex, L"; "); diff --git a/cpp/KeywordTagging/test/data/test-newlines.txt b/cpp/KeywordTagging/test/data/test-newlines.txt new file mode 100644 index 00000000..179c6bd4 --- /dev/null +++ b/cpp/KeywordTagging/test/data/test-newlines.txt @@ -0,0 +1,9 @@ +Contact Us | Other Text | This is an address street, MA, 55555 +someemail@place.org +777-777-7777 + +Use this special code when you contact us: +456-4564-456-46 + + +This sentence finally ends in a period. diff --git a/cpp/KeywordTagging/test/test_keyword_tagging.cpp b/cpp/KeywordTagging/test/test_keyword_tagging.cpp index b76d35b2..268944f3 100644 --- a/cpp/KeywordTagging/test/test_keyword_tagging.cpp +++ b/cpp/KeywordTagging/test/test_keyword_tagging.cpp @@ -55,12 +55,11 @@ bool containsProp(const std::string &exp_text, const std::vector &tracks, const std::string &prop, int index = -1) { ASSERT_TRUE(containsProp(expected_value, tracks, prop, index)) - << "Expected tagger to detect " << prop << " \"" << expected_value << "\" in " << image_path; + << "Expected tagger to detect " << prop << " \"" << expected_value << "\" in " << file_path; } void assertNotInText(const std::string &file_path, const std::string &expected_text, @@ -111,7 +109,6 @@ bool logging_initialized = init_logging(); TEST(KEYWORDTAGGING, TaggingTest) { KeywordTagging tagger; std::vector results; - std::map custom_properties_disabled = {{"FULL_REGEX_SEARCH", "false"}}; std::map custom_properties = {{}}; std::vector result; @@ -140,7 +137,6 @@ TEST(KEYWORDTAGGING, TaggingTest) { TEST(KEYWORDTAGGING, MulitpleTagsTest) { KeywordTagging tagger; std::vector results; - std::map custom_properties_disabled = {{"FULL_REGEX_SEARCH", "false"}}; std::map custom_properties = {{}}; tagger.SetRunDirectory("../plugin"); @@ -228,7 +224,6 @@ TEST(KEYWORDTAGGING, FullSearch) { TEST(KEYWORDTAGGING, LanguageTest) { KeywordTagging tagger; std::vector results; - std::map custom_properties_disabled = {{"FULL_REGEX_SEARCH", "false"}}; std::map custom_properties = {{}}; tagger.SetRunDirectory("../plugin"); @@ -236,10 +231,10 @@ TEST(KEYWORDTAGGING, LanguageTest) { ASSERT_TRUE(tagger.Init()); ASSERT_NO_FATAL_FAILURE(runKeywordTagging("data/eng-bul.txt", tagger, results, custom_properties)); - assertInText("data/eng-bul.txt", "foreign-text", results, "TAGS", 0); - assertInText("data/eng-bul.txt", "свободни", results, "TEXT TRIGGER WORDS", 0); - assertInText("data/eng-bul.txt", "106-113", results, "TEXT TRIGGER WORDS OFFSET", 0); - assertInText("data/eng-bul.txt", "Всички хора се раждат свободни", results, "TEXT", 0); + assertInText("data/eng-bul.txt", "foreign-text", results, "TAGS"); + assertInText("data/eng-bul.txt", "свободни", results, "TEXT FOREIGN-TEXT TRIGGER WORDS"); + assertInText("data/eng-bul.txt", "106-113", results, "TEXT FOREIGN-TEXT TRIGGER WORDS OFFSET"); + assertInText("data/eng-bul.txt", "Всички хора се раждат свободни", results, "TEXT"); ASSERT_TRUE(tagger.Close()); } @@ -572,16 +567,41 @@ TEST(KEYWORDTAGGING, FeedForwardTags) { ASSERT_TRUE(tagger.Init()); MPFGenericTrack track(0.9, - {{"TAGS", "FeedForwardTag"}, - {"BAR", "cash"}}); - MPFGenericJob job("JOB NAME", "/some/path", track, - { { "FEED_FORWARD_PROP_TO_PROCESS", "FOO,BAR" } }, {}); + {{"TAGS", "FeedForwardTag"}, + {"BAR", "cash"}}); + MPFGenericJob job("JOB NAME", "/some/path", track, + { { "FEED_FORWARD_PROP_TO_PROCESS", "FOO,BAR" } }, {}); - std::vector results = tagger.GetDetections(job); - ASSERT_EQ(1, results.size()); - ASSERT_EQ(track.confidence, results.at(0).confidence); + std::vector results = tagger.GetDetections(job); + ASSERT_EQ(1, results.size()); + ASSERT_EQ(track.confidence, results.at(0).confidence); - Properties props = results.at(0).detection_properties; - ASSERT_EQ(4, props.size()); - ASSERT_EQ("feedforwardtag; financial", props["TAGS"]); + Properties props = results.at(0).detection_properties; + ASSERT_EQ(4, props.size()); + ASSERT_EQ("feedforwardtag; financial", props["TAGS"]); +} + + +TEST(KEYWORDTAGGING, NewLines) { + KeywordTagging tagger; + std::vector results; + std::map custom_properties = {{}}; + + tagger.SetRunDirectory("../plugin"); + + ASSERT_TRUE(tagger.Init()); + + ASSERT_NO_FATAL_FAILURE(runKeywordTagging("data/test-newlines.txt", tagger, results, custom_properties)); + assertInText("data/test-newlines.txt", "identity document", results, "TAGS"); + assertInText("data/test-newlines.txt", "address", results, "TEXT IDENTITY DOCUMENT TRIGGER WORDS"); + assertInText("data/test-newlines.txt", "37-43", results, "TEXT IDENTITY DOCUMENT TRIGGER WORDS OFFSET"); + assertInText("data/test-newlines.txt", "personal", results, "TAGS"); + assertInText("data/test-newlines.txt", "777-777-7777", results, "TEXT PERSONAL TRIGGER WORDS"); + assertInText("data/test-newlines.txt", "83-94", results, "TEXT PERSONAL TRIGGER WORDS OFFSET"); + assertInText("data/test-newlines.txt", "564-456-46", results, "TEXT PERSONAL TRIGGER WORDS"); + assertInText("data/test-newlines.txt", "145-154", results, "TEXT PERSONAL TRIGGER WORDS OFFSET"); + assertInText("data/test-newlines.txt", "Text", results, "TEXT PERSONAL TRIGGER WORDS"); + assertInText("data/test-newlines.txt", "19-22", results, "TEXT PERSONAL TRIGGER WORDS OFFSET"); + + ASSERT_TRUE(tagger.Close()); }