Skip to content
Browse files

Fixing bug with Snippeter triggered by characters folded by ASCIIFoldingFilter, plus testcase
  • Loading branch information...
1 parent 2aa6bfb commit 0a5a7c2ac3c1aa3b987ee58b89585ac368328954 @clamprecht clamprecht committed Apr 3, 2012
View
5 src/main/java/com/flaptor/indextank/search/SnippetSearcher.java
@@ -189,9 +189,10 @@ private String mark(Window window, String text) {
for (Pair<AToken, Integer> token : window.matches) {
escapeAndAppend(buff, text, current, token.first().getStartOffset());
buff.append(open);
- escapeAndAppend(buff, text, token.first().getStartOffset(), token.first().getStartOffset() + token.last());
+ int endOffset = token.first().getEndOffset();
+ escapeAndAppend(buff, text, token.first().getStartOffset(), endOffset);
buff.append(close);
- current = token.first().getStartOffset() + token.last();
+ current = endOffset;
}
// let subclasses handle where snippets end
View
22 src/test/java/com/flaptor/indextank/search/SnippetSearcherTest.java
@@ -138,6 +138,28 @@ public void testEncodesHTMLonEnd() throws IOException, InterruptedException {
assertTrue("less-than signs not encoded!", sr.getField("snippet_text").contains("&lt;"));
}
+ /**
+  * Regression test for highlighting a term whose indexed token differs in length
+  * from the original text: the document contains "Fu\u00dfball" while the query
+  * term is "fussball". Verifies that the whole original word is wrapped in the
+  * highlight tags and that the text on either side of it is still present.
+  */
+ @TestInfo(testType=UNIT)
+ public void testTokenizingChangesTokenLength() throws IOException, InterruptedException, ParseException {
+ double timestampBoost = System.currentTimeMillis() / 1000.0;
+ String docid = "docid";
+ // \u00df is 'LATIN SMALL LETTER SHARP S'
+ // ASCIIFoldingFilter converts it from 'ß' to 'ss'
+ // see http://www.fileformat.info/info/unicode/char/df/index.htm
+ String text = "Clown Ferdinand und der Fu\u00dfball player";
+ Document doc = new Document(ImmutableMap.of("text", text));
+ // NOTE(review): third argument is presumably the document timestamp in seconds - confirm against the indexer API
+ indexer.add(docid, doc, (int)timestampBoost, Maps.<Integer, Double>newHashMap());
+
+ // Query with the folded spelling ("ss"), which is one character longer than the "\u00df" in the source text.
+ String queryText = "fussball";
+ Query query = new Query(new TermQuery("text", queryText), queryText, null);
+
+ // Ask for an HTML snippet of the "text" field; the result is read back from the "snippet_text" field.
+ SearchResults srs = searcher.search(query, 0, 1, 0, ImmutableMap.of("snippet_fields", "text", "snippet_type", "html"));
+ SearchResult sr = srs.getResults().iterator().next();
+ String snippet = sr.getField("snippet_text");
+ assertNotNull("Snippet is null", snippet);
+ // The expected snippet has the sharp s as the HTML entity &szlig; and the highlight tags around the complete original word.
+ assertTrue("Search term not highlighted", snippet.contains("<b>Fu&szlig;ball</b>"));
+ // The text on either side of the highlighted word must still be present in the snippet.
+ assertTrue("Snippet lost space before highlighted term", snippet.contains("der "));
+ assertTrue("Snippet lost space after highlighted term", snippet.contains(" player"));
+ }
@TestInfo(testType=UNIT)
public void testFetchAll() throws IOException, InterruptedException {

0 comments on commit 0a5a7c2

Please sign in to comment.
Something went wrong with that request. Please try again.