Skip to content

Commit

Permalink
Highlighting can return excerpt with no highlights
Browse files Browse the repository at this point in the history
You can configure the highlighting API to return an excerpt of a field
even if there wasn't a match on the field.

The FVH makes excerpts from the beginning of the string to the first
boundary character after the requested length or the boundary_max_scan,
whichever comes first.  The Plain highlighter makes excerpts from the
beginning of the string to the end of the last token before the requested
length.

Closes elastic#1171
  • Loading branch information
nik9000 authored and javanna committed Oct 24, 2013
1 parent 7c4e452 commit faccdf1
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 19 deletions.
22 changes: 22 additions & 0 deletions docs/reference/search/request/highlighting.asciidoc
Expand Up @@ -164,6 +164,28 @@ is required. Note that `fragment_size` is ignored in this case.
When using `fast-vector-highlighter` one can use the `fragment_offset`
parameter to control the margin to start highlighting from.

coming[0.90.6]
It is also possible to ask Elasticsearch to return a fragment from the
beginning of the field in the case where there are no matches by setting
`no_match_size` to something greater than 0. The default is 0.

[source,js]
--------------------------------------------------
{
"query" : {...},
"highlight" : {
"fields" : {
"content" : {
"fragment_size" : 150,
"number_of_fragments" : 3,
"no_match_size": 150
}
}
}
}
--------------------------------------------------


==== Highlight query

coming[0.90.6]
Expand Down
Expand Up @@ -656,6 +656,17 @@ public SearchRequestBuilder setHighlighterType(String type) {
return this;
}

/**
 * Configures the request-level {@code noMatchSize} for highlighting: the size of the fragment taken from
 * the beginning of a field when that field has nothing to highlight. It applies to fields that
 * do not define their own noMatchSize.
 * <p>
 * The value is forwarded unchanged to the underlying highlight builder.
 *
 * @param noMatchSize the fragment size to send, or null to leave the setting out of the request (null is the default)
 * @return this builder for chaining
 */
public SearchRequestBuilder setHighlighterNoMatchSize(Integer noMatchSize) {
    highlightBuilder().noMatchSize(noMatchSize);
    return this;
}

public SearchRequestBuilder setHighlighterOptions(Map<String, Object> options) {
highlightBuilder().options(options);
return this;
Expand Down
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.vectorhighlight.*;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
Expand All @@ -35,6 +36,7 @@
import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.internal.SearchContext;

import java.util.Collections;
import java.util.Map;

/**
Expand Down Expand Up @@ -150,11 +152,21 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
if (fragments != null && fragments.length > 0) {
return new HighlightField(field.field(), StringText.convertFromStringArray(fragments));
}
int noMatchSize = highlighterContext.field.noMatchSize();
if (noMatchSize > 0) {
// Essentially we just request that a fragment is built from 0 to noMatchSize using the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.names().indexName(),
fieldFragList, 1, field.preTags(), field.postTags(), encoder);
if (fragments != null && fragments.length > 0) {
return new HighlightField(field.field(), StringText.convertFromStringArray(fragments));
}
}
return null;
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}

return null;
}

private class MapperHighlightEntry {
Expand Down
Expand Up @@ -56,6 +56,8 @@ public class HighlightBuilder implements ToXContent {

private QueryBuilder highlightQuery;

private Integer noMatchSize;

private Map<String, Object> options;

/**
Expand Down Expand Up @@ -212,6 +214,17 @@ public HighlightBuilder highlightQuery(QueryBuilder highlightQuery) {
return this;
}

/**
 * Configures the global {@code no_match_size}: the size of the fragment taken from the beginning of a
 * field when that field has nothing to highlight. It applies to fields that do not define their own
 * noMatchSize.
 *
 * @param noMatchSize the fragment size to send, or null to leave the setting out of the request (null is the default)
 * @return this for chaining
 */
public HighlightBuilder noMatchSize(Integer noMatchSize) {
    this.noMatchSize = noMatchSize;
    return this;
}

/**
* Allows to set custom options for custom highlighters.
*/
Expand Down Expand Up @@ -250,6 +263,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (highlightQuery != null) {
builder.field("highlight_query", highlightQuery);
}
if (noMatchSize != null) {
builder.field("no_match_size", noMatchSize);
}
if (options != null && options.size() > 0) {
builder.field("options", options);
}
Expand Down Expand Up @@ -296,6 +312,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (field.highlightQuery != null) {
builder.field("highlight_query", field.highlightQuery);
}
if (field.noMatchSize != null) {
builder.field("no_match_size", field.noMatchSize);
}
if (field.options != null && field.options.size() > 0) {
builder.field("options", field.options);
}
Expand Down Expand Up @@ -324,6 +343,7 @@ public static class Field {
String highlighterType;
String fragmenter;
QueryBuilder highlightQuery;
Integer noMatchSize;
Map<String, Object> options;

public Field(String name) {
Expand Down Expand Up @@ -426,6 +446,17 @@ public Field highlightQuery(QueryBuilder highlightQuery) {
return this;
}

/**
 * Configures this field's {@code no_match_size}: the size of the fragment taken from the beginning of
 * the field when it has nothing to highlight.
 *
 * @param noMatchSize the fragment size to send, or null to leave the setting out of the request (null is the default)
 * @return this for chaining
 */
public Field noMatchSize(Integer noMatchSize) {
    this.noMatchSize = noMatchSize;
    return this;
}

/**
* Allows to set custom options for custom highlighters.
* This overrides global settings set by {@link HighlightBuilder#options(Map)}.
Expand Down
Expand Up @@ -82,6 +82,7 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
String globalFragmenter = null;
Map<String, Object> globalOptions = null;
Query globalHighlightQuery = null;
int globalNoMatchSize = 0;

while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
Expand Down Expand Up @@ -131,6 +132,8 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
globalHighlighterType = parser.text();
} else if ("fragmenter".equals(topLevelFieldName)) {
globalFragmenter = parser.text();
} else if ("no_match_size".equals(topLevelFieldName) || "noMatchSize".equals(topLevelFieldName)) {
globalNoMatchSize = parser.intValue();
}
} else if (token == XContentParser.Token.START_OBJECT && "options".equals(topLevelFieldName)) {
globalOptions = parser.map();
Expand Down Expand Up @@ -186,6 +189,8 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
field.highlighterType(parser.text());
} else if ("fragmenter".equals(fieldName)) {
field.fragmenter(parser.text());
} else if ("no_match_size".equals(fieldName) || "noMatchSize".equals(fieldName)) {
field.noMatchSize(parser.intValue());
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("highlight_query".equals(fieldName) || "highlightQuery".equals(fieldName)) {
Expand Down Expand Up @@ -251,6 +256,9 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
if (field.highlightQuery() == null && globalHighlightQuery != null) {
field.highlightQuery(globalHighlightQuery);
}
if (field.noMatchSize() == -1) {
field.noMatchSize(globalNoMatchSize);
}
}

context.highlight(new SearchContextHighlight(fields));
Expand Down
Expand Up @@ -30,13 +30,15 @@
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
Expand Down Expand Up @@ -164,10 +166,51 @@ public int compare(TextFragment o1, TextFragment o2) {
if (fragments != null && fragments.length > 0) {
return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
}

int noMatchSize = highlighterContext.field.noMatchSize();
if (noMatchSize > 0 && textsToHighlight.size() >= 1) {
// Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
String fieldContents = textsToHighlight.get(0).toString();
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
int end;
try {
end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.names().indexName(), fieldContents));
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
if (end > 0) {
return new HighlightField(highlighterContext.fieldName, new Text[] { new StringText(fieldContents.substring(0, end)) });
}
}
return null;
}

/**
 * Finds where an excerpt taken from the beginning of a field should end so that it does not exceed
 * {@code noMatchSize} characters and does not cut a token in half.
 * <p>
 * The stream is always closed before this method returns. {@link TokenStream#end()} is only invoked
 * once the stream has been fully consumed, which is the only point at which the TokenStream consumer
 * workflow allows it; on early returns and on failures the stream is closed without calling it.
 *
 * @param noMatchSize maximum length of the excerpt, in characters
 * @param tokenStream token stream over the field contents; consumed and closed by this method
 * @return the end offset of the last token that ends at or before {@code noMatchSize}, or -1 if the
 *         stream exposes no offsets or no token ends at or before {@code noMatchSize}
 * @throws IOException if the token stream fails while being read
 */
private int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
    try {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        // Look the attribute up once instead of on every token.
        OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            int tokenEnd = offsetAttribute.endOffset();
            if (tokenEnd >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (tokenEnd == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = tokenEnd;
        }
        // Only signal end-of-stream when every token really has been consumed.
        tokenStream.end();
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    } finally {
        // Always release the stream, including on early returns and exceptions.
        tokenStream.close();
    }
}

private static class Encoders {
public static Encoder DEFAULT = new DefaultEncoder();
public static Encoder HTML = new SimpleHTMLEncoder();
Expand Down
Expand Up @@ -40,7 +40,7 @@ public List<Field> fields() {
}

public static class Field {

// Fields that default to null or -1 are often set to their real default in HighlighterParseElement#parse
private final String field;

private int fragmentCharSize = -1;
Expand All @@ -66,10 +66,13 @@ public static class Field {
private String fragmenter;

private int boundaryMaxScan = -1;

private Character[] boundaryChars = null;

private Query highlightQuery;

private int noMatchSize = -1;

private Map<String, Object> options;

public Field(String field) {
Expand Down Expand Up @@ -192,6 +195,14 @@ public void highlightQuery(Query highlightQuery) {
this.highlightQuery = highlightQuery;
}

/**
 * Returns the size of the excerpt to take from the beginning of this field when nothing matched.
 * The field initializer sets this to -1 until a value is assigned.
 */
public int noMatchSize() {
    return this.noMatchSize;
}

/**
 * Sets the size of the excerpt to take from the beginning of this field when nothing matched.
 *
 * @param noMatchSize the excerpt size to store
 */
public void noMatchSize(int noMatchSize) {
    this.noMatchSize = noMatchSize;
}

/**
 * Returns the options map stored on this field, or null if none has been set.
 */
public Map<String, Object> options() {
    return this.options;
}
Expand Down

0 comments on commit faccdf1

Please sign in to comment.