Skip to content

Commit

Permalink
Add the ability to ignore or fail on numeric fields when executing mo…
Browse files Browse the repository at this point in the history
…re-like-this or fuzzy-like-this queries.

More-like-this and fuzzy-like-this queries expect analyzers which are able to
generate character terms (CharTermAttribute), so unfortunately this doesn't
work with analyzers which generate binary-only terms (BinaryTermAttribute,
the default CharTermAttribute impl being a special BinaryTermAttribute) such as
our analyzers for numeric fields (byte, short, integer, long, float, double but
also date and ip).

To work around this issue, this commit adds a fail_on_unsupported_field
parameter to the more-like-this and fuzzy-like-this parsers. When this parameter
is false, numeric fields will just be ignored and when it is true, an error will
be returned, saying that these queries don't support numeric fields. By default,
this setting is true, but the mlt API sets it to false (unless fields were
explicitly requested) in order not to fail on documents which contain numeric
fields.

Close elastic#3252
  • Loading branch information
jpountz committed Jul 15, 2013
1 parent 28b9e25 commit f5c69c6
Show file tree
Hide file tree
Showing 19 changed files with 341 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public void onResponse(GetResponse getResponse) {
GetField getField = getResponse.getField(field);
if (getField != null) {
for (Object value : getField.getValues()) {
addMoreLikeThis(request, boolBuilder, getField.getName(), value.toString());
addMoreLikeThis(request, boolBuilder, getField.getName(), value.toString(), true);
}
it.remove();
}
Expand Down Expand Up @@ -282,7 +282,7 @@ public boolean beforeFieldAdded(FieldMapper fieldMapper, Field field, Object par
}

if (fields.isEmpty() || fields.contains(field.name())) {
addMoreLikeThis(request, boolBuilder, fieldMapper, field);
addMoreLikeThis(request, boolBuilder, fieldMapper, field, !fields.isEmpty());
}

return false;
Expand All @@ -302,11 +302,11 @@ private Object convertField(Field field) {
}
}

private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, FieldMapper fieldMapper, Field field) {
addMoreLikeThis(request, boolBuilder, field.name(), fieldMapper.value(convertField(field)).toString());
private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, FieldMapper fieldMapper, Field field, boolean failOnUnsupportedField) {
addMoreLikeThis(request, boolBuilder, field.name(), fieldMapper.value(convertField(field)).toString(), failOnUnsupportedField);
}

private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, String fieldName, String likeText) {
private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, String fieldName, String likeText, boolean failOnUnsupportedField) {
MoreLikeThisFieldQueryBuilder mlt = moreLikeThisFieldQuery(fieldName)
.likeText(likeText)
.percentTermsToMatch(request.percentTermsToMatch())
Expand All @@ -317,7 +317,8 @@ private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolB
.maxWordLen(request.maxWordLen())
.minTermFreq(request.minTermFreq())
.maxQueryTerms(request.maxQueryTerms())
.stopWords(request.stopWords());
.stopWords(request.stopWords())
.failOnUnsupportedField(failOnUnsupportedField);
boolBuilder.should(mlt);
}

Expand Down
37 changes: 33 additions & 4 deletions src/main/java/org/elasticsearch/index/analysis/Analysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
Expand All @@ -48,6 +51,7 @@
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
Expand All @@ -61,10 +65,7 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.settings.IndexSettings;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.*;
import java.net.URL;
import java.util.*;

Expand Down Expand Up @@ -275,4 +276,32 @@ public static Reader getReaderFromFile(Environment env, Settings settings, Strin

return reader;
}

/**
 * Tells whether the given token stream can expose its terms as characters.
 * <p>Most analyzers produce character terms ({@link CharTermAttribute}), but
 * some token streams, such as {@link NumericTokenStream}, only produce binary
 * terms. Such streams are unsuitable for highlighting and for more-like-this
 * queries, both of which expect character terms.</p>
 * <p>The check is performed by asking the stream for a
 * {@link CharTermAttribute}; a stream that rejects the request with an
 * {@link IllegalArgumentException} is reported as not character-based.</p>
 *
 * @param tokenStream the token stream to probe
 * @return {@code true} if a {@link CharTermAttribute} could be obtained from
 *         the stream, {@code false} if the stream refused it
 */
public static boolean isCharacterTokenStream(TokenStream tokenStream) {
    boolean providesCharTerms = true;
    try {
        tokenStream.addAttribute(CharTermAttribute.class);
    } catch (IllegalArgumentException ignored) {
        // The stream cannot supply character terms.
        providesCharTerms = false;
    }
    return providesCharTerms;
}

/**
 * Tells whether the {@link TokenStream}s that <code>analyzer</code> creates
 * for <code>fieldName</code> provide character terms.
 * <p>A token stream is requested from the analyzer over an empty input and
 * then probed with {@link #isCharacterTokenStream(TokenStream)}.</p>
 *
 * @param analyzer  the analyzer to check
 * @param fieldName the field the token stream is requested for
 * @return the result of {@link #isCharacterTokenStream(TokenStream)} on the
 *         analyzer's token stream
 * @throws IOException declared by the method signature; presumably raised
 *                     when the analyzer fails to create the stream
 * @see #isCharacterTokenStream(TokenStream)
 */
public static boolean generatesCharacterTokenStream(Analyzer analyzer, String fieldName) throws IOException {
    final Reader emptyInput = new StringReader("");
    final TokenStream probe = analyzer.tokenStream(fieldName, emptyInput);
    return isCharacterTokenStream(probe);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ public static class CustomByteNumericField extends CustomNumericField {
private final NumberFieldMapper mapper;

public CustomByteNumericField(NumberFieldMapper mapper, byte number, FieldType fieldType) {
super(mapper, mapper.fieldType.stored() ? number : null, fieldType);
super(mapper, number, fieldType);
this.mapper = mapper;
this.number = number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ public static class CustomDoubleNumericField extends CustomNumericField {
private final NumberFieldMapper mapper;

public CustomDoubleNumericField(NumberFieldMapper mapper, double number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType);
super(mapper, number, fieldType);
this.mapper = mapper;
this.number = number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ public static class CustomFloatNumericField extends CustomNumericField {
private final NumberFieldMapper mapper;

public CustomFloatNumericField(NumberFieldMapper mapper, float number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType);
super(mapper, number, fieldType);
this.mapper = mapper;
this.number = number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ public static class CustomIntegerNumericField extends CustomNumericField {
private final NumberFieldMapper mapper;

public CustomIntegerNumericField(NumberFieldMapper mapper, int number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType);
super(mapper, number, fieldType);
this.mapper = mapper;
this.number = number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ public static class CustomLongNumericField extends CustomNumericField {
private final NumberFieldMapper mapper;

public CustomLongNumericField(NumberFieldMapper mapper, long number, FieldType fieldType) {
super(mapper, mapper.fieldType.stored() ? number : null, fieldType);
super(mapper, number, fieldType);
this.mapper = mapper;
this.number = number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ public static class CustomShortNumericField extends CustomNumericField {
private final NumberFieldMapper mapper;

public CustomShortNumericField(NumberFieldMapper mapper, short number, FieldType fieldType) {
super(mapper, mapper.fieldType().stored() ? number : null, fieldType);
super(mapper, number, fieldType);
this.mapper = mapper;
this.number = number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class FuzzyLikeThisFieldQueryBuilder extends BaseQueryBuilder implements
private Integer maxQueryTerms;
private Boolean ignoreTF;
private String analyzer;
// Defaults to true so an untouched builder matches the parser-side default for fail_on_unsupported_field.
private boolean failOnUnsupportedField = true;

/**
* A fuzzy more like this query on the provided field.
Expand Down Expand Up @@ -89,6 +90,14 @@ public FuzzyLikeThisFieldQueryBuilder boost(float boost) {
return this;
}

/**
 * Sets whether this query should fail, or instead return no result, when it
 * is run against an unsupported field such as a binary/numeric field.
 *
 * @param fail {@code true} to fail on unsupported fields, {@code false} to
 *             return no result for them
 * @return this builder
 */
public FuzzyLikeThisFieldQueryBuilder failOnUnsupportedField(boolean fail) {
    this.failOnUnsupportedField = fail;
    return this;
}

@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(FuzzyLikeThisFieldQueryParser.NAME);
Expand All @@ -115,6 +124,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
if (analyzer != null) {
builder.field("analyzer", analyzer);
}
if (!failOnUnsupportedField) {
builder.field("fail_on_unsupported_field", failOnUnsupportedField);
}
builder.endObject();
builder.endObject();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.mapper.MapperService;

import java.io.IOException;
Expand Down Expand Up @@ -67,6 +69,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
int prefixLength = 0;
boolean ignoreTF = false;
Analyzer analyzer = null;
boolean failOnUnsupportedField = true;

XContentParser.Token token = parser.nextToken();
if (token != XContentParser.Token.FIELD_NAME) {
Expand Down Expand Up @@ -100,6 +103,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
prefixLength = parser.intValue();
} else if ("analyzer".equals(currentFieldName)) {
analyzer = parseContext.analysisService().analyzer(parser.text());
} else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) {
failOnUnsupportedField = parser.booleanValue();
} else {
throw new QueryParsingException(parseContext.index(), "[flt_field] query does not support [" + currentFieldName + "]");
}
Expand All @@ -122,6 +127,13 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer();
}
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new ElasticSearchIllegalArgumentException("fuzzy_like_this_field doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
return null;
}
}

FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer);
query.addTerms(likeText, fieldName, minSimilarity, prefixLength);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class FuzzyLikeThisQueryBuilder extends BaseQueryBuilder implements Boost
private Integer maxQueryTerms;
private Boolean ignoreTF;
private String analyzer;
// Fail on unsupported (binary/numeric) fields unless the caller opts out.
private boolean failOnUnsupportedField = true;

/**
* Constructs a new fuzzy like this query which uses the "_all" field.
Expand Down Expand Up @@ -96,6 +97,14 @@ public FuzzyLikeThisQueryBuilder boost(float boost) {
return this;
}

/**
 * Sets whether this query should fail, or instead return no result, when it
 * is run against an unsupported field such as a binary/numeric field.
 *
 * @param fail {@code true} to fail on unsupported fields, {@code false} to
 *             return no result for them
 * @return this builder
 */
public FuzzyLikeThisQueryBuilder failOnUnsupportedField(boolean fail) {
    this.failOnUnsupportedField = fail;
    return this;
}

@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(FuzzyLikeThisQueryParser.NAME);
Expand Down Expand Up @@ -128,6 +137,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
if (analyzer != null) {
builder.field("analyzer", analyzer);
}
if (!failOnUnsupportedField) {
builder.field("fail_on_unsupported_field", failOnUnsupportedField);
}
builder.endObject();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,18 @@

package org.elasticsearch.index.query;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

/**
Expand Down Expand Up @@ -66,6 +70,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
int prefixLength = 0;
boolean ignoreTF = false;
Analyzer analyzer = null;
boolean failOnUnsupportedField = true;

XContentParser.Token token;
String currentFieldName = null;
Expand All @@ -87,12 +92,14 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
prefixLength = parser.intValue();
} else if ("analyzer".equals(currentFieldName)) {
analyzer = parseContext.analysisService().analyzer(parser.text());
} else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) {
failOnUnsupportedField = parser.booleanValue();
} else {
throw new QueryParsingException(parseContext.index(), "[flt] query does not support [" + currentFieldName + "]");
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("fields".equals(currentFieldName)) {
fields = Lists.newArrayList();
fields = Lists.newLinkedList();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
fields.add(parseContext.indexName(parser.text()));
}
Expand All @@ -112,13 +119,26 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars

FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer);
if (fields == null) {
// add the default _all field
query.addTerms(likeText, parseContext.defaultField(), minSimilarity, prefixLength);
} else {
for (String field : fields) {
query.addTerms(likeText, field, minSimilarity, prefixLength);
fields = Lists.newArrayList(parseContext.defaultField());
} else if (fields.isEmpty()) {
throw new QueryParsingException(parseContext.index(), "fuzzy_like_this requires 'fields' to be non-empty");
}
for (Iterator<String> it = fields.iterator(); it.hasNext(); ) {
final String fieldName = it.next();
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new ElasticSearchIllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
it.remove();
}
}
}
if (fields.isEmpty()) {
return null;
}
for (String field : fields) {
query.addTerms(likeText, field, minSimilarity, prefixLength);
}
query.setBoost(boost);
query.setIgnoreTF(ignoreTF);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B
private float boostTerms = -1;
private float boost = -1;
private String analyzer;
// Defaults to true so an untouched builder matches the parser-side default for fail_on_unsupported_field.
private boolean failOnUnsupportedField = true;

/**
* A more like this query that runs against a specific field.
Expand Down Expand Up @@ -157,6 +158,14 @@ public MoreLikeThisFieldQueryBuilder boost(float boost) {
return this;
}

/**
 * Sets whether this query should fail, or instead return no result, when it
 * is run against an unsupported field such as a binary/numeric field.
 *
 * @param fail {@code true} to fail on unsupported fields, {@code false} to
 *             return no result for them
 * @return this builder
 */
public MoreLikeThisFieldQueryBuilder failOnUnsupportedField(boolean fail) {
    this.failOnUnsupportedField = fail;
    return this;
}

@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(MoreLikeThisFieldQueryParser.NAME);
Expand Down Expand Up @@ -202,6 +211,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
if (analyzer != null) {
builder.field("analyzer", analyzer);
}
if (!failOnUnsupportedField) {
builder.field("fail_on_unsupported_field", failOnUnsupportedField);
}
builder.endObject();
builder.endObject();
}
Expand Down
Loading

0 comments on commit f5c69c6

Please sign in to comment.