Skip to content

Commit

Permalink
Fix (simple)_query_string to ignore removed terms (elastic#28871)
Browse files Browse the repository at this point in the history
This change ensures that we ignore terms removed from the analysis rather than returning a match_no_docs query for the part
that contain the stop word. For instance a query like "the AND fox" should ignore "the" if it is considered as a stop word instead of
adding a match_no_docs query.
This change also fixes the analysis of prefix terms that start with a stop word (e.g. `the*`). In such case if `analyze_wildcard` is true and `the`
is considered as a stop word this part of the query is rewritten into a match_no_docs query. Since it's a prefix query this change forces the prefix query
on `the` even if it is removed from the analysis.

Fixes elastic#28855
Fixes elastic#28856
  • Loading branch information
jimczi committed Mar 4, 2018
1 parent 000f8ea commit 41efd67
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ public void writeTo(StreamOutput out) throws IOException {

public enum ZeroTermsQuery implements Writeable {
NONE(0),
ALL(1);
ALL(1),
// this is used internally to make sure that query_string and simple_query_string
// ignores query part that removes all tokens.
NULL(2);

private final int ordinal;

Expand Down Expand Up @@ -312,10 +315,16 @@ protected final Query termQuery(MappedFieldType fieldType, BytesRef value, boole
}

protected Query zeroTermsQuery() {
if (zeroTermsQuery == DEFAULT_ZERO_TERMS_QUERY) {
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present.");
switch (zeroTermsQuery) {
case NULL:
return null;
case NONE:
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present");
case ALL:
return Queries.newMatchAllQuery();
default:
throw new IllegalStateException("unknown zeroTermsQuery " + zeroTermsQuery);
}
return Queries.newMatchAllQuery();
}

private class MatchQueryBuilder extends QueryBuilder {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ private QueryStringQueryParser(QueryShardContext context, String defaultField,
this.context = context;
this.fieldsAndWeights = Collections.unmodifiableMap(fieldsAndWeights);
this.queryBuilder = new MultiMatchQuery(context);
queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
queryBuilder.setLenient(lenient);
this.lenient = lenient;
}
Expand Down Expand Up @@ -343,7 +344,6 @@ protected Query getFieldQuery(String field, String queryText, int slop) throws P
if (fields.isEmpty()) {
return newUnmappedFieldQuery(field);
}
final Query query;
Analyzer oldAnalyzer = queryBuilder.analyzer;
int oldSlop = queryBuilder.phraseSlop;
try {
Expand All @@ -353,7 +353,7 @@ protected Query getFieldQuery(String field, String queryText, int slop) throws P
queryBuilder.setAnalyzer(forceAnalyzer);
}
queryBuilder.setPhraseSlop(slop);
query = queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, fields, queryText, null);
Query query = queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, fields, queryText, null);
return applySlop(query, slop);
} catch (IOException e) {
throw new ParseException(e.getMessage());
Expand Down Expand Up @@ -555,7 +555,7 @@ private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throw
}

if (tlist.size() == 0) {
return new MatchNoDocsQuery("analysis was empty for " + field + ":" + termStr);
return super.getPrefixQuery(field, termStr);
}

if (tlist.size() == 1 && tlist.get(0).size() == 1) {
Expand Down Expand Up @@ -763,7 +763,7 @@ private PhraseQuery addSlopToPhrase(PhraseQuery query, int slop) {
@Override
public Query parse(String query) throws ParseException {
if (query.trim().isEmpty()) {
return queryBuilder.zeroTermsQuery();
return Queries.newMatchNoDocsQuery("Matching no documents because no terms present");
}
return super.parse(query);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public SimpleQueryStringQueryParser(Analyzer analyzer, Map<String, Float> weight
this.queryBuilder = new MultiMatchQuery(context);
this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery());
this.queryBuilder.setLenient(settings.lenient());
this.queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
if (analyzer != null) {
this.queryBuilder.setAnalyzer(analyzer);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ protected MatchQueryBuilder doCreateTestQueryBuilder() {
}

if (randomBoolean()) {
matchQuery.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
matchQuery.zeroTermsQuery(randomFrom(ZeroTermsQuery.ALL, ZeroTermsQuery.NONE));
}

if (randomBoolean()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() {
query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
}
if (randomBoolean()) {
query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.NONE, MatchQuery.ZeroTermsQuery.ALL));
}
if (randomBoolean()) {
query.autoGenerateSynonymsPhraseQuery(randomBoolean());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,33 @@ public void testToFuzzyQuery() throws Exception {
assertEquals(expected, query);
}

public void testWithStopWords() throws Exception {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new QueryStringQueryBuilder("the quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), Occur.SHOULD)
.build();
assertEquals(expected, query);
}

public void testWithPrefixStopWords() throws Exception {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new QueryStringQueryBuilder("the* quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), Occur.SHOULD)
.build();
assertEquals(expected, query);
}

private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
Settings build = Settings.builder().put(oldIndexSettings)
.put(indexSettings)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,33 @@ public void testLenientToPrefixQuery() throws Exception {
assertEquals(expected, query);
}

public void testWithStopWords() throws Exception {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new SimpleQueryStringBuilder("the quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), BooleanClause.Occur.SHOULD)
.build();
assertEquals(expected, query);
}

public void testWithPrefixStopWords() throws Exception {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new SimpleQueryStringBuilder("the* quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "fox")), BooleanClause.Occur.SHOULD)
.build();
assertEquals(expected, query);
}

private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
Settings build = Settings.builder().put(oldIndexSettings)
.put(indexSettings)
Expand Down

0 comments on commit 41efd67

Please sign in to comment.