Skip to content

Commit

Permalink
Enable numeric sort optimisation for few numerical sort types (#6321)
Browse files Browse the repository at this point in the history
This commit restores the sort optimization to use BKD to skip non-competitive
docs for numeric types whose BYTES size match between the BKD leaf and doc
values encoding. For now this is only LONG, DOUBLE, DATE, and DATE_NANOSECONDS
as the remaining NumericTypes use 64bit docvalue encoding while the BKD uses
smaller byte encoded space.

This also updates the QueryPhase to remove the long time unnecessary in order
doc id check and minDoc boolean query for skipping non-competitive docs that is
handled by all Lucene 7.0+ sorted indexes.

Existing tests are updated.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
Signed-off-by: gashutos <gashutos@amazon.com>

Co-authored-by: Nicholas Walter Knize <nknize@apache.org>
Co-authored-by: Chaitanya Gohel <gashutos@amazon.com>
  • Loading branch information
3 people committed Feb 15, 2023
1 parent 6b35c32 commit 6bb9e3e
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 65 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Segment Replication] Fix for peer recovery ([#5344](https://github.com/opensearch-project/OpenSearch/pull/5344))
- Fix weighted shard routing state across search requests([#6004](https://github.com/opensearch-project/OpenSearch/pull/6004))
- [Segment Replication] Fix bug where inaccurate sequence numbers are sent during replication ([#6122](https://github.com/opensearch-project/OpenSearch/pull/6122))
- Enable numeric sort optimisation for few numerical sort types ([#6321](https://github.com/opensearch-project/OpenSearch/pull/6321))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader, executor), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -379,7 +380,6 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size, executor));
QueryPhase.executeInternal(context.withCleanQueryResult(), queryPhaseSearcher);
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
reader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader, executor), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -331,10 +332,10 @@ public void testInOrderScrollOptimization() throws Exception {
assertNull(context.queryResult().terminatedEarly());
assertThat(context.terminateAfter(), equalTo(0));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertProfileData(context, "MatchAllDocsQuery", query -> {
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
Expand All @@ -346,25 +347,22 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size, executor));
QueryPhase.executeInternal(context.withCleanQueryResult().withProfilers(), queryPhaseSearcher);
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
assertThat(collector.getReason(), equalTo("search_terminate_after_count"));
assertThat(collector.getReason(), equalTo("search_top_hits"));
assertThat(collector.getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(1));
assertThat(collector.getProfiledChildren().get(0).getReason(), equalTo("search_top_hits"));
assertThat(collector.getProfiledChildren().get(0).getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(0));
});

reader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,76 @@ public abstract class IndexNumericFieldData implements IndexFieldData<LeafNumeri
* @opensearch.internal
*/
public enum NumericType {
BOOLEAN(false, SortField.Type.LONG, CoreValuesSourceType.BOOLEAN),
BYTE(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
SHORT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
INT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC),
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC);
BOOLEAN(false, SortField.Type.LONG, CoreValuesSourceType.BOOLEAN) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
BYTE(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
SHORT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
INT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE) {
@Deprecated
@Override
public PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
};

private final boolean floatingPoint;
private final ValuesSourceType valuesSourceType;
Expand All @@ -93,6 +153,24 @@ public final boolean isFloatingPoint() {
public final ValuesSourceType getValuesSourceType() {
return valuesSourceType;
}

@Deprecated
protected abstract PointSortOptimization applyPointSortOptimization();
}

/**
* Controls whether to apply sort optimization to skip non-competitive docs
* based on the BKD index.
*
* @deprecated this control will be removed in a future version of OpenSearch
*
* @opensearch.internal
* @opensearch.experimental
*/
@Deprecated
private enum PointSortOptimization {
ENABLED,
DISABLED
}

/**
Expand Down Expand Up @@ -133,8 +211,21 @@ public final SortField sortField(
: SortedNumericSelector.Type.MIN;
SortField sortField = new SortedNumericSortField(getFieldName(), getNumericType().sortFieldType, reverse, selectorType);
sortField.setMissingValue(source.missingObject(missingValue, reverse));
// todo: remove since deprecated
sortField.setOptimizeSortWithPoints(false);

// LUCENE-9280 added the ability for collectors to skip non-competitive
// documents when top docs are sorted by other fields different from the _score.
// However, from Lucene 9 onwards, numeric sort optimisation requires the byte size
// for points (BKD index) and doc values (columnar) and SortField.Type to be matched.
// NumericType violates this requirement
// (see: https://github.com/opensearch-project/OpenSearch/issues/2063#issuecomment-1069358826 test failure)
// because it uses the largest byte size (LONG) for the SortField of most types. The section below disables
// the BKD based sort optimization for numeric types whose encoded BYTE size does not match the comparator (LONG)/
// So as of now, we can only enable for DATE, DATE_NANOSECONDS, LONG, DOUBLE.
// BOOLEAN, BYTE, SHORT, INT, HALF_FLOAT, FLOAT (use long for doc values, but fewer for BKD Points)
// todo : Enable other SortField.Type as well, that will require wider change
if (getNumericType().applyPointSortOptimization() == PointSortOptimization.DISABLED) {
sortField.setOptimizeSortWithPoints(false);
}
return sortField;
}

Expand Down
31 changes: 1 addition & 30 deletions server/src/main/java/org/opensearch/search/query/QueryPhase.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,11 @@
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.lucene.queries.MinDocQuery;
import org.opensearch.lucene.queries.SearchAfterSortedDocQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
Expand Down Expand Up @@ -201,17 +198,7 @@ static boolean executeInternal(SearchContext searchContext, QueryPhaseSearcher q

} else {
final ScoreDoc after = scrollContext.lastEmittedDoc;
if (returnsDocsInOrder(query, searchContext.sort())) {
// now this gets interesting: since we sort in index-order, we can directly
// skip to the desired doc
if (after != null) {
query = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST)
.add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER)
.build();
}
// ... and stop collecting after ${size} matches
searchContext.terminateAfter(searchContext.size());
} else if (canEarlyTerminate(reader, searchContext.sort())) {
if (canEarlyTerminate(reader, searchContext.sort())) {
// now this gets interesting: since the search sort is a prefix of the index sort, we can directly
// skip to the desired doc
if (after != null) {
Expand Down Expand Up @@ -366,22 +353,6 @@ private static boolean searchWithCollector(
return topDocsFactory.shouldRescore();
}

/**
* Returns true if the provided <code>query</code> returns docs in index order (internal doc ids).
* @param query The query to execute
* @param sf The query sort
*/
private static boolean returnsDocsInOrder(Query query, SortAndFormats sf) {
if (sf == null || Sort.RELEVANCE.equals(sf.sort)) {
// sort by score
// queries that return constant scores will return docs in index
// order since Lucene tie-breaks on the doc id
return query.getClass() == ConstantScoreQuery.class || query.getClass() == MatchAllDocsQuery.class;
} else {
return Sort.INDEXORDER.equals(sf.sort);
}
}

/**
* Returns whether collection within the provided <code>reader</code> can be early-terminated if it sorts
* with <code>sortAndFormats</code>.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -358,7 +359,6 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size));
QueryPhase.executeInternal(context.withCleanQueryResult());
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
reader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -299,12 +300,11 @@ public void testInOrderScrollOptimization() throws Exception {
QueryPhase.executeInternal(context.withCleanQueryResult().withProfilers());
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertNull(context.queryResult().terminatedEarly());
assertThat(context.terminateAfter(), equalTo(0));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertProfileData(context, "MatchAllDocsQuery", query -> {
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
Expand All @@ -316,25 +316,22 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size));
QueryPhase.executeInternal(context.withCleanQueryResult().withProfilers());
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
assertThat(collector.getReason(), equalTo("search_terminate_after_count"));
assertThat(collector.getReason(), equalTo("search_top_hits"));
assertThat(collector.getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(1));
assertThat(collector.getProfiledChildren().get(0).getReason(), equalTo("search_top_hits"));
assertThat(collector.getProfiledChildren().get(0).getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(0));
});

reader.close();
Expand Down

0 comments on commit 6bb9e3e

Please sign in to comment.