Skip to content

Commit

Permalink
Add advance(int) for numeric values in order to allow point based opt…
Browse files Browse the repository at this point in the history
…imization to kick in (opensearch-project#12089)

* Add advance(int) for numeric values in order to allow point based optimization to kick in

Signed-off-by: Andriy Redko <andriy.redko@aiven.io>

* Address code review comments

Signed-off-by: Andriy Redko <andriy.redko@aiven.io>

---------

Signed-off-by: Andriy Redko <andriy.redko@aiven.io>
  • Loading branch information
reta authored and Peter Alfonsi committed Mar 1, 2024
1 parent bea4e5c commit 6131639
Show file tree
Hide file tree
Showing 17 changed files with 321 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix memory leak issue in ReorganizingLongHash ([#11953](https://github.com/opensearch-project/OpenSearch/issues/11953))
- Prevent setting remote_snapshot store type on index creation ([#11867](https://github.com/opensearch-project/OpenSearch/pull/11867))
- [BUG] Fix remote shards balancer when filtering throttled nodes ([#11724](https://github.com/opensearch-project/OpenSearch/pull/11724))
- Add advance(int) for numeric values in order to allow point based optimization to kick in ([#12089](https://github.com/opensearch-project/OpenSearch/pull/12089))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2389,4 +2389,185 @@ public void testLongSortOptimizationCorrectResults() throws InterruptedException
}
}

public void testSimpleSortsPoints() throws Exception {
final int docs = 100;

Random random = random();
assertAcked(
prepareCreate("test").setMapping(
XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("str_value")
.field("type", "keyword")
.endObject()
.startObject("boolean_value")
.field("type", "boolean")
.endObject()
.startObject("byte_value")
.field("type", "byte")
.endObject()
.startObject("short_value")
.field("type", "short")
.endObject()
.startObject("integer_value")
.field("type", "integer")
.endObject()
.startObject("long_value")
.field("type", "long")
.endObject()
.startObject("unsigned_long_value")
.field("type", "unsigned_long")
.endObject()
.startObject("float_value")
.field("type", "float")
.endObject()
.startObject("half_float_value")
.field("type", "half_float")
.endObject()
.startObject("double_value")
.field("type", "double")
.endObject()
.endObject()
.endObject()
)
);
ensureGreen();
BigInteger UNSIGNED_LONG_BASE = Numbers.MAX_UNSIGNED_LONG_VALUE.subtract(BigInteger.valueOf(10000 * docs));
List<IndexRequestBuilder> builders = new ArrayList<>();
for (int i = 0; i < docs / 2; i++) {
IndexRequestBuilder builder = client().prepareIndex("test")
.setId(Integer.toString(i))
.setSource(
jsonBuilder().startObject()
.field("str_value", new String(new char[] { (char) (97 + i), (char) (97 + i) }))
.field("boolean_value", true)
.field("byte_value", i)
.field("short_value", i)
.field("integer_value", i)
.field("long_value", i)
.field("unsigned_long_value", UNSIGNED_LONG_BASE.add(BigInteger.valueOf(10000 * i)))
.field("float_value", 32 * i)
.field("half_float_value", 16 * i)
.field("double_value", 64 * i)
.endObject()
);
builders.add(builder);
}

// We keep half of the docs with numeric values and other half without
for (int i = docs / 2; i < docs; i++) {
IndexRequestBuilder builder = client().prepareIndex("test")
.setId(Integer.toString(i))
.setSource(
jsonBuilder().startObject().field("str_value", new String(new char[] { (char) (97 + i), (char) (97 + i) })).endObject()
);
builders.add(builder);
}

int j = 0;
Collections.shuffle(builders, random);
for (IndexRequestBuilder builder : builders) {
builder.get();
if ((++j % 25) == 0) {
refresh();
}

}
refresh();
indexRandomForConcurrentSearch("test");

final int size = 2;
// HALF_FLOAT
SearchResponse searchResponse = client().prepareSearch()
.setQuery(matchAllQuery())
.setSize(size)
.addSort("half_float_value", SortOrder.ASC)
.get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("half_float_value", SortOrder.DESC).get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));

// FLOAT
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("float_value", SortOrder.ASC).get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("float_value", SortOrder.DESC).get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));

// DOUBLE
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("double_value", SortOrder.ASC).get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("double_value", SortOrder.DESC).get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));

// UNSIGNED_LONG
searchResponse = client().prepareSearch()
.setQuery(matchAllQuery())
.setSize(size)
.addSort("unsigned_long_value", SortOrder.ASC)
.get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));
searchResponse = client().prepareSearch()
.setQuery(matchAllQuery())
.setSize(size)
.addSort("unsigned_long_value", SortOrder.DESC)
.get();

assertHitCount(searchResponse, docs);
assertThat(searchResponse.getHits().getHits().length, equalTo(size));
for (int i = 0; i < size; i++) {
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i)));
}

assertThat(searchResponse.toString(), not(containsString("error")));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
* aggregations, which only use {@link #advanceExact(int)} and
* {@link #longValue()}.
*
* In case when optimizations based on point values are used, the {@link #advance(int)}
* and, optionally, {@link #cost()} have to be implemented as well.
*
* @opensearch.internal
*/
public abstract class AbstractNumericDocValues extends NumericDocValues {
Expand Down
34 changes: 34 additions & 0 deletions server/src/main/java/org/opensearch/index/fielddata/FieldData.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.Numbers;
import org.opensearch.common.geo.GeoPoint;
Expand Down Expand Up @@ -76,6 +77,10 @@ public double doubleValue() throws IOException {
throw new UnsupportedOperationException();
}

@Override
public int advance(int target) throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
};
}

Expand Down Expand Up @@ -561,6 +566,10 @@ public boolean advanceExact(int doc) throws IOException {
return values.advanceExact(doc);
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
}

/**
Expand Down Expand Up @@ -591,6 +600,10 @@ public int docValueCount() {
return values.docValueCount();
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
}

/**
Expand Down Expand Up @@ -622,6 +635,12 @@ public long longValue() throws IOException {
public int docID() {
return docID;
}

@Override
public int advance(int target) throws IOException {
docID = values.advance(target);
return docID;
}
}

/**
Expand Down Expand Up @@ -683,6 +702,11 @@ public boolean advanceExact(int target) throws IOException {
public long longValue() throws IOException {
return value;
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
};
}

Expand Down Expand Up @@ -715,6 +739,11 @@ public boolean advanceExact(int target) throws IOException {
public long longValue() throws IOException {
return value.longValue();
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
};
}

Expand Down Expand Up @@ -742,6 +771,11 @@ public boolean advanceExact(int target) throws IOException {
public double doubleValue() throws IOException {
return value;
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ public long longValue() throws IOException {
public int docID() {
return docID;
}

@Override
public int advance(int target) throws IOException {
return NumericDoubleValues.this.advance(target);
}
};
}

Expand All @@ -95,6 +100,23 @@ public long longValue() throws IOException {
public int docID() {
return docID;
}

@Override
public int advance(int target) throws IOException {
return NumericDoubleValues.this.advance(target);
}
};
}

/**
* Advances to the first beyond the current whose document number is greater than or equal to
* <i>target</i>, and returns the document number itself. Exhausts the iterator and returns {@link
* org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS} if <i>target</i> is greater than the highest document number in the set.
*
* This method is being used by {@link org.apache.lucene.search.comparators.NumericComparator.NumericLeafComparator} when point values optimization kicks
* in and is implemented by most numeric types.
*/
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,8 @@ public double nextValue() throws IOException {
return in.doubleValue();
}

@Override
public int advance(int target) throws IOException {
return in.advance(target);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,9 @@ public NumericDoubleValues getDoubleValues() {
return values;
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,8 @@ public NumericDocValues getLongValues() {
return values;
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,8 @@ public SortedNumericDocValues getLongValues() {
return values;
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,15 @@ protected SortedNumericDoubleValues() {}
*/
public abstract int docValueCount();

/**
* Advances to the first beyond the current whose document number is greater than or equal to
* <i>target</i>, and returns the document number itself. Exhausts the iterator and returns {@link
* org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS} if <i>target</i> is greater than the highest document number in the set.
*
* This method is being used by {@link org.apache.lucene.search.comparators.NumericComparator.NumericLeafComparator} when point values optimization kicks
* in and is implemented by most numeric types.
*/
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,8 @@ public NumericDocValues getLongValues() {
return values;
}

@Override
public int advance(int target) throws IOException {
return values.advance(target);
}
}

0 comments on commit 6131639

Please sign in to comment.