Skip to content

Commit

Permalink
Fail composite aggregation if after key is unparsable (elastic#74252)
Browse files Browse the repository at this point in the history
If the after key cannot parse back to the same value we generated it from, when the client sends it back we will land on a wrong page.  If this page is earlier, it is likely we will (eventually) generate the same wrong after key, resulting in an infinite loop as the client repeatedly ask to retrieve the same page or pages of data.  This change fixes that by failing the composite aggregation (with an exception) if the after key is unparsable with the given format.  We provide as much information about what failed as possible.
 Conflicts:
	server/src/main/java/org/elasticsearch/search/DocValueFormat.java
  • Loading branch information
not-napoleon committed Jun 22, 2021
1 parent e5e738f commit a29d883
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 10 deletions.
Expand Up @@ -511,6 +511,61 @@ setup:
- match: { aggregations.test.buckets.0.doc_count: 1 }

---
"Composite aggregation with invalid format":
- skip:
version: " - 7.99.99"
reason: After key parse checking added in 7.14 (backport pending)

- do:
catch: /created output it couldn't parse/
search:
rest_total_hits_as_int: true
index: test
body:
aggregations:
test:
composite:
sources: [
{
"date": {
"date_histogram": {
"field": "date",
"calendar_interval": "1d",
# mixes week based and month based
"format": "YYYY-MM-dd"
}
}
}
]

---
"Composite aggregation with lossy format":
- skip:
version: " - 7.99.99"
reason: After key parse checking added in 7.14 (backport pending)

- do:
catch: /created output it couldn't parse/
search:
rest_total_hits_as_int: true
index: test
body:
aggregations:
test:
composite:
sources: [
{
"date": {
"date_histogram": {
"field": "date",
"calendar_interval": "1d",
# format is lower resolution than buckets, after key will lose data
"format": "yyyy-MM"
}
}
}
]
---
"Composite aggregation with date_histogram offset":
- skip:
version: " - 7.5.99"
Expand Down
Expand Up @@ -10,11 +10,10 @@

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils;

import java.io.IOException;
import java.util.function.LongUnaryOperator;
Expand Down Expand Up @@ -44,7 +43,11 @@ void setAfter(Comparable value) {
} else if (value instanceof Number) {
afterValue = ((Number) value).longValue();
} else {
afterValue = GeoTileUtils.longEncode(value.toString());
afterValue = format.parseLong(
value.toString(),
false,
() -> { throw new IllegalArgumentException("now() is not supported in [after] key"); }
);
}
}
}
Expand Up @@ -405,34 +405,58 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
* Format <code>obj</code> using the provided {@link DocValueFormat}.
* If the format is equals to {@link DocValueFormat#RAW}, the object is returned as is
* for numbers and a string for {@link BytesRef}s.
*
* This method will then attempt to parse the formatted value using the specified format,
* and throw an IllegalArgumentException if parsing fails. This in turn prevents us from
* returning an after_key which we can't subsequently parse into the original value.
*/
static Object formatObject(Object obj, DocValueFormat format) {
if (obj == null) {
return null;
}
Object formatted = obj;
Object parsed;
if (obj.getClass() == BytesRef.class) {
BytesRef value = (BytesRef) obj;
if (format == DocValueFormat.RAW) {
return value.utf8ToString();
formatted = value.utf8ToString();
} else {
return format.format(value);
formatted = format.format(value);
}
parsed = format.parseBytesRef(formatted.toString());
if (parsed.equals(obj) == false) {
throw new IllegalArgumentException("Format [" + format + "] created output it couldn't parse for value [" + obj +"] "
+ "of type [" + obj.getClass() + "]. parsed value: [" + parsed + "(" + parsed.getClass() + ")]");
}
} else if (obj.getClass() == Long.class) {
long value = (long) obj;
if (format == DocValueFormat.RAW) {
return value;
formatted = value;
} else {
return format.format(value);
formatted = format.format(value);
}
parsed = format.parseLong(formatted.toString(), false, () -> {
throw new UnsupportedOperationException("Using now() is not supported in after keys");
});
if (parsed.equals(((Number) obj).longValue()) == false) {
throw new IllegalArgumentException("Format [" + format + "] created output it couldn't parse for value [" + obj +"] "
+ "of type [" + obj.getClass() + "]. parsed value: [" + parsed + "(" + parsed.getClass() + ")]");
}
} else if (obj.getClass() == Double.class) {
double value = (double) obj;
if (format == DocValueFormat.RAW) {
return value;
formatted = value;
} else {
return format.format(value);
formatted = format.format(value);
}
parsed = format.parseDouble(formatted.toString(), false,
() -> {throw new UnsupportedOperationException("Using now() is not supported in after keys");});
if (parsed.equals(((Number) obj).doubleValue()) == false) {
throw new IllegalArgumentException("Format [" + format + "] created output it couldn't parse for value [" + obj +"] "
+ "of type [" + obj.getClass() + "]. parsed value: [" + parsed + "(" + parsed.getClass() + ")]");
}
}
return obj;
return formatted;
}

static class ArrayMap extends AbstractMap<String, Object> implements Comparable<ArrayMap> {
Expand Down
Expand Up @@ -377,6 +377,38 @@ public void testCompareCompositeKeyValuesHaveDifferentTypes() {
containsString("java.lang.String cannot be cast to"));
}

public void testFormatObjectChecked() {
DocValueFormat weekYearMonth = new DocValueFormat.DateTime(
// YYYY is week-based year. The parser will ignore MM (month-of-year) and dd (day-of-month)
DateFormatter.forPattern("YYYY-MM-dd"),
ZoneOffset.UTC,
DateFieldMapper.Resolution.MILLISECONDS
);
long epoch = 1622060077L; // May 25th 2021, evening
expectThrows(IllegalArgumentException.class, () -> InternalComposite.formatObject(epoch, weekYearMonth));
}

public void testFormatDateEpochTimezone() {
DocValueFormat epochSecond = new DocValueFormat.DateTime(
// YYYY is week-based year. The parser will ignore MM (month-of-year) and dd (day-of-month)
DateFormatter.forPattern("epoch_second"),
ZoneOffset.ofHours(-2),
DateFieldMapper.Resolution.MILLISECONDS
);
DocValueFormat epochMillis = new DocValueFormat.DateTime(
// YYYY is week-based year. The parser will ignore MM (month-of-year) and dd (day-of-month)
DateFormatter.forPattern("epoch_millis"),
ZoneOffset.ofHours(-2),
DateFieldMapper.Resolution.MILLISECONDS
);
long epoch = 1622060077L; // May 25th 2021, evening
Object actual = InternalComposite.formatObject(epoch, epochSecond);
assertEquals("1622060.077", actual.toString());

actual = InternalComposite.formatObject(epoch, epochMillis);
assertEquals("1622060077", actual.toString());
}

private InternalComposite.ArrayMap createMap(List<String> fields, Comparable[] values) {
List<DocValueFormat> formats = IntStream.range(0, fields.size())
.mapToObj(i -> DocValueFormat.RAW).collect(Collectors.toList());
Expand Down

0 comments on commit a29d883

Please sign in to comment.