Skip to content

Commit

Permalink
Parse composite patterns using ClassicFormat.parseObject (elastic#40100)
Browse files Browse the repository at this point in the history
Java-time fails parsing composite patterns when first pattern matches only the prefix of the input. It expects pattern in longest to shortest order. Because of this constructing just one DateTimeFormatter with appendOptional is not sufficient. Parsers have to be iterated and if the parsing fails, the next one in order should be used. In order to not degrade performance parsing should not be throw exceptions on failure. Format.parseObject was used as it only returns null when parsing failed and allows to check if full input was read.
 
closes elastic#39916
  • Loading branch information
pgomulka committed Mar 27, 2019
1 parent b250cb7 commit 7e4d23d
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1585,7 +1585,7 @@ static JavaDateFormatter merge(String pattern, List<DateFormatter> formatters) {
if (printer == null) {
printer = javaDateFormatter.getPrinter();
}
dateTimeFormatters.add(javaDateFormatter.getParser());
dateTimeFormatters.addAll(javaDateFormatter.getParsers());
roundupBuilder.appendOptional(javaDateFormatter.getRoundupParser());
}
DateTimeFormatter roundUpParser = roundupBuilder.toFormatter(Locale.ROOT);
Expand Down Expand Up @@ -1632,7 +1632,7 @@ public static ZonedDateTime from(TemporalAccessor accessor) {
if (zoneId == null) {
zoneId = ZoneOffset.UTC;
}

LocalDate localDate = accessor.query(TemporalQueries.localDate());
LocalTime localTime = accessor.query(TemporalQueries.localTime());
boolean isLocalDateSet = localDate != null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.elasticsearch.common.Strings;

import java.text.ParsePosition;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
Expand All @@ -29,7 +30,10 @@
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalField;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
Expand All @@ -39,6 +43,7 @@ class JavaDateFormatter implements DateFormatter {

// base fields which should be used for default parsing, when we round up for date math
private static final Map<TemporalField, Long> ROUND_UP_BASE_FIELDS = new HashMap<>(6);

{
ROUND_UP_BASE_FIELDS.put(ChronoField.MONTH_OF_YEAR, 1L);
ROUND_UP_BASE_FIELDS.put(ChronoField.DAY_OF_MONTH, 1L);
Expand All @@ -50,22 +55,15 @@ class JavaDateFormatter implements DateFormatter {

private final String format;
private final DateTimeFormatter printer;
private final DateTimeFormatter parser;
private final List<DateTimeFormatter> parsers;
private final DateTimeFormatter roundupParser;

private JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeFormatter roundupParser, DateTimeFormatter parser) {
this.format = format;
this.printer = printer;
this.roundupParser = roundupParser;
this.parser = parser;
}

JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeFormatter... parsers) {
this(format, printer, builder -> ROUND_UP_BASE_FIELDS.forEach(builder::parseDefaulting), parsers);
}

JavaDateFormatter(String format, DateTimeFormatter printer, Consumer<DateTimeFormatterBuilder> roundupParserConsumer,
DateTimeFormatter... parsers) {
DateTimeFormatter... parsers) {
if (printer == null) {
throw new IllegalArgumentException("printer may not be null");
}
Expand All @@ -79,26 +77,21 @@ private JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeForm
}
this.printer = printer;
this.format = format;

if (parsers.length == 0) {
this.parser = printer;
} else if (parsers.length == 1) {
this.parser = parsers[0];
this.parsers = Collections.singletonList(printer);
} else {
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
for (DateTimeFormatter parser : parsers) {
builder.appendOptional(parser);
}
this.parser = builder.toFormatter(Locale.ROOT);
this.parsers = Arrays.asList(parsers);
}

DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
if (format.contains("||") == false) {
builder.append(this.parser);
builder.append(this.parsers.get(0));
}
roundupParserConsumer.accept(builder);
DateTimeFormatter roundupFormatter = builder.toFormatter(parser.getLocale());
DateTimeFormatter roundupFormatter = builder.toFormatter(locale());
if (printer.getZone() != null) {
roundupFormatter = roundupFormatter.withZone(printer.getZone());
roundupFormatter = roundupFormatter.withZone(zone());
}
this.roundupParser = roundupFormatter;
}
Expand All @@ -107,10 +100,6 @@ DateTimeFormatter getRoundupParser() {
return roundupParser;
}

DateTimeFormatter getParser() {
return parser;
}

DateTimeFormatter getPrinter() {
return printer;
}
Expand All @@ -122,30 +111,64 @@ public TemporalAccessor parse(String input) {
}

try {
return parser.parse(input);
return doParse(input);
} catch (DateTimeParseException e) {
throw new IllegalArgumentException("failed to parse date field [" + input + "] with format [" + format + "]", e);
}
}

/**
* Attempt parsing the input without throwing exception. If multiple parsers are provided,
* it will continue iterating if the previous parser failed. The pattern must fully match, meaning whole input was used.
* This also means that this method depends on <code>DateTimeFormatter.ClassicFormat.parseObject</code>
* which does not throw exceptions when parsing failed.
*
* The approach with collection of parsers was taken because java-time requires ordering on optional (composite)
* patterns. Joda does not suffer from this.
* https://bugs.openjdk.java.net/browse/JDK-8188771
*
* @param input An arbitrary string resembling the string representation of a date or time
* @return a TemporalAccessor if parsing was successful.
* @throws DateTimeParseException when unable to parse with any parsers
*/
private TemporalAccessor doParse(String input) {
if (parsers.size() > 1) {
for (DateTimeFormatter formatter : parsers) {
ParsePosition pos = new ParsePosition(0);
Object object = formatter.toFormat().parseObject(input, pos);
if (parsingSucceeded(object, input, pos) == true) {
return (TemporalAccessor) object;
}
}
throw new DateTimeParseException("Failed to parse with all enclosed parsers", input, 0);
}
return this.parsers.get(0).parse(input);
}

private boolean parsingSucceeded(Object object, String input, ParsePosition pos) {
return object != null && pos.getIndex() == input.length();
}

@Override
public DateFormatter withZone(ZoneId zoneId) {
// shortcurt to not create new objects unnecessarily
if (zoneId.equals(parser.getZone())) {
if (zoneId.equals(zone())) {
return this;
}

return new JavaDateFormatter(format, printer.withZone(zoneId), roundupParser.withZone(zoneId), parser.withZone(zoneId));
return new JavaDateFormatter(format, printer.withZone(zoneId),
parsers.stream().map(p -> p.withZone(zoneId)).toArray(size -> new DateTimeFormatter[size]));
}

@Override
public DateFormatter withLocale(Locale locale) {
// shortcurt to not create new objects unnecessarily
if (locale.equals(parser.getLocale())) {
if (locale.equals(locale())) {
return this;
}

return new JavaDateFormatter(format, printer.withLocale(locale), roundupParser.withLocale(locale), parser.withLocale(locale));
return new JavaDateFormatter(format, printer.withLocale(locale),
parsers.stream().map(p -> p.withLocale(locale)).toArray(size -> new DateTimeFormatter[size]));
}

@Override
Expand All @@ -170,7 +193,7 @@ public ZoneId zone() {

@Override
public DateMathParser toDateMathParser() {
return new JavaDateMathParser(format, parser, roundupParser);
return new JavaDateMathParser(format, this, getRoundupParser());
}

@Override
Expand All @@ -186,12 +209,16 @@ public boolean equals(Object obj) {
JavaDateFormatter other = (JavaDateFormatter) obj;

return Objects.equals(format, other.format) &&
Objects.equals(locale(), other.locale()) &&
Objects.equals(this.printer.getZone(), other.printer.getZone());
Objects.equals(locale(), other.locale()) &&
Objects.equals(this.printer.getZone(), other.printer.getZone());
}

@Override
public String toString() {
return String.format(Locale.ROOT, "format[%s] locale[%s]", format, locale());
}

Collection<DateTimeFormatter> getParsers() {
return parsers;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.time.temporal.TemporalAdjusters;
import java.time.temporal.TemporalQueries;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.LongSupplier;

/**
Expand All @@ -46,11 +47,11 @@
*/
public class JavaDateMathParser implements DateMathParser {

private final DateTimeFormatter formatter;
private final JavaDateFormatter formatter;
private final DateTimeFormatter roundUpFormatter;
private final String format;

JavaDateMathParser(String format, DateTimeFormatter formatter, DateTimeFormatter roundUpFormatter) {
JavaDateMathParser(String format, JavaDateFormatter formatter, DateTimeFormatter roundUpFormatter) {
this.format = format;
Objects.requireNonNull(formatter);
this.formatter = formatter;
Expand Down Expand Up @@ -215,20 +216,20 @@ private Instant parseDateTime(String value, ZoneId timeZone, boolean roundUpIfNo
throw new ElasticsearchParseException("cannot parse empty date");
}

DateTimeFormatter formatter = roundUpIfNoTime ? this.roundUpFormatter : this.formatter;
Function<String,TemporalAccessor> formatter = roundUpIfNoTime ? this.roundUpFormatter::parse : this.formatter::parse;
try {
if (timeZone == null) {
return DateFormatters.from(formatter.parse(value)).toInstant();
return DateFormatters.from(formatter.apply(value)).toInstant();
} else {
TemporalAccessor accessor = formatter.parse(value);
TemporalAccessor accessor = formatter.apply(value);
ZoneId zoneId = TemporalQueries.zone().queryFrom(accessor);
if (zoneId != null) {
timeZone = zoneId;
}

return DateFormatters.from(accessor).withZoneSameLocal(timeZone).toInstant();
}
} catch (DateTimeParseException e) {
} catch (IllegalArgumentException | DateTimeParseException e) {
throw new ElasticsearchParseException("failed to parse date field [{}] with format [{}]: [{}]",
e, value, format, e.getMessage());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,17 @@ public void testDuellingFormatsValidParsing() {
assertSameDate("2012-W1-1", "weekyear_week_day");
}

public void testCompositeParsing(){
//in all these examples the second pattern will be used
assertSameDate("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SSS");
assertSameDate("2014-06-06T12:01:02.123", "strictDateTimeNoMillis||yyyy-MM-dd'T'HH:mm:ss.SSS");
assertSameDate("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss+HH:MM||yyyy-MM-dd'T'HH:mm:ss.SSS");
}

public void testExceptionWhenCompositeParsingFails(){
assertParseException("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SS");
}

public void testDuelingStrictParsing() {
assertSameDate("2018W313", "strict_basic_week_date");
assertParseException("18W313", "strict_basic_week_date");
Expand Down

0 comments on commit 7e4d23d

Please sign in to comment.