Skip to content

Commit

Permalink
8316704: Regex-free parsing of Formatter and FormatProcessor specifiers
Browse files Browse the repository at this point in the history
Reviewed-by: redestad, rgiulietti
  • Loading branch information
wenshao authored and rgiulietti committed Feb 6, 2024
1 parent 51d7169 commit 50b17d9
Show file tree
Hide file tree
Showing 5 changed files with 293 additions and 38 deletions.
39 changes: 26 additions & 13 deletions src/java.base/share/classes/java/util/FormatProcessor.java
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -30,7 +31,6 @@
import java.lang.invoke.MethodType;
import java.lang.StringTemplate.Processor;
import java.lang.StringTemplate.Processor.Linkage;
import java.util.regex.Matcher;

import jdk.internal.javac.PreviewFeature;

Expand Down Expand Up @@ -218,22 +218,35 @@ public MethodHandle linkage(List<String> fragments, MethodType type) {
* @throws MissingFormatArgumentException if not at end or found and not needed
*/
private static boolean findFormat(String fragment, boolean needed) {
Matcher matcher = Formatter.FORMAT_SPECIFIER_PATTERN.matcher(fragment);
String group;

while (matcher.find()) {
group = matcher.group();
int max = fragment.length();
for (int i = 0; i < max;) {
int n = fragment.indexOf('%', i);
if (n < 0) {
return false;
}

if (!group.equals("%%") && !group.equals("%n")) {
if (matcher.end() == fragment.length() && needed) {
return true;
}
i = n + 1;
if (i >= max) {
return false;
}

throw new MissingFormatArgumentException(group +
" is not immediately followed by an embedded expression");
char c = fragment.charAt(i);
if (c == '%' || c == 'n') {
i++;
continue;
}
int off = new Formatter.FormatSpecifierParser(null, c, i, fragment, max)
.parse();
if (off == 0) {
return false;
}
if (i + off == max && needed) {
return true;
}
throw new MissingFormatArgumentException(
fragment.substring(i - 1, i + off)
+ " is not immediately followed by an embedded expression");
}

return false;
}

Expand Down
228 changes: 203 additions & 25 deletions src/java.base/share/classes/java/util/Formatter.java
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -49,8 +50,6 @@
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.spi.NumberFormatProvider;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import java.time.DateTimeException;
import java.time.Instant;
Expand Down Expand Up @@ -2810,20 +2809,14 @@ public Formatter format(Locale l, String format, Object ... args) {
return this;
}

// %[argument_index$][flags][width][.precision][t]conversion
static final String FORMAT_SPECIFIER
= "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";

static final Pattern FORMAT_SPECIFIER_PATTERN = Pattern.compile(FORMAT_SPECIFIER);

/**
* Finds format specifiers in the format string.
*/
static List<FormatString> parse(String s) {
FormatSpecifierParser parser = null;
ArrayList<FormatString> al = new ArrayList<>();
int i = 0;
int max = s.length();
Matcher m = null; // create if needed
while (i < max) {
int n = s.indexOf('%', i);
if (n < 0) {
Expand All @@ -2846,14 +2839,16 @@ static List<FormatString> parse(String s) {
al.add(new FormatSpecifier(c));
i++;
} else {
if (m == null) {
m = FORMAT_SPECIFIER_PATTERN.matcher(s);
}
// We have already parsed a '%' at n, so we either have a
// match or the specifier at n is invalid
if (m.find(n) && m.start() == n) {
al.add(new FormatSpecifier(s, m));
i = m.end();
if (parser == null) {
parser = new FormatSpecifierParser(al, c, i, s, max);
} else {
parser.reset(c, i);
}
int off = parser.parse();
if (off > 0) {
i += off;
} else {
throw new UnknownFormatConversionException(String.valueOf(c));
}
Expand All @@ -2862,6 +2857,159 @@ static List<FormatString> parse(String s) {
return al;
}

/**
 * Regex-free parser for a single format specifier.
 *
 * <p>The parser is positioned on the first character of a specifier body
 * (the callers visible here pass the index just after the {@code '%'}) and
 * recognises the same language as the regular expression
 *
 * <blockquote><pre>
 * %(\d+\$)?([-#+ 0,(\<]*)?(\d+)?(\.\d+)?([tT])?([a-zA-Z%])
 * </pre></blockquote>
 *
 * <p>An instance can be reused for several specifiers of the same string via
 * {@link #reset(char, int)}.
 */
static final class FormatSpecifierParser {
    /** Receives each parsed specifier; {@code null} when only the length is wanted. */
    final ArrayList<FormatString> al;
    /** The string being scanned. */
    final String s;
    /** Exclusive upper bound for scanning {@link #s}. */
    final int max;
    /** Character found at {@link #start}. */
    char first;
    /** Index in {@link #s} where the current specifier body begins. */
    int start;
    /** Current scan position in {@link #s}. */
    int off;
    /** Most recently examined character. */
    char c;
    /** Length of the {@code argument_index$} part, or 0 if absent. */
    int argSize;
    /** Length of the flags part, or 0 if absent. */
    int flagSize;
    /** Length of the width part, or 0 if absent. */
    int widthSize;

    FormatSpecifierParser(ArrayList<FormatString> al, char first, int start, String s, int max) {
        this.al = al;
        this.s = s;
        this.max = max;
        reset(first, start);
    }

    /**
     * Re-targets this parser at another specifier of the same string and
     * clears the sizes recorded for the previous one.
     *
     * @param first the character at {@code start}
     * @param start index in the string where the specifier body begins
     */
    void reset(char first, int start) {
        this.first = first;
        this.c = first;
        this.start = start;
        this.off = start;

        argSize = 0;
        flagSize = 0;
        widthSize = 0;
    }

    /**
     * Parses one format specifier. If a valid specifier is found and
     * {@link #al} is not {@code null}, a {@code FormatSpecifier} is
     * constructed and appended to {@link #al}.
     *
     * <p>The accepted syntax is
     *
     * <blockquote><pre>
     * %[argument_index$][flags][width][.precision][t]conversion
     * </pre></blockquote>
     *
     * @return the number of characters consumed starting at {@link #start},
     *         or 0 if no valid format specifier is found
     */
    int parse() {
        int precisionSize = 0;

        // (\d+\$)?
        parseArgument();

        // ([-#+ 0,(\<]*)?
        parseFlag();

        // (\d+)?
        parseWidth();

        if (c == '.') {
            // (\.\d+)?
            precisionSize = parsePrecision();
            if (precisionSize == -1) {
                // a '.' must be followed by at least one digit
                return 0;
            }
        }

        // ([tT])?([a-zA-Z%])
        char t = '\0';
        char conversion = '\0';
        if ((c == 't' || c == 'T') && off + 1 < max) {
            char next = s.charAt(off + 1);
            if (isConversion(next)) {
                t = c;
                conversion = next;
                off += 2;
            }
        }
        if (conversion == '\0') {
            // No date/time prefix was consumed. This also covers 't'/'T'
            // followed by a non-conversion character: the [tT] group of the
            // regex is optional, so the letter itself is then the conversion.
            if (!isConversion(c)) {
                return 0;
            }
            conversion = c;
            ++off;
        }

        // A conversion character is always present at this point, so the
        // specifier is never empty.
        if (al != null) {
            FormatSpecifier formatSpecifier
                    = new FormatSpecifier(s, start, argSize, flagSize, widthSize, precisionSize, t, conversion);
            al.add(formatSpecifier);
        }
        return off - start;
    }

    /**
     * Consumes an optional {@code digits$} argument index. When the digits
     * are not followed by {@code '$'} nothing is consumed and {@link #c} is
     * restored to {@link #first}, so the digits can be re-read as flags or width.
     */
    private void parseArgument() {
        // (\d+\$)?
        int i = off;
        while (i < max && isDigit(c = s.charAt(i))) {
            i++;
        }
        if (i == off || c != '$') {
            c = first;
            return;
        }

        i++; // skip '$'
        if (i < max) {
            c = s.charAt(i);
        }

        argSize = i - off;
        off = i;
    }

    /** Consumes zero or more flag characters and records their count. */
    private void parseFlag() {
        // ([-#+ 0,(\<]*)?
        int i = off;
        while (i < max && Flags.isFlag(c = s.charAt(i))) {
            i++;
        }
        flagSize = i - off;
        off = i;
    }

    /** Consumes zero or more width digits and records their count. */
    private void parseWidth() {
        // (\d+)?
        int i = off;
        while (i < max && isDigit(c = s.charAt(i))) {
            i++;
        }
        widthSize = i - off;
        off = i;
    }

    /**
     * Consumes the digits following a {@code '.'}; {@link #off} must be on
     * the {@code '.'} when this is called.
     *
     * @return the length of the precision including the leading {@code '.'},
     *         or -1 if no digit follows the {@code '.'}
     */
    private int parsePrecision() {
        int i = ++off; // step over '.'
        while (i < max && isDigit(c = s.charAt(i))) {
            i++;
        }
        if (i != off) {
            int size = i - off + 1; // +1 accounts for the '.'
            off = i;
            return size;
        }
        return -1;
    }
}

/**
 * Tells whether {@code c} may appear in the conversion position of a format
 * specifier, i.e. whether it is an ASCII letter or the percent sign.
 *
 * @param c the character to test
 * @return {@code true} for {@code a-z}, {@code A-Z} and {@code '%'}
 */
static boolean isConversion(char c) {
    if (c == '%') {
        return true;
    }
    boolean lowerCase = 'a' <= c && c <= 'z';
    boolean upperCase = 'A' <= c && c <= 'Z';
    return lowerCase || upperCase;
}

/**
 * Tells whether {@code c} is one of the ASCII digits {@code '0'} to
 * {@code '9'}. Only this range is accepted.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is an ASCII decimal digit
 */
private static boolean isDigit(char c) {
    int distance = c - '0';
    return distance >= 0 && distance <= 9;
}

interface FormatString {
int index();
void print(Formatter fmt, Object arg, Locale l) throws IOException;
Expand Down Expand Up @@ -2984,21 +3132,44 @@ private void conversion(char conv) {
}
}

FormatSpecifier(String s, Matcher m) {
index(s, m.start(1), m.end(1));
flags(s, m.start(2), m.end(2));
width(s, m.start(3), m.end(3));
precision(s, m.start(4), m.end(4));

int tTStart = m.start(5);
if (tTStart >= 0) {
FormatSpecifier(
String s,
int i,
int argSize,
int flagSize,
int widthSize,
int precisionSize,
char t,
char conversion
) {
int argEnd = i + argSize;
int flagEnd = argEnd + flagSize;
int widthEnd = flagEnd + widthSize;
int precisionEnd = widthEnd + precisionSize;

if (argSize > 0) {
index(s, i, argEnd);
}
if (flagSize > 0) {
flags(s, argEnd, flagEnd);
}
if (widthSize > 0) {
width(s, flagEnd, widthEnd);
}
if (precisionSize > 0) {
precision(s, widthEnd, precisionEnd);
}
if (t != '\0') {
dt = true;
if (s.charAt(tTStart) == 'T') {
if (t == 'T') {
flags = Flags.add(flags, Flags.UPPERCASE);
}
}
conversion(s.charAt(m.start(6)));
conversion(conversion);
check();
}

private void check() {
if (dt)
checkDateTime();
else if (Conversion.isGeneral(c))
Expand Down Expand Up @@ -4705,6 +4876,13 @@ private static int parse(char c) {
};
}

/**
 * Tells whether {@code c} is one of the format flag characters
 * {@code - # + (space) 0 , ( <}.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is a flag character
 */
private static boolean isFlag(char c) {
    // Membership test against the fixed set of flag characters.
    return "-#+ 0,(<".indexOf(c) >= 0;
}

// Returns a string representation of the current {@code Flags}.
public static String toString(int f) {
StringBuilder sb = new StringBuilder();
Expand Down

1 comment on commit 50b17d9

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.