Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
8280124: Reduce branches decoding latin-1 chars from UTF-8 encoded bytes
Reviewed-by: rriggs, alanb, naoto
  • Loading branch information
cl4es committed Jan 18, 2022
1 parent bdfa15d commit e314a4c
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 46 deletions.
6 changes: 2 additions & 4 deletions src/java.base/share/classes/java/lang/String.java
Expand Up @@ -541,8 +541,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
offset++;
continue;
}
if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
offset + 1 < sl) {
if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3
int b2 = bytes[offset + 1];
if (!isNotContinuation(b2)) {
dst[dp++] = (byte)decode2(b1, b2);
Expand Down Expand Up @@ -698,8 +697,7 @@ static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
offset++;
continue;
}
if ((b1 == (byte) 0xc2 || b1 == (byte) 0xc3) &&
offset + 1 < sl) {
if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3
int b2 = bytes[offset + 1];
if (!isNotContinuation(b2)) {
dst[dp++] = (byte) decode2(b1, b2);
Expand Down
145 changes: 103 additions & 42 deletions test/micro/org/openjdk/bench/java/lang/StringDecode.java
Expand Up @@ -40,59 +40,120 @@

@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgs = "-Xmx1g")
@Fork(value = 3)
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 3)
@State(Scope.Thread)
public class StringDecode {

@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgs = "-Xmx1g")
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 2)
@State(Scope.Thread)
public static class WithCharset {

@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"})
private String charsetName;

private Charset charset;
private byte[] asciiString;
private byte[] utf16String;

/**
 * Resolves the charset selected by the {@code charsetName} parameter and
 * pre-encodes both sample strings with it, so the benchmark methods only
 * measure decoding.
 */
@Setup
public void setup() {
    final Charset resolved = Charset.forName(charsetName);
    charset = resolved;
    // Pure ASCII sample.
    asciiString = "ascii string".getBytes(resolved);
    // Sample containing the fullwidth digits U+FF11 and U+FF16.
    utf16String = "UTF-\uFF11\uFF16 string".getBytes(resolved);
}

/**
 * Decodes both pre-encoded samples, passing the charset by name to the
 * {@code String} constructor, and hands each result to the blackhole.
 *
 * @param bh sink that consumes the decoded strings
 * @throws Exception if the constructor rejects the charset name
 */
@Benchmark
public void decodeCharsetName(Blackhole bh) throws Exception {
    final String decodedAscii = new String(asciiString, charsetName);
    bh.consume(decodedAscii);
    final String decodedWide = new String(utf16String, charsetName);
    bh.consume(decodedWide);
}

/**
 * Decodes both pre-encoded samples using the already-resolved
 * {@code Charset} instance and hands each result to the blackhole.
 *
 * @param bh sink that consumes the decoded strings
 */
@Benchmark
public void decodeCharset(Blackhole bh) throws Exception {
    final String decodedAscii = new String(asciiString, charset);
    bh.consume(decodedAscii);
    final String decodedWide = new String(utf16String, charset);
    bh.consume(decodedWide);
}
}
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"})
private String charsetName;

private byte[] asciiDefaultString;
private byte[] utf16DefaultString;
private Charset charset;
private byte[] asciiString;
private byte[] utf16String;
private byte[] longUtf16String;
private byte[] longUtf16StartString;
private byte[] longLatin1String;

/**
 * Prepares every byte[] input used by the benchmark methods.
 *
 * The two "default" inputs are encoded with the no-argument getBytes();
 * all other inputs are encoded with the charset resolved from charsetName,
 * so the benchmarks themselves only measure decoding.
 */
@Setup
public void setup() {
// Encoded without an explicit charset (no-argument getBytes()).
asciiDefaultString = "ascii string".getBytes();
utf16DefaultString = "UTF-\uFF11\uFF16 string".getBytes();
// Resolve the parameterised charset once; everything below is encoded with it.
charset = Charset.forName(charsetName);
asciiString = "ascii string".getBytes(charset);
// Short sample containing the fullwidth digits U+FF11 and U+FF16.
utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
// Long, mostly-ASCII text whose U+FF11 characters appear only in the last few lines.
longUtf16String = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
Suspendisse potenti.
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
hendrerit sapien. \uFF11Donec aliquam mattis lectus eu ultrices. Duis eu nisl\uFF11
euismod, blandit mauris vel, \uFF11placerat urna. Etiam malesuada enim purus,
tristique mollis odio blandit quis.\uFF11 Vivamus posuere. \uFF11
\uFF11
""".getBytes(charset);
// Same text, but with the U+FF11 characters placed at the very start and in the first few lines.
longUtf16StartString = """
\uFF11
Lorem ipsum dolor sit amet, \uFF11consectetur adipiscing elit. Aliquam ac sem eu
urna egestas \uFF11placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
Nulla \uFF11nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
sapien in \uFF11magna porta ultricies. \uFF11Sed vel pellentesque nibh. Pellentesque dictum
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
Suspendisse potenti.
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
tristique mollis odio blandit quis. Vivamus posuere.
""".getBytes(charset);

// Rows of one ASCII letter followed by U+00F6 characters, with a single U+00B6
// whose position shifts one place to the right on each successive row.
longLatin1String = """
a\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
b\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
c\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
d\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
e\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
f\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
g\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
h\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
i\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
j\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6
k\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6
l\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6
m\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6
""".getBytes(charset);
}

/**
 * Decodes the short ASCII sample, passing the charset to the
 * {@code String} constructor by name rather than as a {@code Charset}.
 *
 * @return the decoded string, returned so the harness can consume it
 * @throws Exception if the constructor rejects the charset name
 */
@Benchmark
public String decodeAsciiCharsetName() throws Exception {
    final String decoded = new String(asciiString, charsetName);
    return decoded;
}

/**
 * Decodes the short ASCII sample using the pre-resolved {@code Charset}.
 *
 * @return the decoded string, returned so the harness can consume it
 */
@Benchmark
public String decodeAscii() throws Exception {
    final String decoded = new String(asciiString, charset);
    return decoded;
}

/**
 * Decodes the long sample built from U+00B6 / U+00F6 characters using the
 * pre-resolved {@code Charset}.
 *
 * @return the decoded string, returned so the harness can consume it
 */
@Benchmark
public String decodeLatin1Long() throws Exception {
    final String decoded = new String(longLatin1String, charset);
    return decoded;
}

/**
 * Decodes the short sample that contains fullwidth digits, using the
 * pre-resolved {@code Charset}.
 *
 * @return the decoded string, returned so the harness can consume it
 */
@Benchmark
public String decodeUTF16Short() throws Exception {
    final String decoded = new String(utf16String, charset);
    return decoded;
}

/**
 * Decodes the long sample whose fullwidth characters appear only near the
 * end of the text, using the pre-resolved {@code Charset}.
 *
 * @return the decoded string, returned so the harness can consume it
 */
@Benchmark
public String decodeUTF16LongEnd() throws Exception {
    final String decoded = new String(longUtf16String, charset);
    return decoded;
}

/**
 * Decodes the long sample whose fullwidth characters appear at the start
 * of the text, using the pre-resolved {@code Charset}.
 *
 * @return the decoded string, returned so the harness can consume it
 */
@Benchmark
public String decodeUTF16LongStart() throws Exception {
    final String decoded = new String(longUtf16StartString, charset);
    return decoded;
}

@Benchmark
public void decodeDefault(Blackhole bh) throws Exception {
bh.consume(new String(asciiDefaultString));
bh.consume(new String(utf16DefaultString));
public void decodeUTF16LongMixed(Blackhole bh) throws Exception {
bh.consume(new String(longUtf16StartString, charset));
bh.consume(new String(longUtf16String, charset));
}
}

1 comment on commit e314a4c

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.