Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
8279833: Loop optimization issue in String.encodeUTF8_UTF16
Reviewed-by: stuefe
Backport-of: ff8565931115d581afff679ea85b1a2d80c03b99
  • Loading branch information
GoeLin committed Jan 26, 2022
1 parent 8352e38 commit 84ed967
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 4 deletions.
11 changes: 7 additions & 4 deletions src/java.base/share/classes/java/lang/StringCoding.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -921,14 +921,17 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
char c;
while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
while (sp < sl) {
// ascii fast loop;
char c = StringUTF16.getChar(val, sp);
if (c >= '\u0080') {
break;
}
dst[dp++] = (byte)c;
sp++;
}
while (sp < sl) {
c = StringUTF16.getChar(val, sp++);
char c = StringUTF16.getChar(val, sp++);
if (c < 0x80) {
dst[dp++] = (byte)c;
} else if (c < 0x800) {
Expand Down
124 changes: 124 additions & 0 deletions test/micro/org/openjdk/bench/java/lang/StringEncode.java
@@ -0,0 +1,124 @@
/*
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.nio.charset.Charset;
import java.util.concurrent.TimeUnit;

/**
 * JMH microbenchmarks for encoding {@link String} instances to bytes via
 * {@code String.getBytes}, parameterized over several charsets.
 *
 * <p>Inputs cover a short ASCII-only string, a short string containing
 * non-ASCII characters, and two long strings that are ASCII-only except for a
 * single U+FF11 character placed near the end or near the start.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3)
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 3)
@State(Scope.Thread)
public class StringEncode {

    /** ASCII-only filler text shared by both long benchmark inputs. */
    private static final String LOREM_IPSUM =
            " Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu\n" +
            " urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.\n" +
            " Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et\n" +
            " sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum\n" +
            " dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent\n" +
            " per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla\n" +
            " sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida\n" +
            " efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.\n" +
            " Suspendisse potenti.\n" +
            "\n" +
            " Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis\n" +
            " nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet\n" +
            " sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum\n" +
            " consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.\n" +
            " Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id\n" +
            " hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl\n" +
            " euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,\n" +
            " tristique mollis odio blandit quis. Vivamus posuere.\n";

    /** Name of the charset under test; each value produces a separate benchmark run. */
    @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
    private String charsetName;

    /** Charset resolved from {@link #charsetName} once per trial in {@link #setup()}. */
    private Charset charset;

    private String asciiString;
    private String utf16String;
    private String longUtf16String;
    private String longUtf16StartString;

    /**
     * Resolves the charset under test and builds the input strings.
     */
    @Setup
    public void setup() {
        charset = Charset.forName(charsetName);
        asciiString = "ascii string";
        utf16String = "UTF-\uFF11\uFF16 string";
        // Long text whose only non-ASCII character comes after the filler text.
        longUtf16String = LOREM_IPSUM + " \uFF11";
        // Long text whose only non-ASCII character comes before the filler text.
        longUtf16StartString = " \uFF11" + LOREM_IPSUM;
    }

    /**
     * Encodes the short ASCII string, selecting the charset by name so that
     * the name-based {@code getBytes(String)} overload is what gets measured.
     *
     * @return the encoded bytes
     * @throws Exception if the named charset is not supported
     */
    @Benchmark
    public byte[] encodeAsciiCharsetName() throws Exception {
        // Pass the name, not the pre-resolved Charset; otherwise this benchmark
        // is an exact duplicate of encodeAscii().
        return asciiString.getBytes(charsetName);
    }

    /**
     * Encodes the short ASCII string using the pre-resolved {@link Charset}.
     *
     * @return the encoded bytes
     */
    @Benchmark
    public byte[] encodeAscii() throws Exception {
        return asciiString.getBytes(charset);
    }

    /**
     * Encodes the short ASCII string and the short non-ASCII string back to back.
     *
     * @param bh sink that receives both encoded arrays
     */
    @Benchmark
    public void encodeMix(Blackhole bh) throws Exception {
        bh.consume(asciiString.getBytes(charset));
        bh.consume(utf16String.getBytes(charset));
    }

    /**
     * Encodes the long string whose single non-ASCII character is near the end.
     *
     * @return the encoded bytes
     */
    @Benchmark
    public byte[] encodeUTF16LongEnd() throws Exception {
        return longUtf16String.getBytes(charset);
    }

    /**
     * Encodes the long string whose single non-ASCII character is near the start.
     *
     * @return the encoded bytes
     */
    @Benchmark
    public byte[] encodeUTF16LongStart() throws Exception {
        return longUtf16StartString.getBytes(charset);
    }

    /**
     * Encodes the short string containing non-ASCII characters.
     *
     * @return the encoded bytes
     */
    @Benchmark
    public byte[] encodeUTF16() throws Exception {
        return utf16String.getBytes(charset);
    }
}

1 comment on commit 84ed967

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.