Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
5e815b6
Optimization for StringBuilder append(boolean) & appendNull
wenshao Jun 9, 2024
0cbaa5a
optimize to combining values into larger stores
wenshao Jun 11, 2024
f96cde4
bug fix
wenshao Jun 11, 2024
27a3050
revert
wenshao Jun 11, 2024
dd97cff
optimize to combining values into larger stores
wenshao Jun 12, 2024
67be25c
StringLatin1 & StringUTF16 use the same logic
wenshao Jun 12, 2024
7b3cf60
fix assert
wenshao Jun 12, 2024
ad1af38
code format & use long address
wenshao Jun 12, 2024
22d4512
rename benchmark
wenshao Jun 13, 2024
b5ad8e7
optimization for x64
wenshao Jun 14, 2024
1a012f1
copyright 2024
wenshao Jun 14, 2024
3db11b2
`delete` -> `setLength`
wenshao Jun 17, 2024
fa72999
Utf16 case remove `append first utf16 char`
wenshao Jun 17, 2024
6be002a
private static final field `UNSAFE`
wenshao Jun 22, 2024
7563577
Merge remote-tracking branch 'upstream/master' into optim_str_builder…
wenshao Jul 18, 2024
9d9c8eb
replace unsafe with putChar
wenshao Jul 6, 2024
d2dcc24
Merge remote-tracking branch 'upstream/master' into optim_str_builder…
wenshao Jul 24, 2024
4df729c
Merge remote-tracking branch 'upstream/master' into optim_str_builder…
wenshao Aug 13, 2024
d01d595
Merge remote-tracking branch 'upstream/master' into optim_str_builder…
wenshao Aug 24, 2024
61196ec
Merge remote-tracking branch 'upstream/master' into optim_str_builder…
wenshao Aug 24, 2024
3c55f15
revert test
wenshao Sep 22, 2024
0ede6ed
Merge remote-tracking branch 'origin/optim_str_builder_append_202406'…
wenshao Sep 22, 2024
399c8ef
Merge remote-tracking branch 'upstream/master' into optim_str_builder…
wenshao Sep 22, 2024
ae05477
fix build error
wenshao Sep 23, 2024
d1fdcc1
Merge remote-tracking branch 'upstream/master' into optim_str_builder…
wenshao Oct 18, 2024
457735c
Merge remote-tracking branch 'origin/optim_str_builder_append_202406'…
wenshao Oct 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 8 additions & 18 deletions src/java.base/share/classes/java/lang/AbstractStringBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -640,14 +640,11 @@ private AbstractStringBuilder appendNull() {
int count = this.count;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should declare count before ensureCapacityInternal. Same for append boolean.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should declare count before ensureCapacityInternal. Same for append boolean.

Declaring count before ensureCapacityInternal causes a performance regression on x64. It took a lot of time to find this, but the underlying reason is still unclear.

byte[] val = this.value;
if (isLatin1()) {
val[count++] = 'n';
val[count++] = 'u';
val[count++] = 'l';
val[count++] = 'l';
StringLatin1.putCharsAt(val, count, 'n', 'u', 'l', 'l');
} else {
count = StringUTF16.putCharsAt(val, count, 'n', 'u', 'l', 'l');
StringUTF16.putCharsAt(val, count, 'n', 'u', 'l', 'l');
}
this.count = count;
this.count = count + 4;
return this;
}

Expand Down Expand Up @@ -772,25 +769,18 @@ public AbstractStringBuilder append(boolean b) {
byte[] val = this.value;
if (isLatin1()) {
if (b) {
val[count++] = 't';
val[count++] = 'r';
val[count++] = 'u';
val[count++] = 'e';
StringLatin1.putCharsAt(val, count, 't', 'r', 'u', 'e');
} else {
val[count++] = 'f';
val[count++] = 'a';
val[count++] = 'l';
val[count++] = 's';
val[count++] = 'e';
StringLatin1.putCharsAt(val, count, 'f', 'a', 'l', 's', 'e');
}
} else {
if (b) {
count = StringUTF16.putCharsAt(val, count, 't', 'r', 'u', 'e');
StringUTF16.putCharsAt(val, count, 't', 'r', 'u', 'e');
} else {
count = StringUTF16.putCharsAt(val, count, 'f', 'a', 'l', 's', 'e');
StringUTF16.putCharsAt(val, count, 'f', 'a', 'l', 's', 'e');
}
}
this.count = count;
this.count = count + (b ? 4 : 5);
return this;
}

Expand Down
24 changes: 24 additions & 0 deletions src/java.base/share/classes/java/lang/StringLatin1.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.function.IntConsumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import jdk.internal.misc.Unsafe;
import jdk.internal.util.ArraysSupport;
import jdk.internal.util.DecimalDigits;
import jdk.internal.vm.annotation.IntrinsicCandidate;
Expand All @@ -42,6 +43,8 @@
import static java.lang.String.checkOffset;

final class StringLatin1 {
private static final Unsafe UNSAFE = Unsafe.getUnsafe();

public static char charAt(byte[] value, int index) {
checkIndex(index, value.length);
return (char)(value[index] & 0xff);
Expand Down Expand Up @@ -824,6 +827,27 @@ static Stream<String> lines(byte[] value) {
return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
}

static void putCharsAt(byte[] val, int index, int c1, int c2, int c3, int c4) {
assert index >= 0 && index + 3 < length(val) : "Trusted caller missed bounds check";
// Don't use the putChar method: its intrinsic prevents C2 from combining the values into larger stores.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StringLatin1 has no putChar method. I think you can just convert this to explicit array stores for the merge store optimization.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explicit array stores have the overhead of bounds checking. If putCharsAt of StringLatin1 is not implemented on top of Unsafe, its performance will be worse than that of StringUTF16.

Of course, this is a common problem. StringUTF16.putChar is equivalent to Unsafe.putChar, without boundary checking. I found in many test scenarios that the UTF16 version performs better than the StringLatin1 version.

We may need to change some StringLatin1 related implementations to use Unsafe, otherwise users will turn off COMPACT_STRINGS to improve performance.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for explaining what the blocking issue is here. Does C2 not merge the bounds checks when it merges the stores? Interesting; I think a fix on their side would be the way to go.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MergeStore does not work when using StringUTF16.putChar; waiting for improvements from @eme64.

long address = Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
UNSAFE.putByte(val, address , (byte)(c1));
UNSAFE.putByte(val, address + 1, (byte)(c2));
UNSAFE.putByte(val, address + 2, (byte)(c3));
UNSAFE.putByte(val, address + 3, (byte)(c4));
}

/**
 * Stores five values as consecutive bytes of {@code val}, the first one at
 * {@code index}. Each value is narrowed to {@code byte} before it is stored.
 *
 * <p>The stores are issued through {@code UNSAFE.putByte}; the only range
 * check in this method is the {@code assert} below, so the caller is trusted
 * to have validated {@code index} beforehand.
 *
 * @param val   destination byte array
 * @param index position in {@code val} that receives {@code c1}
 * @param c1    value stored at {@code index}
 * @param c2    value stored at {@code index + 1}
 * @param c3    value stored at {@code index + 2}
 * @param c4    value stored at {@code index + 3}
 * @param c5    value stored at {@code index + 4}
 */
static void putCharsAt(byte[] val, int index, int c1, int c2, int c3, int c4, int c5) {
assert index >= 0 && index + 4 < length(val) : "Trusted caller missed bounds check";
// Don't use the putChar method: its intrinsic prevents C2 from combining the values into larger stores.
// Offset of the first target byte: byte-array base offset plus index.
long address = Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
UNSAFE.putByte(val, address , (byte)(c1));
UNSAFE.putByte(val, address + 1, (byte)(c2));
UNSAFE.putByte(val, address + 2, (byte)(c3));
UNSAFE.putByte(val, address + 3, (byte)(c4));
UNSAFE.putByte(val, address + 4, (byte)(c5));
}

public static void putChar(byte[] val, int index, int c) {
//assert (canEncode(c));
val[index] = (byte)(c);
Expand Down
36 changes: 14 additions & 22 deletions src/java.base/share/classes/java/lang/StringUTF16.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import static java.lang.String.LATIN1;

final class StringUTF16 {

// Return a new byte array for a UTF16-coded string for len chars
// Throw an exception if out of range
public static byte[] newBytesFor(int len) {
Expand Down Expand Up @@ -1548,27 +1547,20 @@ public static boolean contentEquals(byte[] value, CharSequence cs, int len) {
return true;
}

/**
 * Range-checks {@code [i, i + 4)} against {@code value} with
 * {@code checkBoundsBeginEnd}, stores the four chars at positions
 * {@code i .. i + 3} via {@code putChar}, and returns {@code i + 4}.
 *
 * @param value destination array
 * @param i     position that receives {@code c1}
 * @return the position immediately after the last stored char
 */
public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
    final int end = i + 4;
    checkBoundsBeginEnd(i, end, value);
    putChar(value, i, c1);
    putChar(value, i + 1, c2);
    putChar(value, i + 2, c3);
    putChar(value, i + 3, c4);
    return end;
}

public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
int end = i + 5;
checkBoundsBeginEnd(i, end, value);
putChar(value, i++, c1);
putChar(value, i++, c2);
putChar(value, i++, c3);
putChar(value, i++, c4);
putChar(value, i++, c5);
assert(i == end);
return end;
/**
 * Stores four values at consecutive positions {@code index .. index + 3} of
 * {@code val}, one {@code putChar} call per value.
 *
 * <p>The only range check in this method is the {@code assert} below; the
 * caller is trusted to have validated {@code index} beforehand.
 *
 * @param val   destination array
 * @param index position that receives {@code c1}
 * @param c1    value stored at {@code index}
 * @param c2    value stored at {@code index + 1}
 * @param c3    value stored at {@code index + 2}
 * @param c4    value stored at {@code index + 3}
 */
static void putCharsAt(byte[] val, int index, int c1, int c2, int c3, int c4) {
assert index >= 0 && index + 3 < length(val) : "Trusted caller missed bounds check";
putChar(val, index , c1);
putChar(val, index + 1, c2);
putChar(val, index + 2, c3);
putChar(val, index + 3, c4);
}

/**
 * Stores five values at consecutive positions {@code index .. index + 4} of
 * {@code val}, one {@code putChar} call per value.
 *
 * <p>The only range check in this method is the {@code assert} below; the
 * caller is trusted to have validated {@code index} beforehand.
 *
 * @param val   destination array
 * @param index position that receives {@code c1}
 * @param c1    value stored at {@code index}
 * @param c2    value stored at {@code index + 1}
 * @param c3    value stored at {@code index + 2}
 * @param c4    value stored at {@code index + 3}
 * @param c5    value stored at {@code index + 4}
 */
static void putCharsAt(byte[] val, int index, int c1, int c2, int c3, int c4, int c5) {
    // Same trusted-caller check as the four-value overload, widened by one position.
    assert index >= 0 && index + 4 < length(val) : "Trusted caller missed bounds check";
    putChar(val, index    , c1);
    putChar(val, index + 1, c2);
    putChar(val, index + 2, c3);
    putChar(val, index + 3, c4);
    putChar(val, index + 4, c5);
}

public static char charAt(byte[] value, int index) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -133,11 +133,17 @@ public static boolean contentEquals(byte[] value, CharSequence cs, int len) {
}

/**
 * Bounds-checked wrapper around {@code StringUTF16.putCharsAt} for four chars.
 *
 * <p>Calls {@code StringUTF16.checkBoundsBeginEnd(i, i + 4, value)} first,
 * then delegates the stores. The delegate returns nothing, so the end
 * position is computed here.
 *
 * @param value destination array
 * @param i     position that receives {@code c1}
 * @return {@code i + 4}, the position immediately after the last stored char
 */
public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
    int end = i + 4;
    StringUTF16.checkBoundsBeginEnd(i, end, value);
    StringUTF16.putCharsAt(value, i, c1, c2, c3, c4);
    return end;
}

/**
 * Bounds-checked wrapper around {@code StringUTF16.putCharsAt} for five chars.
 *
 * <p>Calls {@code StringUTF16.checkBoundsBeginEnd(i, i + 5, value)} first,
 * then delegates the stores. The delegate returns nothing, so the end
 * position is computed here.
 *
 * @param value destination array
 * @param i     position that receives {@code c1}
 * @return {@code i + 5}, the position immediately after the last stored char
 */
public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
    int end = i + 5;
    StringUTF16.checkBoundsBeginEnd(i, end, value);
    StringUTF16.putCharsAt(value, i, c1, c2, c3, c4, c5);
    return end;
}

public static char charAt(byte[] value, int index) {
Expand Down
71 changes: 60 additions & 11 deletions test/micro/org/openjdk/bench/java/lang/StringBuilders.java
Original file line number Diff line number Diff line change
Expand Up @@ -226,17 +226,66 @@ public String toStringCharWithInt8() {


@Benchmark
public String toStringCharWithBool8() {
StringBuilder result = new StringBuilder();
result.append(true);
result.append(false);
result.append(true);
result.append(true);
result.append(false);
result.append(true);
result.append(false);
result.append(false);
return result.toString();
/**
 * Resets the {@code sbLatin1} builder to length zero, appends eight boolean
 * values to it and returns the builder's resulting length.
 */
public int appendWithBool8Latin1() {
    final StringBuilder sb = sbLatin1;
    sb.setLength(0);
    sb.append(true).append(false).append(true).append(true)
      .append(false).append(true).append(false).append(false);
    return sb.length();
}


/**
 * Resets the {@code sbUtf16} builder to length zero, appends eight boolean
 * values to it and returns the builder's resulting length.
 */
@Benchmark
public int appendWithBool8Utf16() {
    final StringBuilder sb = sbUtf16;
    sb.setLength(0);
    sb.append(true).append(false).append(true).append(true)
      .append(false).append(true).append(false).append(false);
    return sb.length();
}


/**
 * Resets the {@code sbLatin1} builder to length zero, appends a null
 * {@code String} reference eight times and returns the builder's resulting
 * length.
 */
@Benchmark
public int appendWithNull8Latin1() {
    final StringBuilder sb = sbLatin1;
    sb.setLength(0);
    sb.append((String) null).append((String) null)
      .append((String) null).append((String) null)
      .append((String) null).append((String) null)
      .append((String) null).append((String) null);
    return sb.length();
}


/**
 * Resets the {@code sbUtf16} builder to length zero, appends a null
 * {@code String} reference eight times and returns the builder's resulting
 * length.
 */
@Benchmark
public int appendWithNull8Utf16() {
    final StringBuilder sb = sbUtf16;
    sb.setLength(0);
    sb.append((String) null).append((String) null)
      .append((String) null).append((String) null)
      .append((String) null).append((String) null)
      .append((String) null).append((String) null);
    return sb.length();
}


Expand Down