Skip to content

Commit 155abc5

Browse files
Roger Riggs, dafedafe, cl4es, offamitkumar, TheRealMDoerr
committed
8311906: Improve robustness of String constructors with mutable array inputs
Co-authored-by: Damon Fenacci <dfenacci@openjdk.org>
Co-authored-by: Claes Redestad <redestad@openjdk.org>
Co-authored-by: Amit Kumar <amitkumar@openjdk.org>
Co-authored-by: Martin Doerr <mdoerr@openjdk.org>
Reviewed-by: rgiulietti, thartmann, redestad, dfenacci
1 parent 316b783 commit 155abc5

File tree

15 files changed

+1305
-253
lines changed

15 files changed

+1305
-253
lines changed

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -5651,7 +5651,7 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
56515651
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
56525652
// return the number of characters copied.
56535653
// - java/lang/StringUTF16.compress
5654-
// return zero (0) if copy fails, otherwise 'len'.
5654+
// return index of non-latin1 character if copy fails, otherwise 'len'.
56555655
//
56565656
// This version always returns the number of characters copied, and does not
56575657
// clobber the 'len' register. A successful copy will complete with the post-
@@ -5868,15 +5868,15 @@ address MacroAssembler::byte_array_inflate(Register src, Register dst, Register
58685868
}
58695869

58705870
// Compress char[] array to byte[].
5871+
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
5872+
// Return the array length if every element in array can be encoded,
5873+
// otherwise, the index of first non-latin1 (> 0xff) character.
58715874
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
58725875
Register res,
58735876
FloatRegister tmp0, FloatRegister tmp1,
58745877
FloatRegister tmp2, FloatRegister tmp3,
58755878
FloatRegister tmp4, FloatRegister tmp5) {
58765879
encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
5877-
// Adjust result: res == len ? len : 0
5878-
cmp(len, res);
5879-
csel(res, res, zr, EQ);
58805880
}
58815881

58825882
// java.math.round(double a)

src/hotspot/cpu/ppc/ppc.ad

+2-10
Original file line number | Diff line number | Diff line change
@@ -12727,16 +12727,8 @@ instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst re
1272712727
ins_cost(300);
1272812728
format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
1272912729
ins_encode %{
12730-
Label Lskip, Ldone;
12731-
__ li($result$$Register, 0);
12732-
__ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12733-
$tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
12734-
__ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12735-
__ beq(CCR0, Lskip);
12736-
__ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
12737-
__ bind(Lskip);
12738-
__ mr($result$$Register, $len$$Register);
12739-
__ bind(Ldone);
12730+
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
12731+
$tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, false);
1274012732
%}
1274112733
ins_pipe(pipe_class_default);
1274212734
%}

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

+4-6
Original file line number | Diff line number | Diff line change
@@ -1933,22 +1933,20 @@ void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Registe
19331933
}
19341934

19351935
// Compress char[] array to byte[].
1936-
// result: the array length if every element in array can be encoded; 0, otherwise.
1936+
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
1937+
// result: the array length if every element in array can be encoded,
1938+
// otherwise, the index of first non-latin1 (> 0xff) character.
19371939
void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len,
19381940
Register result, Register tmp) {
1939-
Label done;
19401941
encode_iso_array_v(src, dst, len, result, tmp, false);
1941-
beqz(len, done);
1942-
mv(result, zr);
1943-
bind(done);
19441942
}
19451943

19461944
// Intrinsic for
19471945
//
19481946
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
19491947
// return the number of characters copied.
19501948
// - java/lang/StringUTF16.compress
1951-
// return zero (0) if copy fails, otherwise 'len'.
1949+
// return index of non-latin1 character if copy fails, otherwise 'len'.
19521950
//
19531951
// This version always returns the number of characters copied. A successful
19541952
// copy will complete with the post-condition: 'res' == 'len', while an

src/hotspot/cpu/s390/s390.ad

+1-1
Original file line number | Diff line number | Diff line change
@@ -10190,7 +10190,7 @@ instruct string_compress(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tm
1019010190
format %{ "String Compress $src->$dst($len) -> $result" %}
1019110191
ins_encode %{
1019210192
__ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
10193-
$tmp$$Register, false, false);
10193+
$tmp$$Register, true, false);
1019410194
%}
1019510195
ins_pipe(pipe_class_dummy);
1019610196
%}

src/hotspot/cpu/x86/macroAssembler_x86.cpp

+75-64
Original file line number | Diff line number | Diff line change
@@ -8628,23 +8628,27 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe
86288628
#undef BLOCK_COMMENT
86298629

86308630
// Compress char[] array to byte[].
8631-
// ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java
8631+
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
8632+
// Return the array length if every element in array can be encoded,
8633+
// otherwise, the index of first non-latin1 (> 0xff) character.
86328634
// @IntrinsicCandidate
8633-
// private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
8635+
// public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
86348636
// for (int i = 0; i < len; i++) {
8635-
// int c = src[srcOff++];
8636-
// if (c >>> 8 != 0) {
8637-
// return 0;
8637+
// char c = src[srcOff];
8638+
// if (c > 0xff) {
8639+
// return i; // return index of non-latin1 char
86388640
// }
8639-
// dst[dstOff++] = (byte)c;
8641+
// dst[dstOff] = (byte)c;
8642+
// srcOff++;
8643+
// dstOff++;
86408644
// }
86418645
// return len;
86428646
// }
86438647
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
86448648
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
86458649
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
86468650
Register tmp5, Register result, KRegister mask1, KRegister mask2) {
8647-
Label copy_chars_loop, return_length, return_zero, done;
8651+
Label copy_chars_loop, done, reset_sp, copy_tail;
86488652

86498653
// rsi: src
86508654
// rdi: dst
@@ -8659,28 +8663,28 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
86598663
assert(len != result, "");
86608664

86618665
// save length for return
8662-
push(len);
8666+
movl(result, len);
86638667

86648668
if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
86658669
VM_Version::supports_avx512vlbw() &&
86668670
VM_Version::supports_bmi2()) {
86678671

8668-
Label copy_32_loop, copy_loop_tail, below_threshold;
8672+
Label copy_32_loop, copy_loop_tail, below_threshold, reset_for_copy_tail;
86698673

86708674
// alignment
86718675
Label post_alignment;
86728676

8673-
// if length of the string is less than 16, handle it in an old fashioned way
8677+
// if length of the string is less than 32, handle it the old fashioned way
86748678
testl(len, -32);
86758679
jcc(Assembler::zero, below_threshold);
86768680

86778681
// First check whether a character is compressible ( <= 0xFF).
86788682
// Create mask to test for Unicode chars inside zmm vector
8679-
movl(result, 0x00FF);
8680-
evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
8683+
movl(tmp5, 0x00FF);
8684+
evpbroadcastw(tmp2Reg, tmp5, Assembler::AVX_512bit);
86818685

86828686
testl(len, -64);
8683-
jcc(Assembler::zero, post_alignment);
8687+
jccb(Assembler::zero, post_alignment);
86848688

86858689
movl(tmp5, dst);
86868690
andl(tmp5, (32 - 1));
@@ -8689,18 +8693,19 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
86898693

86908694
// bail out when there is nothing to be done
86918695
testl(tmp5, 0xFFFFFFFF);
8692-
jcc(Assembler::zero, post_alignment);
8696+
jccb(Assembler::zero, post_alignment);
86938697

86948698
// ~(~0 << len), where len is the # of remaining elements to process
8695-
movl(result, 0xFFFFFFFF);
8696-
shlxl(result, result, tmp5);
8697-
notl(result);
8698-
kmovdl(mask2, result);
8699+
movl(len, 0xFFFFFFFF);
8700+
shlxl(len, len, tmp5);
8701+
notl(len);
8702+
kmovdl(mask2, len);
8703+
movl(len, result);
86998704

87008705
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
87018706
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
87028707
ktestd(mask1, mask2);
8703-
jcc(Assembler::carryClear, return_zero);
8708+
jcc(Assembler::carryClear, copy_tail);
87048709

87058710
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
87068711

@@ -8715,7 +8720,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
87158720
movl(tmp5, len);
87168721
andl(tmp5, (32 - 1)); // tail count (in chars)
87178722
andl(len, ~(32 - 1)); // vector count (in chars)
8718-
jcc(Assembler::zero, copy_loop_tail);
8723+
jccb(Assembler::zero, copy_loop_tail);
87198724

87208725
lea(src, Address(src, len, Address::times_2));
87218726
lea(dst, Address(dst, len, Address::times_1));
@@ -8725,55 +8730,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
87258730
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
87268731
evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
87278732
kortestdl(mask1, mask1);
8728-
jcc(Assembler::carryClear, return_zero);
8733+
jccb(Assembler::carryClear, reset_for_copy_tail);
87298734

87308735
// All elements in current processed chunk are valid candidates for
87318736
// compression. Write a truncated byte elements to the memory.
87328737
evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
87338738
addptr(len, 32);
8734-
jcc(Assembler::notZero, copy_32_loop);
8739+
jccb(Assembler::notZero, copy_32_loop);
87358740

87368741
bind(copy_loop_tail);
87378742
// bail out when there is nothing to be done
87388743
testl(tmp5, 0xFFFFFFFF);
8739-
jcc(Assembler::zero, return_length);
8744+
jcc(Assembler::zero, done);
87408745

87418746
movl(len, tmp5);
87428747

87438748
// ~(~0 << len), where len is the # of remaining elements to process
8744-
movl(result, 0xFFFFFFFF);
8745-
shlxl(result, result, len);
8746-
notl(result);
8749+
movl(tmp5, 0xFFFFFFFF);
8750+
shlxl(tmp5, tmp5, len);
8751+
notl(tmp5);
87478752

8748-
kmovdl(mask2, result);
8753+
kmovdl(mask2, tmp5);
87498754

87508755
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
87518756
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
87528757
ktestd(mask1, mask2);
8753-
jcc(Assembler::carryClear, return_zero);
8758+
jcc(Assembler::carryClear, copy_tail);
87548759

87558760
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
8756-
jmp(return_length);
8761+
jmp(done);
8762+
8763+
bind(reset_for_copy_tail);
8764+
lea(src, Address(src, tmp5, Address::times_2));
8765+
lea(dst, Address(dst, tmp5, Address::times_1));
8766+
subptr(len, tmp5);
8767+
jmp(copy_chars_loop);
87578768

87588769
bind(below_threshold);
87598770
}
87608771

87618772
if (UseSSE42Intrinsics) {
8762-
Label copy_32_loop, copy_16, copy_tail;
8773+
Label copy_32_loop, copy_16, copy_tail_sse, reset_for_copy_tail;
87638774

8764-
movl(result, len);
8775+
// vectored compression
8776+
testl(len, 0xfffffff8);
8777+
jcc(Assembler::zero, copy_tail);
87658778

87668779
movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
8780+
movdl(tmp1Reg, tmp5);
8781+
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
87678782

8768-
// vectored compression
8769-
andl(len, 0xfffffff0); // vector count (in chars)
8770-
andl(result, 0x0000000f); // tail count (in chars)
8771-
testl(len, len);
8772-
jcc(Assembler::zero, copy_16);
8783+
andl(len, 0xfffffff0);
8784+
jccb(Assembler::zero, copy_16);
87738785

87748786
// compress 16 chars per iter
8775-
movdl(tmp1Reg, tmp5);
8776-
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
87778787
pxor(tmp4Reg, tmp4Reg);
87788788

87798789
lea(src, Address(src, len, Address::times_2));
@@ -8786,59 +8796,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
87868796
movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
87878797
por(tmp4Reg, tmp3Reg);
87888798
ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
8789-
jcc(Assembler::notZero, return_zero);
8799+
jccb(Assembler::notZero, reset_for_copy_tail);
87908800
packuswb(tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte
87918801
movdqu(Address(dst, len, Address::times_1), tmp2Reg);
87928802
addptr(len, 16);
8793-
jcc(Assembler::notZero, copy_32_loop);
8803+
jccb(Assembler::notZero, copy_32_loop);
87948804

87958805
// compress next vector of 8 chars (if any)
87968806
bind(copy_16);
8797-
movl(len, result);
8798-
andl(len, 0xfffffff8); // vector count (in chars)
8799-
andl(result, 0x00000007); // tail count (in chars)
8800-
testl(len, len);
8801-
jccb(Assembler::zero, copy_tail);
8807+
// len = 0
8808+
testl(result, 0x00000008); // check if there's a block of 8 chars to compress
8809+
jccb(Assembler::zero, copy_tail_sse);
88028810

8803-
movdl(tmp1Reg, tmp5);
8804-
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
88058811
pxor(tmp3Reg, tmp3Reg);
88068812

88078813
movdqu(tmp2Reg, Address(src, 0));
88088814
ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
8809-
jccb(Assembler::notZero, return_zero);
8815+
jccb(Assembler::notZero, reset_for_copy_tail);
88108816
packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
88118817
movq(Address(dst, 0), tmp2Reg);
88128818
addptr(src, 16);
88138819
addptr(dst, 8);
8820+
jmpb(copy_tail_sse);
88148821

8815-
bind(copy_tail);
8822+
bind(reset_for_copy_tail);
8823+
movl(tmp5, result);
8824+
andl(tmp5, 0x0000000f);
8825+
lea(src, Address(src, tmp5, Address::times_2));
8826+
lea(dst, Address(dst, tmp5, Address::times_1));
8827+
subptr(len, tmp5);
8828+
jmpb(copy_chars_loop);
8829+
8830+
bind(copy_tail_sse);
88168831
movl(len, result);
8832+
andl(len, 0x00000007); // tail count (in chars)
88178833
}
88188834
// compress 1 char per iter
8835+
bind(copy_tail);
88198836
testl(len, len);
8820-
jccb(Assembler::zero, return_length);
8837+
jccb(Assembler::zero, done);
88218838
lea(src, Address(src, len, Address::times_2));
88228839
lea(dst, Address(dst, len, Address::times_1));
88238840
negptr(len);
88248841

88258842
bind(copy_chars_loop);
8826-
load_unsigned_short(result, Address(src, len, Address::times_2));
8827-
testl(result, 0xff00); // check if Unicode char
8828-
jccb(Assembler::notZero, return_zero);
8829-
movb(Address(dst, len, Address::times_1), result); // ASCII char; compress to 1 byte
8843+
load_unsigned_short(tmp5, Address(src, len, Address::times_2));
8844+
testl(tmp5, 0xff00); // check if Unicode char
8845+
jccb(Assembler::notZero, reset_sp);
8846+
movb(Address(dst, len, Address::times_1), tmp5); // ASCII char; compress to 1 byte
88308847
increment(len);
8831-
jcc(Assembler::notZero, copy_chars_loop);
8848+
jccb(Assembler::notZero, copy_chars_loop);
88328849

8833-
// if compression succeeded, return length
8834-
bind(return_length);
8835-
pop(result);
8836-
jmpb(done);
8837-
8838-
// if compression failed, return 0
8839-
bind(return_zero);
8840-
xorl(result, result);
8841-
addptr(rsp, wordSize);
8850+
// add len then return (len will be zero if compress succeeded, otherwise negative)
8851+
bind(reset_sp);
8852+
addl(result, len);
88428853

88438854
bind(done);
88448855
}

0 commit comments

Comments
 (0)