Skip to content

Commit

Permalink
8311906: Improve robustness of String constructors with mutable array…
Browse files Browse the repository at this point in the history
… inputs

Co-authored-by: Damon Fenacci <dfenacci@openjdk.org>
Co-authored-by: Claes Redestad <redestad@openjdk.org>
Co-authored-by: Amit Kumar <amitkumar@openjdk.org>
Co-authored-by: Martin Doerr <mdoerr@openjdk.org>
Reviewed-by: rgiulietti, thartmann, redestad, dfenacci
  • Loading branch information
5 people committed Dec 4, 2023
1 parent 316b783 commit 155abc5
Show file tree
Hide file tree
Showing 15 changed files with 1,305 additions and 253 deletions.
8 changes: 4 additions & 4 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5651,7 +5651,7 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
// return the number of characters copied.
// - java/lang/StringUTF16.compress
// return index of non-latin1 character if copy fails, otherwise 'len'.
//
// This version always returns the number of characters copied, and does not
// clobber the 'len' register. A successful copy will complete with the post-
Expand Down Expand Up @@ -5868,15 +5868,15 @@ address MacroAssembler::byte_array_inflate(Register src, Register dst, Register
}

// Compress char[] array to byte[].
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
// Return the array length if every element in array can be encoded,
// otherwise, the index of first non-latin1 (> 0xff) character.
//
// Parameters:
//   src, dst    - source char[] and destination byte[] registers, forwarded
//                 unchanged to encode_iso_array.
//   len         - number of characters to compress.
//   res         - receives the result described above.
//   tmp0..tmp5  - scratch vector registers handed to encode_iso_array.
//
// encode_iso_array reports the number of characters it copied, which is
// exactly the index of the first character that could not be encoded (or
// 'len' when everything was copied). That value is therefore returned as-is;
// it must NOT be collapsed to zero on a partial copy, otherwise the caller
// loses the position of the offending character.
// NOTE(review): the 'false' argument presumably selects Latin-1 rather than
// ASCII-only encoding - confirm against the encode_iso_array signature.
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
                                         Register res,
                                         FloatRegister tmp0, FloatRegister tmp1,
                                         FloatRegister tmp2, FloatRegister tmp3,
                                         FloatRegister tmp4, FloatRegister tmp5) {
  encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
}

// java.math.round(double a)
Expand Down
12 changes: 2 additions & 10 deletions src/hotspot/cpu/ppc/ppc.ad
Original file line number Diff line number Diff line change
Expand Up @@ -12727,16 +12727,8 @@ instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst re
ins_cost(300);
format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
ins_encode %{
Label Lskip, Ldone;
__ li($result$$Register, 0);
__ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
$tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
__ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
__ beq(CCR0, Lskip);
__ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
__ bind(Lskip);
__ mr($result$$Register, $len$$Register);
__ bind(Ldone);
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
$tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, false);
%}
ins_pipe(pipe_class_default);
%}
Expand Down
10 changes: 4 additions & 6 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1933,22 +1933,20 @@ void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Registe
}

// Compress char[] array to byte[].
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
// result: the array length if every element in array can be encoded,
//         otherwise, the index of first non-latin1 (> 0xff) character.
//
// Parameters:
//   src, dst - source char[] and destination byte[] registers, forwarded
//              unchanged to encode_iso_array_v.
//   len      - number of characters to compress.
//   result   - receives the value described above.
//   tmp      - scratch register handed to encode_iso_array_v.
//
// encode_iso_array_v reports the number of characters it copied, which is
// the index of the first character that could not be encoded (or the full
// length on success). The value is returned untouched; zeroing it on a
// partial copy would discard the index the caller relies on.
// NOTE(review): the trailing 'false' argument presumably selects Latin-1
// rather than ASCII-only encoding - confirm against encode_iso_array_v.
void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len,
                                              Register result, Register tmp) {
  encode_iso_array_v(src, dst, len, result, tmp, false);
}

// Intrinsic for
//
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
// return the number of characters copied.
// - java/lang/StringUTF16.compress
// return index of non-latin1 character if copy fails, otherwise 'len'.
//
// This version always returns the number of characters copied. A successful
// copy will complete with the post-condition: 'res' == 'len', while an
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/s390/s390.ad
Original file line number Diff line number Diff line change
Expand Up @@ -10190,7 +10190,7 @@ instruct string_compress(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tm
format %{ "String Compress $src->$dst($len) -> $result" %}
ins_encode %{
__ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
$tmp$$Register, false, false);
$tmp$$Register, true, false);
%}
ins_pipe(pipe_class_dummy);
%}
Expand Down
139 changes: 75 additions & 64 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8628,23 +8628,27 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe
#undef BLOCK_COMMENT

// Compress char[] array to byte[].
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
// Return the array length if every element in array can be encoded,
// otherwise, the index of first non-latin1 (> 0xff) character.
//
// Reference implementation (java.lang.StringUTF16):
//   @IntrinsicCandidate
//   public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
//     for (int i = 0; i < len; i++) {
//       char c = src[srcOff];
//       if (c > 0xff) {
//         return i;  // return index of non-latin1 char
//       }
//       dst[dstOff] = (byte)c;
//       srcOff++;
//       dstOff++;
//     }
//     return len;
//   }
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
Register tmp5, Register result, KRegister mask1, KRegister mask2) {
Label copy_chars_loop, return_length, return_zero, done;
Label copy_chars_loop, done, reset_sp, copy_tail;

// rsi: src
// rdi: dst
Expand All @@ -8659,28 +8663,28 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
assert(len != result, "");

// save length for return
push(len);
movl(result, len);

if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
VM_Version::supports_avx512vlbw() &&
VM_Version::supports_bmi2()) {

Label copy_32_loop, copy_loop_tail, below_threshold;
Label copy_32_loop, copy_loop_tail, below_threshold, reset_for_copy_tail;

// alignment
Label post_alignment;

// if length of the string is less than 16, handle it in an old fashioned way
// if length of the string is less than 32, handle it the old fashioned way
testl(len, -32);
jcc(Assembler::zero, below_threshold);

// First check whether a character is compressible ( <= 0xFF).
// Create mask to test for Unicode chars inside zmm vector
movl(result, 0x00FF);
evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
movl(tmp5, 0x00FF);
evpbroadcastw(tmp2Reg, tmp5, Assembler::AVX_512bit);

testl(len, -64);
jcc(Assembler::zero, post_alignment);
jccb(Assembler::zero, post_alignment);

movl(tmp5, dst);
andl(tmp5, (32 - 1));
Expand All @@ -8689,18 +8693,19 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le

// bail out when there is nothing to be done
testl(tmp5, 0xFFFFFFFF);
jcc(Assembler::zero, post_alignment);
jccb(Assembler::zero, post_alignment);

// ~(~0 << len), where len is the # of remaining elements to process
movl(result, 0xFFFFFFFF);
shlxl(result, result, tmp5);
notl(result);
kmovdl(mask2, result);
movl(len, 0xFFFFFFFF);
shlxl(len, len, tmp5);
notl(len);
kmovdl(mask2, len);
movl(len, result);

evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
ktestd(mask1, mask2);
jcc(Assembler::carryClear, return_zero);
jcc(Assembler::carryClear, copy_tail);

evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);

Expand All @@ -8715,7 +8720,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
movl(tmp5, len);
andl(tmp5, (32 - 1)); // tail count (in chars)
andl(len, ~(32 - 1)); // vector count (in chars)
jcc(Assembler::zero, copy_loop_tail);
jccb(Assembler::zero, copy_loop_tail);

lea(src, Address(src, len, Address::times_2));
lea(dst, Address(dst, len, Address::times_1));
Expand All @@ -8725,55 +8730,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(mask1, mask1);
jcc(Assembler::carryClear, return_zero);
jccb(Assembler::carryClear, reset_for_copy_tail);

// All elements in current processed chunk are valid candidates for
// compression. Write a truncated byte elements to the memory.
evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop);
jccb(Assembler::notZero, copy_32_loop);

bind(copy_loop_tail);
// bail out when there is nothing to be done
testl(tmp5, 0xFFFFFFFF);
jcc(Assembler::zero, return_length);
jcc(Assembler::zero, done);

movl(len, tmp5);

// ~(~0 << len), where len is the # of remaining elements to process
movl(result, 0xFFFFFFFF);
shlxl(result, result, len);
notl(result);
movl(tmp5, 0xFFFFFFFF);
shlxl(tmp5, tmp5, len);
notl(tmp5);

kmovdl(mask2, result);
kmovdl(mask2, tmp5);

evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
ktestd(mask1, mask2);
jcc(Assembler::carryClear, return_zero);
jcc(Assembler::carryClear, copy_tail);

evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
jmp(return_length);
jmp(done);

bind(reset_for_copy_tail);
lea(src, Address(src, tmp5, Address::times_2));
lea(dst, Address(dst, tmp5, Address::times_1));
subptr(len, tmp5);
jmp(copy_chars_loop);

bind(below_threshold);
}

if (UseSSE42Intrinsics) {
Label copy_32_loop, copy_16, copy_tail;
Label copy_32_loop, copy_16, copy_tail_sse, reset_for_copy_tail;

movl(result, len);
// vectored compression
testl(len, 0xfffffff8);
jcc(Assembler::zero, copy_tail);

movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
movdl(tmp1Reg, tmp5);
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg

// vectored compression
andl(len, 0xfffffff0); // vector count (in chars)
andl(result, 0x0000000f); // tail count (in chars)
testl(len, len);
jcc(Assembler::zero, copy_16);
andl(len, 0xfffffff0);
jccb(Assembler::zero, copy_16);

// compress 16 chars per iter
movdl(tmp1Reg, tmp5);
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
pxor(tmp4Reg, tmp4Reg);

lea(src, Address(src, len, Address::times_2));
Expand All @@ -8786,59 +8796,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
por(tmp4Reg, tmp3Reg);
ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
jcc(Assembler::notZero, return_zero);
jccb(Assembler::notZero, reset_for_copy_tail);
packuswb(tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte
movdqu(Address(dst, len, Address::times_1), tmp2Reg);
addptr(len, 16);
jcc(Assembler::notZero, copy_32_loop);
jccb(Assembler::notZero, copy_32_loop);

// compress next vector of 8 chars (if any)
bind(copy_16);
movl(len, result);
andl(len, 0xfffffff8); // vector count (in chars)
andl(result, 0x00000007); // tail count (in chars)
testl(len, len);
jccb(Assembler::zero, copy_tail);
// len = 0
testl(result, 0x00000008); // check if there's a block of 8 chars to compress
jccb(Assembler::zero, copy_tail_sse);

movdl(tmp1Reg, tmp5);
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
pxor(tmp3Reg, tmp3Reg);

movdqu(tmp2Reg, Address(src, 0));
ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
jccb(Assembler::notZero, return_zero);
jccb(Assembler::notZero, reset_for_copy_tail);
packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
movq(Address(dst, 0), tmp2Reg);
addptr(src, 16);
addptr(dst, 8);
jmpb(copy_tail_sse);

bind(copy_tail);
bind(reset_for_copy_tail);
movl(tmp5, result);
andl(tmp5, 0x0000000f);
lea(src, Address(src, tmp5, Address::times_2));
lea(dst, Address(dst, tmp5, Address::times_1));
subptr(len, tmp5);
jmpb(copy_chars_loop);

bind(copy_tail_sse);
movl(len, result);
andl(len, 0x00000007); // tail count (in chars)
}
// compress 1 char per iter
bind(copy_tail);
testl(len, len);
jccb(Assembler::zero, return_length);
jccb(Assembler::zero, done);
lea(src, Address(src, len, Address::times_2));
lea(dst, Address(dst, len, Address::times_1));
negptr(len);

bind(copy_chars_loop);
load_unsigned_short(result, Address(src, len, Address::times_2));
testl(result, 0xff00); // check if Unicode char
jccb(Assembler::notZero, return_zero);
movb(Address(dst, len, Address::times_1), result); // ASCII char; compress to 1 byte
load_unsigned_short(tmp5, Address(src, len, Address::times_2));
testl(tmp5, 0xff00); // check if Unicode char
jccb(Assembler::notZero, reset_sp);
movb(Address(dst, len, Address::times_1), tmp5); // ASCII char; compress to 1 byte
increment(len);
jcc(Assembler::notZero, copy_chars_loop);
jccb(Assembler::notZero, copy_chars_loop);

// if compression succeeded, return length
bind(return_length);
pop(result);
jmpb(done);

// if compression failed, return 0
bind(return_zero);
xorl(result, result);
addptr(rsp, wordSize);
// add len then return (len will be zero if compress succeeded, otherwise negative)
bind(reset_sp);
addl(result, len);

bind(done);
}
Expand Down
Loading

1 comment on commit 155abc5

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.