@@ -8628,23 +8628,27 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe
86288628#undef BLOCK_COMMENT
86298629
86308630// Compress char[] array to byte[].
8631- // ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java
8631+ // Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
8632+ // Return the array length if every element in array can be encoded,
8633+ // otherwise, the index of first non-latin1 (> 0xff) character.
86328634// @IntrinsicCandidate
8633- // private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
8635+ // public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
86348636// for (int i = 0; i < len; i++) {
8635- // int c = src[srcOff++ ];
8636- // if (c >>> 8 != 0 ) {
8637- // return 0;
8637+ // char c = src[srcOff];
8638+ // if (c > 0xff ) {
8639+ // return i; // return index of non-latin1 char
86388640// }
8639- // dst[dstOff++] = (byte)c;
8641+ // dst[dstOff] = (byte)c;
8642+ // srcOff++;
8643+ // dstOff++;
86408644// }
86418645// return len;
86428646// }
86438647void MacroAssembler::char_array_compress (Register src, Register dst, Register len,
86448648 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
86458649 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
86468650 Register tmp5, Register result, KRegister mask1, KRegister mask2) {
8647- Label copy_chars_loop, return_length, return_zero, done ;
8651+ Label copy_chars_loop, done, reset_sp, copy_tail ;
86488652
86498653 // rsi: src
86508654 // rdi: dst
@@ -8659,28 +8663,28 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
86598663 assert (len != result, " " );
86608664
86618665 // save length for return
8662- push ( len);
8666+ movl (result, len);
86638667
86648668 if ((AVX3Threshold == 0 ) && (UseAVX > 2 ) && // AVX512
86658669 VM_Version::supports_avx512vlbw () &&
86668670 VM_Version::supports_bmi2 ()) {
86678671
8668- Label copy_32_loop, copy_loop_tail, below_threshold;
8672+ Label copy_32_loop, copy_loop_tail, below_threshold, reset_for_copy_tail ;
86698673
86708674 // alignment
86718675 Label post_alignment;
86728676
8673- // if length of the string is less than 16 , handle it in an old fashioned way
8677+ // if length of the string is less than 32 , handle it the old fashioned way
86748678 testl (len, -32 );
86758679 jcc (Assembler::zero, below_threshold);
86768680
86778681 // First check whether a character is compressible ( <= 0xFF).
86788682 // Create mask to test for Unicode chars inside zmm vector
8679- movl (result , 0x00FF );
8680- evpbroadcastw (tmp2Reg, result , Assembler::AVX_512bit);
8683+ movl (tmp5 , 0x00FF );
8684+ evpbroadcastw (tmp2Reg, tmp5 , Assembler::AVX_512bit);
86818685
86828686 testl (len, -64 );
8683- jcc (Assembler::zero, post_alignment);
8687+ jccb (Assembler::zero, post_alignment);
86848688
86858689 movl (tmp5, dst);
86868690 andl (tmp5, (32 - 1 ));
@@ -8689,18 +8693,19 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
86898693
86908694 // bail out when there is nothing to be done
86918695 testl (tmp5, 0xFFFFFFFF );
8692- jcc (Assembler::zero, post_alignment);
8696+ jccb (Assembler::zero, post_alignment);
86938697
86948698 // ~(~0 << len), where len is the # of remaining elements to process
8695- movl (result, 0xFFFFFFFF );
8696- shlxl (result, result, tmp5);
8697- notl (result);
8698- kmovdl (mask2, result);
8699+ movl (len, 0xFFFFFFFF );
8700+ shlxl (len, len, tmp5);
8701+ notl (len);
8702+ kmovdl (mask2, len);
8703+ movl (len, result);
86998704
87008705 evmovdquw (tmp1Reg, mask2, Address (src, 0 ), /* merge*/ false , Assembler::AVX_512bit);
87018706 evpcmpw (mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /* signed*/ false , Assembler::AVX_512bit);
87028707 ktestd (mask1, mask2);
8703- jcc (Assembler::carryClear, return_zero );
8708+ jcc (Assembler::carryClear, copy_tail );
87048709
87058710 evpmovwb (Address (dst, 0 ), mask2, tmp1Reg, Assembler::AVX_512bit);
87068711
@@ -8715,7 +8720,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
87158720 movl (tmp5, len);
87168721 andl (tmp5, (32 - 1 )); // tail count (in chars)
87178722 andl (len, ~(32 - 1 )); // vector count (in chars)
8718- jcc (Assembler::zero, copy_loop_tail);
8723+ jccb (Assembler::zero, copy_loop_tail);
87198724
87208725 lea (src, Address (src, len, Address::times_2));
87218726 lea (dst, Address (dst, len, Address::times_1));
@@ -8725,55 +8730,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
87258730 evmovdquw (tmp1Reg, Address (src, len, Address::times_2), Assembler::AVX_512bit);
87268731 evpcmpuw (mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
87278732 kortestdl (mask1, mask1);
8728- jcc (Assembler::carryClear, return_zero );
8733+ jccb (Assembler::carryClear, reset_for_copy_tail );
87298734
87308735 // All elements in current processed chunk are valid candidates for
87318736 // compression. Write a truncated byte elements to the memory.
87328737 evpmovwb (Address (dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
87338738 addptr (len, 32 );
8734- jcc (Assembler::notZero, copy_32_loop);
8739+ jccb (Assembler::notZero, copy_32_loop);
87358740
87368741 bind (copy_loop_tail);
87378742 // bail out when there is nothing to be done
87388743 testl (tmp5, 0xFFFFFFFF );
8739- jcc (Assembler::zero, return_length );
8744+ jcc (Assembler::zero, done );
87408745
87418746 movl (len, tmp5);
87428747
87438748 // ~(~0 << len), where len is the # of remaining elements to process
8744- movl (result , 0xFFFFFFFF );
8745- shlxl (result, result , len);
8746- notl (result );
8749+ movl (tmp5 , 0xFFFFFFFF );
8750+ shlxl (tmp5, tmp5 , len);
8751+ notl (tmp5 );
87478752
8748- kmovdl (mask2, result );
8753+ kmovdl (mask2, tmp5 );
87498754
87508755 evmovdquw (tmp1Reg, mask2, Address (src, 0 ), /* merge*/ false , Assembler::AVX_512bit);
87518756 evpcmpw (mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /* signed*/ false , Assembler::AVX_512bit);
87528757 ktestd (mask1, mask2);
8753- jcc (Assembler::carryClear, return_zero );
8758+ jcc (Assembler::carryClear, copy_tail );
87548759
87558760 evpmovwb (Address (dst, 0 ), mask2, tmp1Reg, Assembler::AVX_512bit);
8756- jmp (return_length);
8761+ jmp (done);
8762+
8763+ bind (reset_for_copy_tail);
8764+ lea (src, Address (src, tmp5, Address::times_2));
8765+ lea (dst, Address (dst, tmp5, Address::times_1));
8766+ subptr (len, tmp5);
8767+ jmp (copy_chars_loop);
87578768
87588769 bind (below_threshold);
87598770 }
87608771
87618772 if (UseSSE42Intrinsics) {
8762- Label copy_32_loop, copy_16, copy_tail ;
8773+ Label copy_32_loop, copy_16, copy_tail_sse, reset_for_copy_tail ;
87638774
8764- movl (result, len);
8775+ // vectored compression
8776+ testl (len, 0xfffffff8 );
8777+ jcc (Assembler::zero, copy_tail);
87658778
87668779 movl (tmp5, 0xff00ff00 ); // create mask to test for Unicode chars in vectors
8780+ movdl (tmp1Reg, tmp5);
8781+ pshufd (tmp1Reg, tmp1Reg, 0 ); // store Unicode mask in tmp1Reg
87678782
8768- // vectored compression
8769- andl (len, 0xfffffff0 ); // vector count (in chars)
8770- andl (result, 0x0000000f ); // tail count (in chars)
8771- testl (len, len);
8772- jcc (Assembler::zero, copy_16);
8783+ andl (len, 0xfffffff0 );
8784+ jccb (Assembler::zero, copy_16);
87738785
87748786 // compress 16 chars per iter
8775- movdl (tmp1Reg, tmp5);
8776- pshufd (tmp1Reg, tmp1Reg, 0 ); // store Unicode mask in tmp1Reg
87778787 pxor (tmp4Reg, tmp4Reg);
87788788
87798789 lea (src, Address (src, len, Address::times_2));
@@ -8786,59 +8796,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
87868796 movdqu (tmp3Reg, Address (src, len, Address::times_2, 16 )); // load next 8 characters
87878797 por (tmp4Reg, tmp3Reg);
87888798 ptest (tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
8789- jcc (Assembler::notZero, return_zero );
8799+ jccb (Assembler::notZero, reset_for_copy_tail );
87908800 packuswb (tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte
87918801 movdqu (Address (dst, len, Address::times_1), tmp2Reg);
87928802 addptr (len, 16 );
8793- jcc (Assembler::notZero, copy_32_loop);
8803+ jccb (Assembler::notZero, copy_32_loop);
87948804
87958805 // compress next vector of 8 chars (if any)
87968806 bind (copy_16);
8797- movl (len, result);
8798- andl (len, 0xfffffff8 ); // vector count (in chars)
8799- andl (result, 0x00000007 ); // tail count (in chars)
8800- testl (len, len);
8801- jccb (Assembler::zero, copy_tail);
8807+ // len = 0
8808+ testl (result, 0x00000008 ); // check if there's a block of 8 chars to compress
8809+ jccb (Assembler::zero, copy_tail_sse);
88028810
8803- movdl (tmp1Reg, tmp5);
8804- pshufd (tmp1Reg, tmp1Reg, 0 ); // store Unicode mask in tmp1Reg
88058811 pxor (tmp3Reg, tmp3Reg);
88068812
88078813 movdqu (tmp2Reg, Address (src, 0 ));
88088814 ptest (tmp2Reg, tmp1Reg); // check for Unicode chars in vector
8809- jccb (Assembler::notZero, return_zero );
8815+ jccb (Assembler::notZero, reset_for_copy_tail );
88108816 packuswb (tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
88118817 movq (Address (dst, 0 ), tmp2Reg);
88128818 addptr (src, 16 );
88138819 addptr (dst, 8 );
8820+ jmpb (copy_tail_sse);
88148821
8815- bind (copy_tail);
8822+ bind (reset_for_copy_tail);
8823+ movl (tmp5, result);
8824+ andl (tmp5, 0x0000000f );
8825+ lea (src, Address (src, tmp5, Address::times_2));
8826+ lea (dst, Address (dst, tmp5, Address::times_1));
8827+ subptr (len, tmp5);
8828+ jmpb (copy_chars_loop);
8829+
8830+ bind (copy_tail_sse);
88168831 movl (len, result);
8832+ andl (len, 0x00000007 ); // tail count (in chars)
88178833 }
88188834 // compress 1 char per iter
8835+ bind (copy_tail);
88198836 testl (len, len);
8820- jccb (Assembler::zero, return_length );
8837+ jccb (Assembler::zero, done );
88218838 lea (src, Address (src, len, Address::times_2));
88228839 lea (dst, Address (dst, len, Address::times_1));
88238840 negptr (len);
88248841
88258842 bind (copy_chars_loop);
8826- load_unsigned_short (result , Address (src, len, Address::times_2));
8827- testl (result , 0xff00 ); // check if Unicode char
8828- jccb (Assembler::notZero, return_zero );
8829- movb (Address (dst, len, Address::times_1), result ); // ASCII char; compress to 1 byte
8843+ load_unsigned_short (tmp5 , Address (src, len, Address::times_2));
8844+ testl (tmp5 , 0xff00 ); // check if Unicode char
8845+ jccb (Assembler::notZero, reset_sp );
8846+ movb (Address (dst, len, Address::times_1), tmp5 ); // ASCII char; compress to 1 byte
88308847 increment (len);
8831- jcc (Assembler::notZero, copy_chars_loop);
8848+ jccb (Assembler::notZero, copy_chars_loop);
88328849
8833- // if compression succeeded, return length
8834- bind (return_length);
8835- pop (result);
8836- jmpb (done);
8837-
8838- // if compression failed, return 0
8839- bind (return_zero);
8840- xorl (result, result);
8841- addptr (rsp, wordSize);
8850+ // add len then return (len will be zero if compress succeeded, otherwise negative)
8851+ bind (reset_sp);
8852+ addl (result, len);
88428853
88438854 bind (done);
88448855}
0 commit comments