Skip to content

Commit f65ef5d

Browse files
committed
Fix header < 16 bytes in indexOf intrinsic, by @sviswa7
1 parent 17f8eb5 commit f65ef5d

File tree

1 file changed

+90
-53
lines changed

1 file changed

+90
-53
lines changed

src/hotspot/cpu/x86/c2_stubGenerator_x86_64_string.cpp

Lines changed: 90 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "precompiled.hpp"
2727
#include "macroAssembler_x86.hpp"
2828
#include "stubGenerator_x86_64.hpp"
29+
#include "oops/arrayOop.hpp"
2930
#include "opto/c2_MacroAssembler.hpp"
3031
#include "opto/intrinsicnode.hpp"
3132

@@ -160,6 +161,9 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis
160161
Register needle_len, XMMRegister XMM0, XMMRegister XMM1,
161162
Register mask, Register tmp, MacroAssembler *_masm);
162163

164+
static void copy_to_stack(Register haystack, Register haystack_len, bool isU, Register tmp,
165+
XMMRegister xtmp, MacroAssembler *_masm);
166+
163167
static void setup_jump_tables(StrIntrinsicNode::ArgEncoding ae, Label &L_error, Label &L_checkRange,
164168
Label &L_fixup, address *big_jump_table, address *small_jump_table,
165169
MacroAssembler *_masm);
@@ -395,41 +399,20 @@ static void generate_string_indexof_stubs(StubGenerator *stubgen, address *fnptr
395399

396400
// Do "big switch" if haystack size > 32
397401
__ cmpq(haystack_len, 0x20);
398-
__ ja_b(L_bigSwitchTop);
402+
__ ja(L_bigSwitchTop);
399403

400404
// Copy the small (<= 32 byte) haystack to the stack. Allows for vector reads without page fault
401405
// Only done for small haystacks
402406
//
403407
// NOTE: This code assumes that the haystack points to a java array type AND there are
404-
// at least 16 bytes of header preceeding the haystack pointer.
408+
// at least 8 bytes of header preceding the haystack pointer.
405409
//
406-
// This means that we're copying up to 15 bytes of the header onto the stack along
410+
// This means that we're copying up to 7 bytes of the header onto the stack along
407411
// with the haystack bytes. After the copy completes, we adjust the haystack pointer
408412
// to the valid haystack bytes on the stack.
409413
{
410-
Label L_moreThan16, L_adjustHaystack;
411-
412-
const Register index = rax;
413414
const Register haystack = rbx;
414-
415-
// Only a single vector load/store of either 16 or 32 bytes
416-
__ cmpq(haystack_len, 0x10);
417-
__ ja_b(L_moreThan16);
418-
419-
__ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x10);
420-
__ movdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x10));
421-
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1);
422-
__ jmpb(L_adjustHaystack);
423-
424-
__ bind(L_moreThan16);
425-
__ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x20);
426-
__ vmovdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x20));
427-
__ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1);
428-
429-
// Point the haystack at the correct location of the first byte of the "real" haystack on the stack
430-
__ bind(L_adjustHaystack);
431-
__ subq(index, haystack_len);
432-
__ leaq(haystack, Address(rsp, index, Address::times_1));
415+
copy_to_stack(haystack, haystack_len, false, rax, XMM_TMP1, _masm);
433416
}
434417

435418
// Dispatch to handlers for small needle and small haystack
@@ -1583,34 +1566,8 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis
15831566
assert((COPIED_HAYSTACK_STACK_SIZE == 64), "Must be 64!");
15841567

15851568
// Copy incoming haystack onto stack
1586-
{
1587-
Label L_adjustHaystack, L_moreThan16;
1588-
1589-
// Copy haystack to stack (haystack <= 32 bytes)
1590-
__ subptr(rsp, COPIED_HAYSTACK_STACK_SIZE);
1591-
__ cmpq(haystack_len, isU ? 0x8 : 0x10);
1592-
__ ja_b(L_moreThan16);
1593-
1594-
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x10);
1595-
__ movdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x10));
1596-
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0);
1597-
__ jmpb(L_adjustHaystack);
1598-
1599-
__ bind(L_moreThan16);
1600-
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x20);
1601-
__ vmovdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x20));
1602-
__ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0);
1603-
1604-
__ bind(L_adjustHaystack);
1605-
__ subptr(tmp, haystack_len);
1606-
1607-
if (isU) {
1608-
// For UTF-16, lengths are half
1609-
__ subptr(tmp, haystack_len);
1610-
}
1611-
// Point the haystack to the stack
1612-
__ leaq(haystack, Address(rsp, tmp, Address::times_1));
1613-
}
1569+
__ subptr(rsp, COPIED_HAYSTACK_STACK_SIZE);
1570+
copy_to_stack(haystack, haystack_len, isU, tmp, XMM0, _masm);
16141571

16151572
// Creates a mask of (n - k + 1) ones. This prevents recognizing any false-positives
16161573
// past the end of the valid haystack.
@@ -1672,6 +1629,86 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis
16721629
__ jmpb(L_out);
16731630
}
16741631

1632+
1633+
1634+
// Copy the small (<= 32 byte) haystack to the stack. Allows for vector reads without page fault
1635+
// Only done for small haystacks
1636+
// NOTE: This code assumes that the haystack points to a java array type AND there are
1637+
// at least 8 bytes of header preceding the haystack pointer.
1638+
// We're copying up to 7 bytes of the header onto the stack along with the haystack bytes.
1639+
// After the copy completes, we adjust the haystack pointer
1640+
// to the valid haystack bytes on the stack.
1641+
//
1642+
// Copy haystack array elements to stack at region
1643+
// [rsp + COPIED_HAYSTACK_STACK_OFFSET, rsp + COPIED_HAYSTACK_STACK_OFFSET + 63] with the following conditions:
1644+
// It may copy up to 7 bytes that precede the array
1645+
// It doesn't read beyond the end of the array
1646+
// There are at least 31 bytes of stack region beyond the end of array
1647+
// Inputs:
1648+
// haystack - Address of haystack
1649+
// haystack_len - Number of elements in haystack
1650+
// isU - true if each element is UTF16 (2 bytes), false if Latin1 (1 byte)
1651+
// tmp, xtmp - Scratch registers
1652+
// Output:
1653+
// haystack - Address of copied string on stack
1654+
1655+
static void copy_to_stack(Register haystack, Register haystack_len, bool isU,
1656+
Register tmp, XMMRegister xtmp, MacroAssembler *_masm) {
1657+
Label L_moreThan8, L_moreThan16, L_moreThan24, L_adjustHaystack;
1658+
1659+
assert(arrayOopDesc::base_offset_in_bytes(isU ? T_CHAR : T_BYTE) >= 8,
1660+
"Needs at least 8 bytes preceding the array body");
1661+
1662+
// Copy haystack to stack (haystack <= 32 bytes)
1663+
int scale = isU ? 2 : 1; // bytes per char
1664+
Address::ScaleFactor addrScale = isU ? Address::times_2 : Address::times_1;
1665+
1666+
__ cmpq(haystack_len, 16/scale);
1667+
__ ja_b(L_moreThan16);
1668+
1669+
__ cmpq(haystack_len, 8/scale);
1670+
__ ja_b(L_moreThan8);
1671+
// haystack length <= 8 bytes, copy 8 bytes up to haystack end reading at most 7 bytes into the header
1672+
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 8);
1673+
__ movq(xtmp, Address(haystack, haystack_len, addrScale, -8));
1674+
__ movq(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
1675+
__ jmpb(L_adjustHaystack);
1676+
1677+
__ bind(L_moreThan8);
1678+
// haystack length > 8 and <= 16 bytes, copy 16 bytes up to haystack end reading at most 7 bytes into the header
1679+
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 16);
1680+
__ movdqu(xtmp, Address(haystack, haystack_len, addrScale, -16));
1681+
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
1682+
__ jmpb(L_adjustHaystack);
1683+
1684+
__ bind(L_moreThan16);
1685+
__ cmpq(haystack_len, 24/scale);
1686+
__ ja_b(L_moreThan24);
1687+
// haystack length > 16 and <= 24 bytes, copy 24 bytes up to haystack end reading at most 7 bytes into the header
1688+
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 24);
1689+
__ movdqu(xtmp, Address(haystack, haystack_len, addrScale, -24));
1690+
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
1691+
__ movq(xtmp, Address(haystack, haystack_len, addrScale, -8));
1692+
__ movq(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET + 16), xtmp);
1693+
__ jmpb(L_adjustHaystack);
1694+
1695+
__ bind(L_moreThan24);
1696+
// haystack length > 24 and <= 32 bytes, copy 32 bytes up to haystack end reading at most 7 bytes into the header
1697+
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 32);
1698+
__ vmovdqu(xtmp, Address(haystack, haystack_len, addrScale, -32));
1699+
__ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
1700+
1701+
__ bind(L_adjustHaystack);
1702+
__ subptr(tmp, haystack_len);
1703+
1704+
if (isU) {
1705+
__ subptr(tmp, haystack_len);
1706+
}
1707+
1708+
// Point the haystack to the stack
1709+
__ leaq(haystack, Address(rsp, tmp, Address::times_1));
1710+
}
1711+
16751712
////////////////////////////////////////////////////////////////////////////////////////
16761713
////////////////////////////////////////////////////////////////////////////////////////
16771714
////////////////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)