|
26 | 26 | #include "precompiled.hpp" |
27 | 27 | #include "macroAssembler_x86.hpp" |
28 | 28 | #include "stubGenerator_x86_64.hpp" |
| 29 | +#include "oops/arrayOop.hpp" |
29 | 30 | #include "opto/c2_MacroAssembler.hpp" |
30 | 31 | #include "opto/intrinsicnode.hpp" |
31 | 32 |
|
@@ -160,6 +161,9 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis |
160 | 161 | Register needle_len, XMMRegister XMM0, XMMRegister XMM1, |
161 | 162 | Register mask, Register tmp, MacroAssembler *_masm); |
162 | 163 |
|
| 164 | +static void copy_to_stack(Register haystack, Register haystack_len, bool isU, Register tmp, |
| 165 | + XMMRegister xtmp, MacroAssembler *_masm); |
| 166 | + |
163 | 167 | static void setup_jump_tables(StrIntrinsicNode::ArgEncoding ae, Label &L_error, Label &L_checkRange, |
164 | 168 | Label &L_fixup, address *big_jump_table, address *small_jump_table, |
165 | 169 | MacroAssembler *_masm); |
@@ -395,41 +399,20 @@ static void generate_string_indexof_stubs(StubGenerator *stubgen, address *fnptr |
395 | 399 |
|
396 | 400 | // Do "big switch" if haystack size > 32 |
397 | 401 | __ cmpq(haystack_len, 0x20); |
398 | | - __ ja_b(L_bigSwitchTop); |
| 402 | + __ ja(L_bigSwitchTop); |
399 | 403 |
|
400 | 404 | // Copy the small (< 32 byte) haystack to the stack. Allows for vector reads without page fault |
401 | 405 | // Only done for small haystacks |
402 | 406 | // |
403 | 407 | // NOTE: This code assumes that the haystack points to a java array type AND there are |
404 | | - // at least 16 bytes of header preceeding the haystack pointer. |
| 408 | + // at least 8 bytes of header preceding the haystack pointer.
405 | 409 | // |
406 | | - // This means that we're copying up to 15 bytes of the header onto the stack along |
| 410 | + // This means that we're copying up to 7 bytes of the header onto the stack along |
407 | 411 | // with the haystack bytes. After the copy completes, we adjust the haystack pointer |
408 | 412 | // to the valid haystack bytes on the stack. |
409 | 413 | { |
410 | | - Label L_moreThan16, L_adjustHaystack; |
411 | | - |
412 | | - const Register index = rax; |
413 | 414 | const Register haystack = rbx; |
414 | | - |
415 | | - // Only a single vector load/store of either 16 or 32 bytes |
416 | | - __ cmpq(haystack_len, 0x10); |
417 | | - __ ja_b(L_moreThan16); |
418 | | - |
419 | | - __ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x10); |
420 | | - __ movdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x10)); |
421 | | - __ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1); |
422 | | - __ jmpb(L_adjustHaystack); |
423 | | - |
424 | | - __ bind(L_moreThan16); |
425 | | - __ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x20); |
426 | | - __ vmovdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x20)); |
427 | | - __ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1); |
428 | | - |
429 | | - // Point the haystack at the correct location of the first byte of the "real" haystack on the stack |
430 | | - __ bind(L_adjustHaystack); |
431 | | - __ subq(index, haystack_len); |
432 | | - __ leaq(haystack, Address(rsp, index, Address::times_1)); |
| 415 | + copy_to_stack(haystack, haystack_len, false, rax, XMM_TMP1, _masm); |
433 | 416 | } |
434 | 417 |
|
435 | 418 | // Dispatch to handlers for small needle and small haystack |
@@ -1583,34 +1566,8 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis |
1583 | 1566 | assert((COPIED_HAYSTACK_STACK_SIZE == 64), "Must be 64!"); |
1584 | 1567 |
|
1585 | 1568 | // Copy incoming haystack onto stack |
1586 | | - { |
1587 | | - Label L_adjustHaystack, L_moreThan16; |
1588 | | - |
1589 | | - // Copy haystack to stack (haystack <= 32 bytes) |
1590 | | - __ subptr(rsp, COPIED_HAYSTACK_STACK_SIZE); |
1591 | | - __ cmpq(haystack_len, isU ? 0x8 : 0x10); |
1592 | | - __ ja_b(L_moreThan16); |
1593 | | - |
1594 | | - __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x10); |
1595 | | - __ movdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x10)); |
1596 | | - __ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0); |
1597 | | - __ jmpb(L_adjustHaystack); |
1598 | | - |
1599 | | - __ bind(L_moreThan16); |
1600 | | - __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x20); |
1601 | | - __ vmovdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x20)); |
1602 | | - __ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0); |
1603 | | - |
1604 | | - __ bind(L_adjustHaystack); |
1605 | | - __ subptr(tmp, haystack_len); |
1606 | | - |
1607 | | - if (isU) { |
1608 | | - // For UTF-16, lengths are half |
1609 | | - __ subptr(tmp, haystack_len); |
1610 | | - } |
1611 | | - // Point the haystack to the stack |
1612 | | - __ leaq(haystack, Address(rsp, tmp, Address::times_1)); |
1613 | | - } |
| 1569 | + __ subptr(rsp, COPIED_HAYSTACK_STACK_SIZE); |
| 1570 | + copy_to_stack(haystack, haystack_len, isU, tmp, XMM0, _masm); |
1614 | 1571 |
|
1615 | 1572 | // Creates a mask of (n - k + 1) ones. This prevents recognizing any false-positives |
1616 | 1573 | // past the end of the valid haystack. |
@@ -1672,6 +1629,86 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis |
1672 | 1629 | __ jmpb(L_out); |
1673 | 1630 | } |
1674 | 1631 |
|
| 1632 | + |
| 1633 | + |
| 1634 | +// Copy the small (<= 32 byte) haystack to the stack. Allows for vector reads without page fault
| 1635 | +// Only done for small haystacks
| 1636 | +// NOTE: This code assumes that the haystack points to a java array type AND there are
| 1637 | +// at least 8 bytes of header preceding the haystack pointer.
| 1638 | +// We're copying up to 7 bytes of the header onto the stack along with the haystack bytes.
| 1639 | +// After the copy completes, we adjust the haystack pointer
| 1640 | +// to the valid haystack bytes on the stack.
| 1641 | +//
| 1642 | +// Copy haystack array elements to stack at region
| 1643 | +// (COPIED_HAYSTACK_STACK_OFFSET - COPIED_HAYSTACK_STACK_OFFSET+63) with the following conditions:
| 1644 | +// It may copy up to 7 bytes that precede the array
| 1645 | +// It doesn't read beyond the end of the array
| 1646 | +// There are at least 31 bytes of stack region beyond the end of array
| 1647 | +// Inputs:
| 1648 | +// haystack - Address of haystack
| 1649 | +// haystack_len - Number of elements in haystack
| 1650 | +// isU - true if elements are UTF-16 (2 bytes each); false for Latin1 (1 byte each)
| 1651 | +// tmp, xtmp - Scratch registers
| 1652 | +// Output:
| 1653 | +// haystack - Address of copied string on stack
| 1654 | +
| 1655 | +static void copy_to_stack(Register haystack, Register haystack_len, bool isU,
| 1656 | + Register tmp, XMMRegister xtmp, MacroAssembler *_masm) {
| 1657 | + Label L_moreThan8, L_moreThan16, L_moreThan24, L_adjustHaystack;
| 1658 | +
| 1659 | + assert(arrayOopDesc::base_offset_in_bytes(isU ? T_CHAR : T_BYTE) >= 8,
| 1660 | + "Needs at least 8 bytes preceding the array body");
| 1661 | +
| 1662 | + // Copy haystack to stack (haystack <= 32 bytes)
| 1663 | + int scale = isU ? 2 : 1; // bytes per char
| 1664 | + Address::ScaleFactor addrScale = isU ? Address::times_2 : Address::times_1;
| 1665 | +
| 1666 | + __ cmpq(haystack_len, 16/scale);
| 1667 | + __ ja_b(L_moreThan16);
| 1668 | +
| 1669 | + __ cmpq(haystack_len, 8/scale);
| 1670 | + __ ja_b(L_moreThan8);
| 1671 | + // haystack length <= 8 bytes, copy 8 bytes up to haystack end reading at most 7 bytes into the header
| 1672 | + __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 8);
| 1673 | + __ movq(xtmp, Address(haystack, haystack_len, addrScale, -8));
| 1674 | + __ movq(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
| 1675 | + __ jmpb(L_adjustHaystack);
| 1676 | +
| 1677 | + __ bind(L_moreThan8);
| 1678 | + // haystack length > 8 and <=16 bytes, copy 16 bytes up to haystack end reading at most 7 bytes into the header
| 1679 | + __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 16);
| 1680 | + __ movdqu(xtmp, Address(haystack, haystack_len, addrScale, -16));
| 1681 | + __ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
| 1682 | + __ jmpb(L_adjustHaystack);
| 1683 | +
| 1684 | + __ bind(L_moreThan16);
| 1685 | + __ cmpq(haystack_len, 24/scale);
| 1686 | + __ ja_b(L_moreThan24);
| 1687 | + // haystack length > 16 and <=24 bytes, copy 24 bytes up to haystack end reading at most 7 bytes into the header
| 1688 | + __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 24);
| 1689 | + __ movdqu(xtmp, Address(haystack, haystack_len, addrScale, -24));
| 1690 | + __ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
| 1691 | + __ movq(xtmp, Address(haystack, haystack_len, addrScale, -8));
| 1692 | + __ movq(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET + 16), xtmp);
| 1693 | + __ jmpb(L_adjustHaystack);
| 1694 | +
| 1695 | + __ bind(L_moreThan24);
| 1696 | + // haystack length > 24 and at most 32 bytes, copy 32 bytes up to haystack end reading at most 7 bytes into the header
| 1697 | + __ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 32);
| 1698 | + __ vmovdqu(xtmp, Address(haystack, haystack_len, addrScale, -32));
| 1699 | + __ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
| 1700 | +
| 1701 | + __ bind(L_adjustHaystack);
| 1702 | + __ subptr(tmp, haystack_len);
| 1703 | +
| 1704 | + if (isU) {
| 1705 | + __ subptr(tmp, haystack_len); // haystack_len is in chars; subtract again for UTF-16 byte length
| 1706 | + }
| 1707 | +
| 1708 | + // Point the haystack to the stack
| 1709 | + __ leaq(haystack, Address(rsp, tmp, Address::times_1));
| 1710 | +}
| 1711 | + |
1675 | 1712 | //////////////////////////////////////////////////////////////////////////////////////// |
1676 | 1713 | //////////////////////////////////////////////////////////////////////////////////////// |
1677 | 1714 | //////////////////////////////////////////////////////////////////////////////////////// |
|
0 commit comments