Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
3e2aab6
Add Unsafe.setMemory as intrinsic
asgibbons Mar 27, 2024
2334b03
Added actual intrinsic
asgibbons Mar 27, 2024
6eebcbd
Removed setMemory1; debugged intrinsic code
asgibbons Mar 28, 2024
7c73856
Test removing intrinsic
asgibbons Mar 28, 2024
74c47e2
Add benchmark
asgibbons Mar 28, 2024
6e283bc
Restore intrinsic
asgibbons Mar 28, 2024
44c24ec
Address review comment
asgibbons Mar 29, 2024
b17a1f4
Fixed bug - incorrect interface to *_fill_entry
asgibbons Mar 29, 2024
401a2a9
Clean up code for PR
asgibbons Mar 29, 2024
c5cb30c
Use non-sse fill (old left in)
asgibbons Apr 1, 2024
6ee69c8
Remove dead code
asgibbons Apr 1, 2024
3aa60a4
Addressing review comments.
asgibbons Apr 2, 2024
8bed156
Fix Windows
asgibbons Apr 3, 2024
b025318
Fixed generate_fill when count > 0x80000000
asgibbons Apr 5, 2024
fd6f04f
Oops
asgibbons Apr 6, 2024
f81aaa9
Add movq to locate_operand
asgibbons Apr 8, 2024
b0ac857
Address review comments (#15)
asgibbons Apr 11, 2024
95230e2
Set memory test (#16)
asgibbons Apr 11, 2024
41ffcc3
Merge master
asgibbons Apr 11, 2024
b99499a
Fix whitespace error.
asgibbons Apr 11, 2024
89db3eb
Addressing more review comments
asgibbons Apr 11, 2024
970c575
Addressing yet more review comments
asgibbons Apr 12, 2024
6e731c8
Even more review comments
asgibbons Apr 12, 2024
405e4e0
Change fill routines
asgibbons Apr 15, 2024
95b0a34
Rename UnsafeCopyMemory{,Mark} to UnsafeMemory{Access,Mark} (#19)
asgibbons Apr 15, 2024
44cc91b
Only add a memory mark for byte unaligned fill
asgibbons Apr 15, 2024
824fb60
Set memory test (#21)
asgibbons Apr 15, 2024
80b5a0c
Set memory test (#22)
asgibbons Apr 15, 2024
856464e
Set memory test (#23)
asgibbons Apr 15, 2024
116d7dd
Merge branch 'openjdk:master' into setMemory
asgibbons Apr 15, 2024
113aa90
Fix memory mark after sync to upstream
asgibbons Apr 15, 2024
7a1d67e
Add enter() and leave(); remove Windows-specific register stuff
asgibbons Apr 16, 2024
dccf6b6
Address review comments; update copyright years
asgibbons Apr 19, 2024
dd0094e
Review comments
asgibbons Apr 19, 2024
1961624
Long to short jmp; other cleanup
asgibbons Apr 19, 2024
c129016
Fix UnsafeCopyMemoryMark scope issue
asgibbons Apr 20, 2024
1122b50
Merge branch 'openjdk:master' into setMemory
asgibbons Apr 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8337,8 +8337,8 @@ class StubGenerator: public StubCodeGenerator {
}

// Initialize table for fill memory check.
if (UnsafeCopyMemory::_table == nullptr) {
UnsafeCopyMemory::create_table(8);
if (UnsafeSetMemory::_table == nullptr) {
UnsafeSetMemory::create_table(8);
}

if (UseCRC32Intrinsics) {
Expand Down
314 changes: 151 additions & 163 deletions src/hotspot/cpu/x86/stubGenerator_x86_64_arraycopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,8 @@ void StubGenerator::generate_arraycopy_stubs() {
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");

// #ifdef _LP64
StubRoutines::_unsafe_setmemory =
generate_unsafe_setmemory("unsafe_setmemory", StubRoutines::_jbyte_fill);
// #endif

// We don't generate specialized code for HeapWord-aligned source
// arrays, so just use the code we've already generated
Expand Down Expand Up @@ -2507,194 +2505,189 @@ address StubGenerator::generate_unsafe_setmemory(const char *name,
Label L_exit, L_fillQuadwords, L_fillDwords, L_fillBytes;

setup_arg_regs(3);
#undef dest
#define dest rdi
#undef size
#define size rsi
#undef wide_value
#define wide_value rax
#undef rScratch1
#define rScratch1 rcx
#undef byteVal
#define byteVal rdx
#undef rScratch3
#define rScratch3 r8
#undef rScratch4
#define rScratch4 r11

const Register dest = rdi;
const Register size = rsi;
const Register wide_value = rax;
const Register rScratch1 = rcx;
const Register rScratch3 = r8;
const Register rScratch4 = r11;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding an assert_different_registers check here for the registers declared above, just to be sure. (I see you are already avoiding the existing rscratch1 because it conflicts with c_rarg2.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

// fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char)

__ testq(size, size);
__ jcc(Assembler::zero, L_exit);

// Propagate byte to full register
__ movq(rScratch1, dest);
__ orq(rScratch1, size);
__ movzbl(rScratch3, byteVal);
__ mov64(wide_value, 0x0101010101010101);
__ imulq(wide_value, rScratch3);

#undef byteVal
#define rScratch2 rdx
__ testb(rScratch1, 7);
__ jcc(Assembler::equal, L_fillQuadwords);

__ testb(rScratch1, 3);
__ jcc(Assembler::equal, L_fillDwords);

__ testb(rScratch1, 1);
__ jcc(Assembler::notEqual, L_fillBytes);

// Fill words
{
Label L_wordsTail, L_wordsLoop, L_wordsTailLoop;
UnsafeSetMemoryMark usmm(this, true, true);
////// Set words
__ leaq(rScratch2, Address(size, 1));
__ movq(rScratch1, rScratch2);
__ shrq(rScratch1, 4);
__ cmpq(rScratch2, 16);
__ jccb(Assembler::below, L_wordsTail);
__ leaq(rScratch3, Address(dest, 14));
__ movq(rScratch4, rScratch1);

__ BIND(L_wordsLoop);

// Unroll 8 word stores
for (int i = 7; i >= 0; i--) {
__ movw(Address(rScratch3, -(2 * i)), wide_value);
}
const Register byteVal = rdx;

// Propagate byte to full register
__ movq(rScratch1, dest);
__ orq(rScratch1, size);
__ movzbl(rScratch3, byteVal);
__ mov64(wide_value, 0x0101010101010101);
__ imulq(wide_value, rScratch3);
}

__ addq(rScratch3, 16);
__ decrementq(rScratch4);
__ jccb(Assembler::notEqual, L_wordsLoop);
{
const Register rScratch2 = rdx;

__ BIND(L_wordsTail);
__ testb(rScratch1, 7);
__ jcc(Assembler::equal, L_fillQuadwords);

// Handle leftovers
__ shlq(rScratch1, 3);
__ shrq(rScratch2, 1);
__ cmpq(rScratch1, rScratch2);
__ jcc(Assembler::aboveEqual, L_exit);
__ decrementq(size);
__ shrq(size, 1);
__ incrementq(size);
__ testb(rScratch1, 3);
__ jcc(Assembler::equal, L_fillDwords);

__ BIND(L_wordsTailLoop);
__ testb(rScratch1, 1);
__ jcc(Assembler::notEqual, L_fillBytes);

__ movw(Address(dest, rScratch1, Address::times_2), wide_value);
__ incrementq(rScratch1);
__ cmpq(size, rScratch1);
__ jccb(Assembler::notEqual, L_wordsTailLoop);
}
__ jmp(L_exit);
// Fill words
{
Label L_wordsTail, L_wordsLoop, L_wordsTailLoop;
UnsafeSetMemoryMark usmm(this, true, true);
////// Set words
__ leaq(rScratch2, Address(size, 1));
__ movq(rScratch1, rScratch2);
__ shrq(rScratch1, 4);
__ cmpq(rScratch2, 16);
__ jccb(Assembler::below, L_wordsTail);
__ leaq(rScratch3, Address(dest, 14));
__ movq(rScratch4, rScratch1);

__ BIND(L_wordsLoop);

// Unroll 8 word stores
for (int i = 7; i >= 0; i--) {
__ movw(Address(rScratch3, -(2 * i)), wide_value);
}

__ BIND(L_fillQuadwords);
__ addq(rScratch3, 16);
__ decrementq(rScratch4);
__ jccb(Assembler::notEqual, L_wordsLoop);

// Fill QUADWORDs
{
Label L_qwordLoop, L_qwordsTail, L_qwordsTailLoop;
UnsafeSetMemoryMark usmm(this, true, true);
__ BIND(L_wordsTail);

__ leaq(rScratch2, Address(size, 7));
__ movq(rScratch1, rScratch2);
__ shrq(rScratch1, 6);
__ cmpq(rScratch2, 64);
__ jccb(Assembler::below, L_qwordsTail);
__ leaq(rScratch3, Address(dest, 56));
__ movq(rScratch4, rScratch1);
// Handle leftovers
__ shlq(rScratch1, 3);
__ shrq(rScratch2, 1);
__ cmpq(rScratch1, rScratch2);
__ jccb(Assembler::aboveEqual, L_exit);
__ decrementq(size);
__ shrq(size, 1);
__ incrementq(size);

__ BIND(L_qwordLoop);
__ BIND(L_wordsTailLoop);

// Unroll 8 qword stores
for (int i = 7; i >= 0; i--) {
__ movq(Address(rScratch3, -(8 * i)), wide_value);
__ movw(Address(dest, rScratch1, Address::times_2), wide_value);
__ incrementq(rScratch1);
__ cmpq(size, rScratch1);
__ jccb(Assembler::notEqual, L_wordsTailLoop);
}
__ addq(rScratch3, 64);
__ decrementq(rScratch4);
__ jccb(Assembler::notZero, L_qwordLoop);

__ BIND(L_qwordsTail);
__ jmp(L_exit);

// Handle leftovers
__ shlq(rScratch1, 3);
__ shrq(rScratch2, 3);
__ cmpq(rScratch1, rScratch2);
__ jcc(Assembler::aboveEqual, L_exit);
__ decrementq(size);
__ shrq(size, 3);
__ incrementq(size);

__ BIND(L_qwordsTailLoop);

__ movq(Address(dest, rScratch1, Address::times_8), wide_value);
__ incrementq(rScratch1);
__ cmpq(size, rScratch1);
__ jccb(Assembler::notEqual, L_qwordsTailLoop);
}
__ BIND(L_exit);
__ BIND(L_fillQuadwords);

restore_arg_regs();
__ ret(0);
// Fill QUADWORDs
{
Label L_qwordLoop, L_qwordsTail, L_qwordsTailLoop;
UnsafeSetMemoryMark usmm(this, true, true);

__ BIND(L_fillDwords);
__ leaq(rScratch2, Address(size, 7));
__ movq(rScratch1, rScratch2);
__ shrq(rScratch1, 6);
__ cmpq(rScratch2, 64);
__ jccb(Assembler::below, L_qwordsTail);
__ leaq(rScratch3, Address(dest, 56));
__ movq(rScratch4, rScratch1);

// Fill DWORDs
{
Label L_dwordLoop, L_dwordsTail, L_dwordsTailLoop;
UnsafeSetMemoryMark usmm(this, true, true);
__ BIND(L_qwordLoop);

__ leaq(rScratch2, Address(size, 3));
__ movq(rScratch1, rScratch2);
__ shrq(rScratch1, 5);
__ cmpq(rScratch2, 32);
__ jccb(Assembler::below, L_dwordsTail);
__ leaq(rScratch3, Address(dest, 28));
__ movq(rScratch4, rScratch1);
// Unroll 8 qword stores
for (int i = 7; i >= 0; i--) {
__ movq(Address(rScratch3, -(8 * i)), wide_value);
}
__ addq(rScratch3, 64);
__ decrementq(rScratch4);
__ jccb(Assembler::notZero, L_qwordLoop);

__ BIND(L_qwordsTail);

// Handle leftovers
__ shlq(rScratch1, 3);
__ shrq(rScratch2, 3);
__ cmpq(rScratch1, rScratch2);
__ jccb(Assembler::aboveEqual, L_exit);
__ decrementq(size);
__ shrq(size, 3);
__ incrementq(size);

__ BIND(L_qwordsTailLoop);

__ movq(Address(dest, rScratch1, Address::times_8), wide_value);
__ incrementq(rScratch1);
__ cmpq(size, rScratch1);
__ jccb(Assembler::notEqual, L_qwordsTailLoop);
}
__ BIND(L_exit);

__ BIND(L_dwordLoop);
restore_arg_regs();
__ ret(0);

// Unroll 8 dword stores
for (int i = 7; i >= 0; i--) {
__ movl(Address(rScratch3, -(i * 4)), wide_value);
}
__ addq(rScratch3, 32);
__ decrementq(rScratch4);
__ jccb(Assembler::notZero, L_dwordLoop);
__ BIND(L_fillDwords);

__ BIND(L_dwordsTail);
// Fill DWORDs
{
Label L_dwordLoop, L_dwordsTail, L_dwordsTailLoop;
UnsafeSetMemoryMark usmm(this, true, true);

#undef rScratch3
#undef rScratch4
__ leaq(rScratch2, Address(size, 3));
__ movq(rScratch1, rScratch2);
__ shrq(rScratch1, 5);
__ cmpq(rScratch2, 32);
__ jccb(Assembler::below, L_dwordsTail);
__ leaq(rScratch3, Address(dest, 28));
__ movq(rScratch4, rScratch1);

// Handle leftovers
__ shlq(rScratch1, 3);
__ shrq(rScratch2, 2);
__ cmpq(rScratch1, rScratch2);
__ jccb(Assembler::aboveEqual, L_exit);
__ decrementq(size);
__ shrq(size, 2);
__ incrementq(size);
__ BIND(L_dwordLoop);

__ BIND(L_dwordsTailLoop);
// Unroll 8 dword stores
for (int i = 7; i >= 0; i--) {
__ movl(Address(rScratch3, -(i * 4)), wide_value);
}
__ addq(rScratch3, 32);
__ decrementq(rScratch4);
__ jccb(Assembler::notZero, L_dwordLoop);

__ BIND(L_dwordsTail);

// Handle leftovers
__ shlq(rScratch1, 3);
__ shrq(rScratch2, 2);
__ cmpq(rScratch1, rScratch2);
__ jccb(Assembler::aboveEqual, L_exit);
__ decrementq(size);
__ shrq(size, 2);
__ incrementq(size);

__ BIND(L_dwordsTailLoop);

__ movl(Address(dest, rScratch1, Address::times_4), wide_value);
__ incrementq(rScratch1);
__ cmpq(size, rScratch1);
__ jccb(Assembler::notEqual, L_dwordsTailLoop);
}
__ jmp(L_exit);

__ movl(Address(dest, rScratch1, Address::times_4), wide_value);
__ incrementq(rScratch1);
__ cmpq(size, rScratch1);
__ jccb(Assembler::notEqual, L_dwordsTailLoop);
__ BIND(L_fillBytes);
}
__ jmpb(L_exit);

__ BIND(L_fillBytes);
#ifdef MUSL_LIBC
{
Label L_byteLoop, L_longByteLoop, L_byteTail, L_byteTailLoop;
UnsafeSetMemoryMark usmm(this, true, true);

#undef wide_value
#define savedSize rax
#undef rScratch2
#define byteVal rdx
const Register savedSize = rax;
const Register byteVal = rdx;

UnsafeSetMemoryMark usmm(this, true, true);

__ movq(savedSize, size);
__ andq(savedSize, 7);
Expand Down Expand Up @@ -2733,13 +2726,14 @@ address StubGenerator::generate_unsafe_setmemory(const char *name,
__ cmpq(savedSize, rScratch1);
__ jccb(Assembler::notEqual, L_byteTailLoop);
}
__ jmp(L_exit);
#else // MUSL_LIBC
#define byteVal rdx
{
const Register byteVal = rdx;
#ifdef _WIN32
__ movq(rcx, rdi); // Restore c_rarg*
__ movq(rdx, rsi);
__ movq(r8, rdx);
__ movq(rdx, rsi);
restore_arg_regs();
#endif
// rax has expanded byte value
Expand All @@ -2762,12 +2756,6 @@ address StubGenerator::generate_unsafe_setmemory(const char *name,
__ ret(0);
}
#endif // MUSL_LIBC

#undef dest
#undef size
#undef savedSize
#undef rScratch1
#undef byteVal
}

return start;
Expand Down