From e7cf3a831d76c4853d671a398af5204eaae73a61 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 4 Apr 2025 04:03:49 +0000 Subject: [PATCH 01/13] s390: unsafe::setMemory Port --- .../cpu/s390/assembler_s390.inline.hpp | 11 +- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 125 ++++++++++++++++++ 2 files changed, 132 insertions(+), 4 deletions(-) diff --git a/src/hotspot/cpu/s390/assembler_s390.inline.hpp b/src/hotspot/cpu/s390/assembler_s390.inline.hpp index 567f3d75a62c3..419a0632a1cad 100644 --- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp +++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp @@ -415,10 +415,13 @@ inline void Assembler::z_rosbg( Register r1, Register r2, int64_t spos3, int64_t } inline void Assembler::z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest) { // Rotate then INS selected bits. -- z196 const int64_t len = 48; - assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Could just trim to 6bits wide w/o assertion. - assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Could just trim to 6bits wide w/o assertion. - assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction. - emit_48( RISBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(zero_rest ? 1 : 0, len-24-1, len-24-1)); + assert(Immediate::is_uimm(spos3, 8), "range start out of range"); // Could just trim to 6bits wide w/o assertion. + assert(Immediate::is_uimm(epos4, 8), "range end out of range"); // Could just trim to 6bits wide w/o assertion. + assert(Immediate::is_uimm(nrot5, 8), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction. + assert((spos3 & 192) == 0, "bits 0, 1 of I3 field are reserved"); + assert((epos4 & 64) == 0, "bit 1 of I4 field is reserved"); + assert((nrot5 & 192) == 0, "bits 0, 1 of I5 field are ignored by instruction, make sure that will not cause trouble"); + emit_48( RISBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm8(spos3, 16, len) | uimm8(epos4, 24, len) | uimm8(nrot5, 32, len) | u_field(zero_rest ? 1 : 0, len-24-1, len-24-1)); } diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index b46393f543e87..42f0b185f481c 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1469,10 +1469,127 @@ class StubGenerator: public StubCodeGenerator { } + // Helper for generate_unsafe_setmemory + // + // Atomically fill an array of memory using 1-, 2-, 4-, or 8-byte chunks and return. 
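
Note: for readers without the Principles of Operation at hand, RISBG ("rotate then insert selected bits") rotates the second operand and inserts a bit range of the result into the first operand; the assertion changes above concern the 8-bit I3/I4/I5 instruction fields, of which only the low 6 bits of each select bit positions — the top bit of I4 is the "zero remaining bits" flag and the other spare bits are reserved or ignored. A minimal C++ model of the semantics, written for this review with made-up names (illustrative only, not code from the patch):

    #include <cstdint>

    // Rotate left by n (n taken modulo 64).
    static uint64_t rotl64(uint64_t v, unsigned n) {
      n &= 63;
      return n == 0 ? v : (v << n) | (v >> (64 - n));
    }

    // RISBG model: rotate r2 left by 'rot', then insert bits start..end of the
    // rotated value into r1 (bit 0 = most significant). With zero_rest (the
    // RISBGZ form, selected by the high bit of the I4 field) the bits of r1
    // outside the range are cleared instead of preserved.
    uint64_t risbg_model(uint64_t r1, uint64_t r2, unsigned start, unsigned end,
                         unsigned rot, bool zero_rest) {
      uint64_t lo   = ~0ULL >> start;              // bits start..63 set
      uint64_t hi   = ~0ULL << (63 - end);         // bits 0..end set
      uint64_t mask = (start <= end) ? (lo & hi)   // contiguous range
                                     : (lo | hi);  // wrap-around range
      uint64_t rotated = rotl64(r2, rot);
      uint64_t kept    = zero_rest ? 0 : (r1 & ~mask);
      return kept | (rotated & mask);
    }
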
+ static void do_setmemory_atomic_loop(int elem_size, Register dest, Register size, Register byteVal, + MacroAssembler *_masm) { + + NearLabel L_Loop, L_Tail; // 2x unrolled loop + Register tmp = Z_R1; // R1 is free at this point + + if (elem_size > 1) { + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, 0); + } + + if (elem_size > 2) { + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, 0); + } + + if (elem_size > 4) { + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, 0); + } + + __ z_risbg(tmp, size, 32, 128/* risbgz */ + 63, 64 - exact_log2(2 * elem_size), 0); // just do the right shift and set cc + __ z_bre(L_Tail); + + __ align(16); // loop alignment + __ bind(L_Loop); + __ store_sized_value(byteVal, Address(dest, 0), elem_size); + __ store_sized_value(byteVal, Address(dest, elem_size), elem_size); + __ z_agfi(dest, 2 * elem_size); + __ z_brct(tmp, L_Loop); + + __ bind(L_Tail); + __ z_nilf(size, elem_size); + __ z_bcr(Assembler::bcondEqual, Z_R14); + __ store_sized_value(byteVal, Address(dest, 0), elem_size); + __ z_br(Z_R14); + } + + // + // Generate 'unsafe' set memory stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t (# bytes) argument instead of an element count. + // + // Input: + // Z_ARG1 - destination array address + // Z_ARG2 - byte count (size_t) + // Z_ARG3 - byte value + // + address generate_unsafe_setmemory(address unsafe_byte_fill) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, StubGenStubId::unsafe_setmemory_id); + unsigned int start_off = __ offset(); + + // bump this on entry, not on exit: + // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr); + + { + NearLabel L_fill8Bytes, L_fill4Bytes, L_fillBytes, L_exit; + + const Register dest = Z_ARG1; + const Register size = Z_ARG2; + const Register byteVal = Z_ARG3; + const Register rScratch1 = Z_R1_scratch; + // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char) + + // Check for pointer & size alignment + __ z_ogrk(rScratch1, dest, size); + + __ z_nill(rScratch1, 7); + __ z_bre(L_fill8Bytes); // branch if 0 + + + __ z_nill(rScratch1, 3); + __ z_bre(L_fill4Bytes); // branch if 0 + + __ z_nill(rScratch1, 1); + __ z_brne(L_fillBytes); // branch if not 0 + + // Mark remaining code as such which performs Unsafe accesses. + UnsafeMemoryAccessMark umam(this, true, false); + + // At this point, we know the lower bit of size is zero and a + // multiple of 2 + do_setmemory_atomic_loop(2, dest, size, byteVal, _masm); + + __ align(16); + __ bind(L_fill8Bytes); + // At this point, we know the lower 3 bits of size are zero and a + // multiple of 8 + do_setmemory_atomic_loop(8, dest, size, byteVal, _masm); + + __ align(16); + __ bind(L_fill4Bytes); + // At this point, we know the lower 2 bits of size are zero and a + // multiple of 4 + do_setmemory_atomic_loop(4, dest, size, byteVal, _masm); + + __ align(16); + __ bind(L_fillBytes); + do_setmemory_atomic_loop(1, dest, size, byteVal, _masm); + } + + return __ addr_at(start_off); + } + + // This is common errorexit stub for UnsafeMemoryAccess. + address generate_unsafecopy_common_error_exit() { + unsigned int start_off = __ offset(); + __ z_lghi(Z_RET, 0); // return 0 + __ z_br(Z_R14); + return __ addr_at(start_off); + } + void generate_arraycopy_stubs() { // Note: the disjoint stubs must be generated first, some of // the conjoint stubs use them. 
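
Note: as a cross-check of the control flow just added, a plain-C++ model of what the generated stub computes may be useful: choose the widest element size (up to 8) that divides both the destination pointer and the byte count, replicate the fill byte across that element, fill two elements per iteration, and finish with at most one tail element. The sketch below is illustrative only (ordinary stores, no per-element atomicity guarantee, made-up function name), not code from the patch:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void setmemory_model(void* dest, size_t size, uint8_t byte_val) {
      uintptr_t mix = reinterpret_cast<uintptr_t>(dest) | size;       // z_ogrk(rScratch1, dest, size)
      size_t elem = ((mix & 7) == 0) ? 8 : ((mix & 3) == 0) ? 4
                  : ((mix & 1) == 0) ? 2 : 1;

      // rotate_then_insert steps: replicate the byte across one element.
      // (Replicating across all 8 bytes makes the memcpy below endian-neutral.)
      uint64_t pattern = byte_val * 0x0101010101010101ULL;

      uint8_t* d     = static_cast<uint8_t*>(dest);
      size_t   pairs = size / (2 * elem);                             // the z_risbg shift
      for (size_t i = 0; i < pairs; ++i, d += 2 * elem) {             // z_brct-controlled, 2x unrolled
        std::memcpy(d,        &pattern, elem);
        std::memcpy(d + elem, &pattern, elem);
      }
      if (size & elem)                                                // z_nilf(size, elem_size) tail test
        std::memcpy(d, &pattern, elem);                               // exactly one element left
    }
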
+ + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit); + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_nonoop_copy (StubGenStubId::jbyte_disjoint_arraycopy_id); StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_nonoop_copy(StubGenStubId::jshort_disjoint_arraycopy_id); StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_nonoop_copy (StubGenStubId::jint_disjoint_arraycopy_id); @@ -1500,6 +1617,10 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_nonoop_copy(StubGenStubId::arrayof_jlong_arraycopy_id); StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_id); StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_uninit_id); + +#ifdef COMPILER2 + StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory(StubRoutines::_jbyte_fill); +#endif // COMPILER2 } // Call interface for AES_encryptBlock, AES_decryptBlock stubs. @@ -3184,6 +3305,10 @@ class StubGenerator: public StubCodeGenerator { //---------------------------------------------------------------------- // Entry points that are platform specific. + if (UnsafeMemoryAccess::_table == nullptr) { + UnsafeMemoryAccess::create_table(4); // 4 for setMemory + } + if (UseCRC32Intrinsics) { StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table; StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes(); From bfbf99b7efe54e01eec3e4d737c1c57766287e34 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Tue, 8 Apr 2025 07:12:06 +0000 Subject: [PATCH 02/13] reviews from Lutz and Martin --- src/hotspot/cpu/s390/assembler_s390.inline.hpp | 11 ++++------- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 17 +++++++---------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/hotspot/cpu/s390/assembler_s390.inline.hpp b/src/hotspot/cpu/s390/assembler_s390.inline.hpp index 419a0632a1cad..567f3d75a62c3 100644 --- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp +++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp @@ -415,13 +415,10 @@ inline void Assembler::z_rosbg( Register r1, Register r2, int64_t spos3, int64_t } inline void Assembler::z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest) { // Rotate then INS selected bits. -- z196 const int64_t len = 48; - assert(Immediate::is_uimm(spos3, 8), "range start out of range"); // Could just trim to 6bits wide w/o assertion. - assert(Immediate::is_uimm(epos4, 8), "range end out of range"); // Could just trim to 6bits wide w/o assertion. - assert(Immediate::is_uimm(nrot5, 8), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction. - assert((spos3 & 192) == 0, "bits 0, 1 of I3 field are reserved"); - assert((epos4 & 64) == 0, "bit 1 of I4 field is reserved"); - assert((nrot5 & 192) == 0, "bits 0, 1 of I5 field are ignored by instruction, make sure that will not cause trouble"); - emit_48( RISBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm8(spos3, 16, len) | uimm8(epos4, 24, len) | uimm8(nrot5, 32, len) | u_field(zero_rest ? 1 : 0, len-24-1, len-24-1)); + assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Could just trim to 6bits wide w/o assertion. + assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Could just trim to 6bits wide w/o assertion. 
+ assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction. + emit_48( RISBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(zero_rest ? 1 : 0, len-24-1, len-24-1)); } diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index 42f0b185f481c..cec84a262404c 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1490,14 +1490,14 @@ class StubGenerator: public StubCodeGenerator { __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, 0); } - __ z_risbg(tmp, size, 32, 128/* risbgz */ + 63, 64 - exact_log2(2 * elem_size), 0); // just do the right shift and set cc + __ z_risbg(tmp, size, 32, 63, 64 - exact_log2(2 * elem_size), /* zero_rest */ true); // just do the right shift and set cc __ z_bre(L_Tail); - __ align(16); // loop alignment + __ align(32); // loop alignment __ bind(L_Loop); __ store_sized_value(byteVal, Address(dest, 0), elem_size); __ store_sized_value(byteVal, Address(dest, elem_size), elem_size); - __ z_agfi(dest, 2 * elem_size); + __ z_aghi(dest, 2 * elem_size); __ z_brct(tmp, L_Loop); __ bind(L_Tail); @@ -1526,7 +1526,7 @@ class StubGenerator: public StubCodeGenerator { // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr); { - NearLabel L_fill8Bytes, L_fill4Bytes, L_fillBytes, L_exit; + NearLabel L_fill8Bytes, L_fill4Bytes, L_fillBytes; const Register dest = Z_ARG1; const Register size = Z_ARG2; @@ -1538,14 +1538,14 @@ class StubGenerator: public StubCodeGenerator { __ z_ogrk(rScratch1, dest, size); __ z_nill(rScratch1, 7); - __ z_bre(L_fill8Bytes); // branch if 0 + __ z_braz(L_fill8Bytes); // branch if 0 __ z_nill(rScratch1, 3); - __ z_bre(L_fill4Bytes); // branch if 0 + __ z_braz(L_fill4Bytes); // branch if 0 __ z_nill(rScratch1, 1); - __ z_brne(L_fillBytes); // branch if not 0 + __ z_brnaz(L_fillBytes); // branch if not 0 // Mark remaining code as such which performs Unsafe accesses. 
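
Note: one subtlety in the z_risbg change above is worth spelling out: with zero_rest = true the instruction is encoded as RISBGZ, so a single instruction performs the unsigned right shift that yields the pair count, zeroes the unselected bits, and sets the condition code — which is what lets the following z_bre branch without a separate compare. An illustrative model (made-up name, not code from the patch):

    #include <cstdint>

    // tmp = (size rotated right by log2(2 * elem_size)) with only bits 32..63 kept,
    // i.e. a 32-bit-truncated unsigned shift; the condition code is set on the result.
    uint64_t pair_count_model(uint64_t size, unsigned elem_size /* 2, 4 or 8 */) {
      unsigned shift = (elem_size == 8) ? 4 : (elem_size == 4) ? 3 : 2;   // log2(2 * elem_size)
      return (size >> shift) & 0xFFFFFFFFu;   // zero result => only the tail store remains
    }
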
UnsafeMemoryAccessMark umam(this, true, false); @@ -1554,19 +1554,16 @@ class StubGenerator: public StubCodeGenerator { // multiple of 2 do_setmemory_atomic_loop(2, dest, size, byteVal, _masm); - __ align(16); __ bind(L_fill8Bytes); // At this point, we know the lower 3 bits of size are zero and a // multiple of 8 do_setmemory_atomic_loop(8, dest, size, byteVal, _masm); - __ align(16); __ bind(L_fill4Bytes); // At this point, we know the lower 2 bits of size are zero and a // multiple of 4 do_setmemory_atomic_loop(4, dest, size, byteVal, _masm); - __ align(16); __ bind(L_fillBytes); do_setmemory_atomic_loop(1, dest, size, byteVal, _masm); } From a6af6da26d1e0590dc24486131d1bc752e047f98 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 9 Apr 2025 07:26:57 +0000 Subject: [PATCH 03/13] minor improvement --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index cec84a262404c..6c21e7e9de7c3 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1476,7 +1476,8 @@ class StubGenerator: public StubCodeGenerator { MacroAssembler *_masm) { NearLabel L_Loop, L_Tail; // 2x unrolled loop - Register tmp = Z_R1; // R1 is free at this point + Register tmp = Z_R0; // R1 is free at this point + Register tmp1 = Z_R1; if (elem_size > 1) { __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, 0); @@ -1494,9 +1495,22 @@ class StubGenerator: public StubCodeGenerator { __ z_bre(L_Tail); __ align(32); // loop alignment - __ bind(L_Loop); + __ z_lgr(tmp1, dest); __ store_sized_value(byteVal, Address(dest, 0), elem_size); __ store_sized_value(byteVal, Address(dest, elem_size), elem_size); + + __ z_aghi(dest, 2 * elem_size); + __ z_aghi(tmp, -1); + + __ z_ltr(tmp, tmp); + __ z_bcr(Assembler::bcondEqual, Z_R14); + + __ bind(L_Loop); + __ z_mvc( + Address(dest, 0), // move to + Address(tmp1, 0), // move from + 2 * elem_size // size of data + ); __ z_aghi(dest, 2 * elem_size); __ z_brct(tmp, L_Loop); From 7e3bb5eb198fa9b06c334e1c9521c11569565253 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 9 Apr 2025 08:47:19 +0000 Subject: [PATCH 04/13] Revert "minor improvement" This reverts commit a6af6da26d1e0590dc24486131d1bc752e047f98. 
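
Note: for reference, the "minor improvement" reverted above kept the first pair of ordinary stores and then replicated that leading pair into each following slot with a non-overlapping MVC from the saved start address (tmp1). A rough sketch of the idea in C++ (illustrative only; 'pattern' is assumed to hold the already-replicated fill byte, names are made up):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Assumes pairs >= 1 and elem in {1, 2, 4, 8}.
    void pair_replicate_model(uint8_t* dest, size_t pairs, size_t elem, uint64_t pattern) {
      std::memcpy(dest,        &pattern, elem);        // store_sized_value #1
      std::memcpy(dest + elem, &pattern, elem);        // store_sized_value #2
      for (size_t i = 1; i < pairs; ++i)               // z_mvc(Address(dest), Address(tmp1), 2 * elem)
        std::memcpy(dest + i * 2 * elem, dest, 2 * elem);
    }
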
--- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index 6c21e7e9de7c3..cec84a262404c 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1476,8 +1476,7 @@ class StubGenerator: public StubCodeGenerator { MacroAssembler *_masm) { NearLabel L_Loop, L_Tail; // 2x unrolled loop - Register tmp = Z_R0; // R1 is free at this point - Register tmp1 = Z_R1; + Register tmp = Z_R1; // R1 is free at this point if (elem_size > 1) { __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, 0); @@ -1495,22 +1494,9 @@ class StubGenerator: public StubCodeGenerator { __ z_bre(L_Tail); __ align(32); // loop alignment - __ z_lgr(tmp1, dest); + __ bind(L_Loop); __ store_sized_value(byteVal, Address(dest, 0), elem_size); __ store_sized_value(byteVal, Address(dest, elem_size), elem_size); - - __ z_aghi(dest, 2 * elem_size); - __ z_aghi(tmp, -1); - - __ z_ltr(tmp, tmp); - __ z_bcr(Assembler::bcondEqual, Z_R14); - - __ bind(L_Loop); - __ z_mvc( - Address(dest, 0), // move to - Address(tmp1, 0), // move from - 2 * elem_size // size of data - ); __ z_aghi(dest, 2 * elem_size); __ z_brct(tmp, L_Loop); From 1b8ea8bb44919bd0a84137040380f173ed41ead3 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 9 Apr 2025 08:50:44 +0000 Subject: [PATCH 05/13] reviews for Martin --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index cec84a262404c..e7b6b3cd6b9a1 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1479,15 +1479,15 @@ class StubGenerator: public StubCodeGenerator { Register tmp = Z_R1; // R1 is free at this point if (elem_size > 1) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, 0); + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, false); } if (elem_size > 2) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, 0); + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, false); } if (elem_size > 4) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, 0); + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, false); } __ z_risbg(tmp, size, 32, 63, 64 - exact_log2(2 * elem_size), /* zero_rest */ true); // just do the right shift and set cc @@ -1540,7 +1540,6 @@ class StubGenerator: public StubCodeGenerator { __ z_nill(rScratch1, 7); __ z_braz(L_fill8Bytes); // branch if 0 - __ z_nill(rScratch1, 3); __ z_braz(L_fill4Bytes); // branch if 0 From f1d075a3e0f611f54ea341f5b70684b1adbac30b Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 16 Apr 2025 06:07:50 +0000 Subject: [PATCH 06/13] [wip] initial mvc template solution --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 83 +++++-------------- .../lang/foreign/MemorySegmentZeroUnsafe.java | 10 +-- 2 files changed, 28 insertions(+), 65 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index e7b6b3cd6b9a1..4bf613c988a6e 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1471,40 +1471,36 @@ class StubGenerator: public StubCodeGenerator { // Helper for generate_unsafe_setmemory // - // Atomically fill an 
array of memory using 1-, 2-, 4-, or 8-byte chunks and return. - static void do_setmemory_atomic_loop(int elem_size, Register dest, Register size, Register byteVal, + // Atomically fill an array of memory using 1 byte chunk and return. + static void do_setmemory_atomic_loop(Register dest, Register size, Register byteVal, MacroAssembler *_masm) { - NearLabel L_Loop, L_Tail; // 2x unrolled loop - Register tmp = Z_R1; // R1 is free at this point + NearLabel L_loop, L_tail, L_mvc; - if (elem_size > 1) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, false); - } - - if (elem_size > 2) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, false); - } + __ z_cghi(size, 256); + __ z_brc(Assembler::bcondLow, L_tail); // size is <256 - if (elem_size > 4) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, false); - } + // handle size >= 256 + __ bind(L_loop); + __ z_stc(byteVal, Address(dest)); + __ z_mvc(1, 255, dest, 0, dest); + __ z_aghi(size, -256); + __ z_aghi(dest, 256); + __ z_cghi(size, 256); + __ z_brh(L_loop); - __ z_risbg(tmp, size, 32, 63, 64 - exact_log2(2 * elem_size), /* zero_rest */ true); // just do the right shift and set cc - __ z_bre(L_Tail); + __ z_ltr(size, size); + __ z_bcr(Assembler::bcondZero, Z_R14); // size is 0 - __ align(32); // loop alignment - __ bind(L_Loop); - __ store_sized_value(byteVal, Address(dest, 0), elem_size); - __ store_sized_value(byteVal, Address(dest, elem_size), elem_size); - __ z_aghi(dest, 2 * elem_size); - __ z_brct(tmp, L_Loop); + __ bind(L_tail); + __ z_stc(byteVal, Address(dest)); + __ z_exrl(size, L_mvc); - __ bind(L_Tail); - __ z_nilf(size, elem_size); - __ z_bcr(Assembler::bcondEqual, Z_R14); - __ store_sized_value(byteVal, Address(dest, 0), elem_size); __ z_br(Z_R14); + + __ bind(L_mvc); + __ z_mvc(1, 0, dest, 0, dest); // mvc template, needs to be generated, not executed + } // @@ -1526,47 +1522,14 @@ class StubGenerator: public StubCodeGenerator { // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr); { - NearLabel L_fill8Bytes, L_fill4Bytes, L_fillBytes; - const Register dest = Z_ARG1; const Register size = Z_ARG2; const Register byteVal = Z_ARG3; const Register rScratch1 = Z_R1_scratch; // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char) - // Check for pointer & size alignment - __ z_ogrk(rScratch1, dest, size); - - __ z_nill(rScratch1, 7); - __ z_braz(L_fill8Bytes); // branch if 0 - - __ z_nill(rScratch1, 3); - __ z_braz(L_fill4Bytes); // branch if 0 - - __ z_nill(rScratch1, 1); - __ z_brnaz(L_fillBytes); // branch if not 0 - - // Mark remaining code as such which performs Unsafe accesses. 
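
Note: two z/Architecture idioms do the work in this version and are easy to miss. MVC copies bytes strictly left to right, so an overlapping move from dest to dest + 1 smears the byte just stored by STC across the whole operand; and EXRL executes the out-of-line MVC template with the template's (zero) length field OR'ed with the low byte of the named register, which covers any tail length with a single template. Illustrative C++ models (made-up names, not code from the patch):

    #include <cstdint>

    // Overlapping MVC: length_code + 1 bytes, moved one at a time, left to right,
    // so dest[0] propagates through the destination.
    void mvc_overlap_model(uint8_t* dest, unsigned length_code /* 0..255 */) {
      for (unsigned i = 0; i <= length_code; ++i)
        dest[1 + i] = dest[i];
    }

    // EXRL on the template z_mvc(1, 0, dest, 0, dest): the executed length code
    // becomes (size_reg & 0xff) because the template's own length field is zero.
    void exrl_mvc_model(uint8_t* dest, uint64_t size_reg) {
      mvc_overlap_model(dest, static_cast<unsigned>(size_reg & 0xff));
    }
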
- UnsafeMemoryAccessMark umam(this, true, false); - - // At this point, we know the lower bit of size is zero and a - // multiple of 2 - do_setmemory_atomic_loop(2, dest, size, byteVal, _masm); - - __ bind(L_fill8Bytes); - // At this point, we know the lower 3 bits of size are zero and a - // multiple of 8 - do_setmemory_atomic_loop(8, dest, size, byteVal, _masm); - - __ bind(L_fill4Bytes); - // At this point, we know the lower 2 bits of size are zero and a - // multiple of 4 - do_setmemory_atomic_loop(4, dest, size, byteVal, _masm); - - __ bind(L_fillBytes); - do_setmemory_atomic_loop(1, dest, size, byteVal, _masm); + do_setmemory_atomic_loop(dest, size, byteVal, _masm); } - return __ addr_at(start_off); } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java index 09e5a1e3e77cb..6412ca8a376f4 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java @@ -50,7 +50,7 @@ public class MemorySegmentZeroUnsafe { static final Unsafe UNSAFE = Utils.unsafe; long src; - @Param({"1", "2", "3", "4", "5", "6", "7", "8", "15", "16", "63", "64", "255", "256"}) + @Param({"255"}) public int size; @Param({"true", "false"}) @@ -81,10 +81,10 @@ public void setup() throws Throwable { address = segment.address(); } - @Benchmark - public void panama() { - segment.fill((byte) 0); - } +// @Benchmark +// public void panama() { +// segment.fill((byte) 0); +// } @Benchmark public void unsafe() { From 36ef2e434331253e26fa3774c05f6b6862b6545b Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 16 Apr 2025 11:27:42 +0000 Subject: [PATCH 07/13] wip: fixed the regression --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 76 ++++++++++++++++++- .../lang/foreign/MemorySegmentZeroUnsafe.java | 11 +-- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index 4bf613c988a6e..ad5cdb3ecc9ca 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1472,7 +1472,9 @@ class StubGenerator: public StubCodeGenerator { // Helper for generate_unsafe_setmemory // // Atomically fill an array of memory using 1 byte chunk and return. - static void do_setmemory_atomic_loop(Register dest, Register size, Register byteVal, + // We don't care about atomicity because the address and size are not aligned, So we are + // free to fill the memory with best possible ways. 
+ static void do_setmemory_atomic_loop_mvc(Register dest, Register size, Register byteVal, MacroAssembler *_masm) { NearLabel L_loop, L_tail, L_mvc; @@ -1494,6 +1496,9 @@ class StubGenerator: public StubCodeGenerator { __ bind(L_tail); __ z_stc(byteVal, Address(dest)); + __ z_aghi(size, -2); + __ z_cghi(size, 0); + __ z_bcr(Assembler::bcondLow, Z_R14); // size < 0 __ z_exrl(size, L_mvc); __ z_br(Z_R14); @@ -1503,6 +1508,41 @@ class StubGenerator: public StubCodeGenerator { } + static void do_setmemory_atomic_loop(int elem_size, Register dest, Register size, Register byteVal, + MacroAssembler *_masm) { + + NearLabel L_Loop, L_Tail; // 2x unrolled loop + Register tmp = Z_R1; // R1 is free at this point + + if (elem_size > 1) { + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, 0); + } + + if (elem_size > 2) { + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, 0); + } + + if (elem_size > 4) { + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, 0); + } + + __ z_risbg(tmp, size, 32, 63, 64 - exact_log2(2 * elem_size), /* zero_rest */ true); // just do the right shift and set cc + __ z_bre(L_Tail); + + __ align(32); // loop alignment + __ bind(L_Loop); + __ store_sized_value(byteVal, Address(dest, 0), elem_size); + __ store_sized_value(byteVal, Address(dest, elem_size), elem_size); + __ z_aghi(dest, 2 * elem_size); + __ z_brct(tmp, L_Loop); + + __ bind(L_Tail); + __ z_nilf(size, elem_size); + __ z_bcr(Assembler::bcondEqual, Z_R14); + __ store_sized_value(byteVal, Address(dest, 0), elem_size); + __ z_br(Z_R14); + } + // // Generate 'unsafe' set memory stub // Though just as safe as the other stubs, it takes an unscaled @@ -1526,9 +1566,41 @@ class StubGenerator: public StubCodeGenerator { const Register size = Z_ARG2; const Register byteVal = Z_ARG3; const Register rScratch1 = Z_R1_scratch; + NearLabel L_fill8Bytes, L_fill4Bytes, L_fillBytes; // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char) - do_setmemory_atomic_loop(dest, size, byteVal, _masm); + // Check for pointer & size alignment + __ z_ogrk(rScratch1, dest, size); + + __ z_nill(rScratch1, 7); + __ z_braz(L_fill8Bytes); // branch if 0 + + + __ z_nill(rScratch1, 3); + __ z_braz(L_fill4Bytes); // branch if 0 + + __ z_nill(rScratch1, 1); + __ z_brnaz(L_fillBytes); // branch if not 0 + + // Mark remaining code as such which performs Unsafe accesses. 
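
Note: the tail arithmetic added above is the easy part to misread. After STC has written one byte, z_aghi(size, -2) leaves exactly the MVC length code for the remaining bytes (length code = remaining bytes - 1), and a negative result means the single stored byte already completed the fill. A C++ rendering of that tail (illustrative only, made-up name; the guard at the top is model-only and not in the patch):

    #include <cstdint>

    void byte_tail_model(uint8_t* dest, int64_t size, uint8_t byte_val) {
      if (size < 1) return;              // model-only guard
      dest[0] = byte_val;                // z_stc
      int64_t length_code = size - 2;    // z_aghi(size, -2): (size - 1) bytes left, minus 1 for the length code
      if (length_code < 0) return;       // size was 1: done
      for (int64_t i = 0; i <= length_code; ++i)
        dest[1 + i] = dest[i];           // EXRL'd MVC moves length_code + 1 bytes
    }
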
+ UnsafeMemoryAccessMark umam(this, true, false); + + // At this point, we know the lower bit of size is zero and a + // multiple of 2 + do_setmemory_atomic_loop(2, dest, size, byteVal, _masm); + + __ bind(L_fill8Bytes); + // At this point, we know the lower 3 bits of size are zero and a + // multiple of 8 + do_setmemory_atomic_loop(8, dest, size, byteVal, _masm); + + __ bind(L_fill4Bytes); + // At this point, we know the lower 2 bits of size are zero and a + // multiple of 4 + do_setmemory_atomic_loop(4, dest, size, byteVal, _masm); + + __ bind(L_fillBytes); + do_setmemory_atomic_loop_mvc(dest, size, byteVal, _masm); } return __ addr_at(start_off); } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java index 6412ca8a376f4..be15de048a197 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java @@ -50,7 +50,7 @@ public class MemorySegmentZeroUnsafe { static final Unsafe UNSAFE = Utils.unsafe; long src; - @Param({"255"}) + @Param({"1", "2", "3", "4", "5", "6", "7", "8", "15", "16", "63", "64", "255", "256"}) public int size; @Param({"true", "false"}) @@ -81,13 +81,14 @@ public void setup() throws Throwable { address = segment.address(); } -// @Benchmark -// public void panama() { -// segment.fill((byte) 0); -// } + @Benchmark + public void panama() { + segment.fill((byte) 0); + } @Benchmark public void unsafe() { UNSAFE.setMemory(address, size, (byte) 0); } } + From 1fc63a3fb28df61def77e49bc4abce9d835c3eb9 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 16 Apr 2025 11:40:58 +0000 Subject: [PATCH 08/13] extra line --- .../openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java | 1 - 1 file changed, 1 deletion(-) diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java index be15de048a197..09e5a1e3e77cb 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/MemorySegmentZeroUnsafe.java @@ -91,4 +91,3 @@ public void unsafe() { UNSAFE.setMemory(address, size, (byte) 0); } } - From 14e7654a0edf5662abc8a943349e7c8021997b8d Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Thu, 17 Apr 2025 09:20:55 +0000 Subject: [PATCH 09/13] fix testcases --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index ad5cdb3ecc9ca..06e388ee4f009 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1485,7 +1485,7 @@ class StubGenerator: public StubCodeGenerator { // handle size >= 256 __ bind(L_loop); __ z_stc(byteVal, Address(dest)); - __ z_mvc(1, 255, dest, 0, dest); + __ z_mvc(1, 254, dest, 0, dest); __ z_aghi(size, -256); __ z_aghi(dest, 256); __ z_cghi(size, 256); From 99dd07c26aeb31448693535abe8d174ab0e15b2e Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Thu, 17 Apr 2025 09:59:45 +0000 Subject: [PATCH 10/13] improvement --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index 06e388ee4f009..d144b87b2a53f 100644 --- 
a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1471,7 +1471,7 @@ class StubGenerator: public StubCodeGenerator { // Helper for generate_unsafe_setmemory // - // Atomically fill an array of memory using 1 byte chunk and return. + // Non-atomically fill an array of memory using 1 byte chunk and return. // We don't care about atomicity because the address and size are not aligned, So we are // free to fill the memory with best possible ways. static void do_setmemory_atomic_loop_mvc(Register dest, Register size, Register byteVal, @@ -1486,8 +1486,8 @@ class StubGenerator: public StubCodeGenerator { __ bind(L_loop); __ z_stc(byteVal, Address(dest)); __ z_mvc(1, 254, dest, 0, dest); + __ z_aghi(dest, 256); // increment the address by 256 __ z_aghi(size, -256); - __ z_aghi(dest, 256); __ z_cghi(size, 256); __ z_brh(L_loop); @@ -1496,8 +1496,7 @@ class StubGenerator: public StubCodeGenerator { __ bind(L_tail); __ z_stc(byteVal, Address(dest)); - __ z_aghi(size, -2); - __ z_cghi(size, 0); + __ z_aghi(size, -2); // aghi will set the condition code for "size==zero", "sizezero" __ z_bcr(Assembler::bcondLow, Z_R14); // size < 0 __ z_exrl(size, L_mvc); @@ -1515,15 +1514,15 @@ class StubGenerator: public StubCodeGenerator { Register tmp = Z_R1; // R1 is free at this point if (elem_size > 1) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, 0); + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, false); } if (elem_size > 2) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, 0); + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, false); } if (elem_size > 4) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, 0); + __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, false); } __ z_risbg(tmp, size, 32, 63, 64 - exact_log2(2 * elem_size), /* zero_rest */ true); // just do the right shift and set cc From cf709eec9b2a8b5f61d0c06846cf864b5469f263 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Thu, 17 Apr 2025 10:21:12 +0000 Subject: [PATCH 11/13] extra line --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index d144b87b2a53f..199b170a8df6c 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1574,7 +1574,6 @@ class StubGenerator: public StubCodeGenerator { __ z_nill(rScratch1, 7); __ z_braz(L_fill8Bytes); // branch if 0 - __ z_nill(rScratch1, 3); __ z_braz(L_fill4Bytes); // branch if 0 From f75209f54885f0742d80cb39ecb4f1780388bbe1 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 23 Apr 2025 06:05:42 +0000 Subject: [PATCH 12/13] improved mvc implementation --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 35 ++++++++++----------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index 199b170a8df6c..d3df241325479 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1475,36 +1475,33 @@ class StubGenerator: public StubCodeGenerator { // We don't care about atomicity because the address and size are not aligned, So we are // free to fill the memory with best possible ways. 
static void do_setmemory_atomic_loop_mvc(Register dest, Register size, Register byteVal, - MacroAssembler *_masm) { - + MacroAssembler *_masm) { NearLabel L_loop, L_tail, L_mvc; - __ z_cghi(size, 256); - __ z_brc(Assembler::bcondLow, L_tail); // size is <256 + __ z_aghi(size, -1); // -1 because first byte is preset by stc + __ z_bcr(Assembler::bcondLow, Z_R14); // result < 0 means size == 0 => return + __ z_stc(byteVal, Address(dest)); // initialize first byte + __ z_bcr(Assembler::bcondEqual, Z_R14); // result == 0 means size == 1 => return - // handle size >= 256 + // handle complete 256 byte blocks __ bind(L_loop); - __ z_stc(byteVal, Address(dest)); - __ z_mvc(1, 254, dest, 0, dest); - __ z_aghi(dest, 256); // increment the address by 256 - __ z_aghi(size, -256); - __ z_cghi(size, 256); - __ z_brh(L_loop); + __ z_aghi(size, -256); // decrement remaining #bytes + __ z_brl(L_tail); // skip loop if no full 256 byte block left + + __ z_mvc(1, 255, dest, 0, dest); // propagate byte from dest[0+i*256] to dest[1+i*256] + __ z_bcr(Assembler::bcondEqual, Z_R14); // remaining size == 0 => return (mvc does not touch CC) - __ z_ltr(size, size); - __ z_bcr(Assembler::bcondZero, Z_R14); // size is 0 + __ z_aghi(dest, 256); // increment target address + __ z_bru(L_loop); + // handle remaining bytes. We know 0 < size < 256 __ bind(L_tail); - __ z_stc(byteVal, Address(dest)); - __ z_aghi(size, -2); // aghi will set the condition code for "size==zero", "sizezero" - __ z_bcr(Assembler::bcondLow, Z_R14); // size < 0 + __ z_aghi(size, +256-1); // prepare size value for mvc via exrl __ z_exrl(size, L_mvc); - __ z_br(Z_R14); __ bind(L_mvc); - __ z_mvc(1, 0, dest, 0, dest); // mvc template, needs to be generated, not executed - + __ z_mvc(1, 0, dest, 0, dest); // mvc template, needs to be generated, not executed } static void do_setmemory_atomic_loop(int elem_size, Register dest, Register size, Register byteVal, From d79a841fa25b5fbb5f1343247fa5bf7744438104 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Mon, 26 May 2025 04:04:43 +0000 Subject: [PATCH 13/13] switch to vector stores --- src/hotspot/cpu/s390/stubGenerator_s390.cpp | 172 ++++++++------------ 1 file changed, 70 insertions(+), 102 deletions(-) diff --git a/src/hotspot/cpu/s390/stubGenerator_s390.cpp b/src/hotspot/cpu/s390/stubGenerator_s390.cpp index d3df241325479..1e9136cdca9c9 100644 --- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp @@ -1468,77 +1468,6 @@ class StubGenerator: public StubCodeGenerator { return __ addr_at(start_off); } - - // Helper for generate_unsafe_setmemory - // - // Non-atomically fill an array of memory using 1 byte chunk and return. - // We don't care about atomicity because the address and size are not aligned, So we are - // free to fill the memory with best possible ways. 
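
Note: the reworked loop above is easiest to verify with the byte accounting written out: one byte is preset with STC and subtracted from size up front, each full 256-byte block is then smeared with a single overlapping MVC, and the leftover 1..255 bytes go through the EXRL'd template with length code size + 255 (i.e. remaining - 1). A C++ model (illustrative only, made-up name; it tracks byte counts rather than registers and condition codes):

    #include <cstddef>
    #include <cstdint>

    void mvc_fill_model(uint8_t* dest, size_t size, uint8_t byte_val) {
      if (size == 0) return;                     // z_aghi(size, -1) went negative
      dest[0] = byte_val;                        // z_stc presets the first byte
      size_t remaining = size - 1;               // bytes still to write at dest + 1
      while (remaining >= 256) {                 // full 256-byte blocks
        for (unsigned i = 0; i < 256; ++i)       // z_mvc(1, 255, dest, 0, dest)
          dest[1 + i] = dest[i];
        dest      += 256;                        // z_aghi(dest, 256)
        remaining -= 256;                        // z_aghi(size, -256); zero => early return
      }
      if (remaining == 0) return;
      for (size_t i = 0; i < remaining; ++i)     // EXRL'd template, length code = remaining - 1
        dest[1 + i] = dest[i];
    }
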
- static void do_setmemory_atomic_loop_mvc(Register dest, Register size, Register byteVal, - MacroAssembler *_masm) { - NearLabel L_loop, L_tail, L_mvc; - - __ z_aghi(size, -1); // -1 because first byte is preset by stc - __ z_bcr(Assembler::bcondLow, Z_R14); // result < 0 means size == 0 => return - __ z_stc(byteVal, Address(dest)); // initialize first byte - __ z_bcr(Assembler::bcondEqual, Z_R14); // result == 0 means size == 1 => return - - // handle complete 256 byte blocks - __ bind(L_loop); - __ z_aghi(size, -256); // decrement remaining #bytes - __ z_brl(L_tail); // skip loop if no full 256 byte block left - - __ z_mvc(1, 255, dest, 0, dest); // propagate byte from dest[0+i*256] to dest[1+i*256] - __ z_bcr(Assembler::bcondEqual, Z_R14); // remaining size == 0 => return (mvc does not touch CC) - - __ z_aghi(dest, 256); // increment target address - __ z_bru(L_loop); - - // handle remaining bytes. We know 0 < size < 256 - __ bind(L_tail); - __ z_aghi(size, +256-1); // prepare size value for mvc via exrl - __ z_exrl(size, L_mvc); - __ z_br(Z_R14); - - __ bind(L_mvc); - __ z_mvc(1, 0, dest, 0, dest); // mvc template, needs to be generated, not executed - } - - static void do_setmemory_atomic_loop(int elem_size, Register dest, Register size, Register byteVal, - MacroAssembler *_masm) { - - NearLabel L_Loop, L_Tail; // 2x unrolled loop - Register tmp = Z_R1; // R1 is free at this point - - if (elem_size > 1) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 8 , 63 - 8, 8, false); - } - - if (elem_size > 2) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 16, 63 - 16, 16, false); - } - - if (elem_size > 4) { - __ rotate_then_insert(byteVal, byteVal, 64 - 2 * 32, 63 - 32, 32, false); - } - - __ z_risbg(tmp, size, 32, 63, 64 - exact_log2(2 * elem_size), /* zero_rest */ true); // just do the right shift and set cc - __ z_bre(L_Tail); - - __ align(32); // loop alignment - __ bind(L_Loop); - __ store_sized_value(byteVal, Address(dest, 0), elem_size); - __ store_sized_value(byteVal, Address(dest, elem_size), elem_size); - __ z_aghi(dest, 2 * elem_size); - __ z_brct(tmp, L_Loop); - - __ bind(L_Tail); - __ z_nilf(size, elem_size); - __ z_bcr(Assembler::bcondEqual, Z_R14); - __ store_sized_value(byteVal, Address(dest, 0), elem_size); - __ z_br(Z_R14); - } - // // Generate 'unsafe' set memory stub // Though just as safe as the other stubs, it takes an unscaled @@ -1557,46 +1486,83 @@ class StubGenerator: public StubCodeGenerator { // bump this on entry, not on exit: // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr); - { - const Register dest = Z_ARG1; - const Register size = Z_ARG2; - const Register byteVal = Z_ARG3; - const Register rScratch1 = Z_R1_scratch; - NearLabel L_fill8Bytes, L_fill4Bytes, L_fillBytes; - // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char) + const Register dest = Z_ARG1; + const Register size = Z_ARG2; + const Register byteVal = Z_ARG3; + NearLabel tail, finished; + // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char) - // Check for pointer & size alignment - __ z_ogrk(rScratch1, dest, size); + // Mark remaining code as such which performs Unsafe accesses. 
+ UnsafeMemoryAccessMark umam(this, true, false); - __ z_nill(rScratch1, 7); - __ z_braz(L_fill8Bytes); // branch if 0 + __ z_vlvgb(Z_V0, byteVal, 0); + __ z_vrepb(Z_V0, Z_V0, 0); - __ z_nill(rScratch1, 3); - __ z_braz(L_fill4Bytes); // branch if 0 + __ z_aghi(size, -32); + __ z_brl(tail); - __ z_nill(rScratch1, 1); - __ z_brnaz(L_fillBytes); // branch if not 0 + { + NearLabel again; + __ bind(again); + __ z_vst(Z_V0, Address(dest, 0)); + __ z_vst(Z_V0, Address(dest, 16)); + __ z_aghi(dest, 32); + __ z_aghi(size, -32); + __ z_brnl(again); + } - // Mark remaining code as such which performs Unsafe accesses. - UnsafeMemoryAccessMark umam(this, true, false); + __ bind(tail); + + { + NearLabel dont; + __ testbit(size, 4); + __ z_brz(dont); + __ z_vst(Z_V0, Address(dest, 0)); + __ z_aghi(dest, 16); + __ bind(dont); + } + + { + NearLabel dont; + __ testbit(size, 3); + __ z_brz(dont); + __ z_vsteg(Z_V0, 0, Z_R0, dest, 0); + __ z_aghi(dest, 8); + __ bind(dont); + } - // At this point, we know the lower bit of size is zero and a - // multiple of 2 - do_setmemory_atomic_loop(2, dest, size, byteVal, _masm); + __ z_tmll(size, 7); + __ z_brc(Assembler::bcondAllZero, finished); - __ bind(L_fill8Bytes); - // At this point, we know the lower 3 bits of size are zero and a - // multiple of 8 - do_setmemory_atomic_loop(8, dest, size, byteVal, _masm); + { + NearLabel dont; + __ testbit(size, 2); + __ z_brz(dont); + __ z_vstef(Z_V0, 0, Z_R0, dest, 0); + __ z_aghi(dest, 4); + __ bind(dont); + } - __ bind(L_fill4Bytes); - // At this point, we know the lower 2 bits of size are zero and a - // multiple of 4 - do_setmemory_atomic_loop(4, dest, size, byteVal, _masm); + { + NearLabel dont; + __ testbit(size, 1); + __ z_brz(dont); + __ z_vsteh(Z_V0, 0, Z_R0, dest, 0); + __ z_aghi(dest, 2); + __ bind(dont); + } - __ bind(L_fillBytes); - do_setmemory_atomic_loop_mvc(dest, size, byteVal, _masm); + { + NearLabel dont; + __ testbit(size, 0); + __ z_brz(dont); + __ z_vsteb(Z_V0, 0, Z_R0, dest, 0); + __ bind(dont); } + + __ bind(finished); + __ z_br(Z_R14); + return __ addr_at(start_off); } @@ -1645,7 +1611,9 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_uninit_id); #ifdef COMPILER2 - StubRoutines::_unsafe_setmemory = generate_unsafe_setmemory(StubRoutines::_jbyte_fill); + StubRoutines::_unsafe_setmemory = + VM_Version::has_VectorFacility() ? generate_unsafe_setmemory(StubRoutines::_jbyte_fill) : nullptr; + #endif // COMPILER2 }
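
Note: to summarize the final shape of the stub for reviewers who don't read vector mnemonics every day, here is a plain-C++ model: VLVGB + VREPB broadcast the fill byte into a 16-byte vector register, the main loop issues two 16-byte vector stores per 32 bytes, and the tail stores 16/8/4/2/1 bytes depending on the bits of the remaining length (the stub tests those bits on the by-32 biased count, which is equivalent because subtracting 32 leaves bits 0..4 unchanged). Illustrative only, with a made-up name; memcpy stands in for the vector element stores, and the stub itself is only registered when VM_Version::has_VectorFacility() reports the vector facility:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    void vector_fill_model(uint8_t* dest, size_t size, uint8_t byte_val) {
      uint8_t v[16];
      std::memset(v, byte_val, sizeof v);        // z_vlvgb + z_vrepb: broadcast byte to all lanes

      while (size >= 32) {                       // main loop: z_vst, z_vst
        std::memcpy(dest,      v, 16);
        std::memcpy(dest + 16, v, 16);
        dest += 32;
        size -= 32;
      }
      // Tail, largest chunk first: z_vst (16), z_vsteg (8), z_vstef (4),
      // z_vsteh (2), z_vsteb (1) -- one store per set bit of the remainder.
      for (size_t chunk = 16; chunk >= 1; chunk >>= 1) {
        if (size & chunk) {
          std::memcpy(dest, v, chunk);
          dest += chunk;
        }
      }
    }
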