@@ -1471,6 +1471,7 @@ class StubGenerator: public StubCodeGenerator {
14711471 __ subq (temp1, loop_size[shift]);
14721472
14731473 // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
1474+ __ align (32 );
14741475 __ BIND (L_main_loop);
14751476 __ copy64_avx (to, from, temp4, xmm1, false , shift, 0 );
14761477 __ copy64_avx (to, from, temp4, xmm1, false , shift, 64 );
@@ -1537,6 +1538,7 @@ class StubGenerator: public StubCodeGenerator {
15371538
15381539 // Main loop with aligned copy block size of 192 bytes at
15391540 // 64 byte copy granularity.
1541+ __ align (32 );
15401542 __ BIND (L_main_loop_64bytes);
15411543 __ copy64_avx (to, from, temp4, xmm1, false , shift, 0 , true );
15421544 __ copy64_avx (to, from, temp4, xmm1, false , shift, 64 , true );
@@ -1676,6 +1678,7 @@ class StubGenerator: public StubCodeGenerator {
16761678 __ BIND (L_main_pre_loop);
16771679
16781680 // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
1681+ __ align (32 );
16791682 __ BIND (L_main_loop);
16801683 __ copy64_avx (to, from, temp1, xmm1, true , shift, -64 );
16811684 __ copy64_avx (to, from, temp1, xmm1, true , shift, -128 );
@@ -1708,6 +1711,7 @@ class StubGenerator: public StubCodeGenerator {
17081711
17091712 // Main loop with aligned copy block size of 192 bytes at
17101713 // 64 byte copy granularity.
1714+ __ align (32 );
17111715 __ BIND (L_main_loop_64bytes);
17121716 __ copy64_avx (to, from, temp1, xmm1, true , shift, -64 , true );
17131717 __ copy64_avx (to, from, temp1, xmm1, true , shift, -128 , true );
@@ -1770,7 +1774,7 @@ class StubGenerator: public StubCodeGenerator {
17701774 //
17711775 address generate_disjoint_byte_copy (bool aligned, address* entry, const char *name) {
17721776#if COMPILER2_OR_JVMCI
1773- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
1777+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
17741778 return generate_disjoint_copy_avx3_masked (entry, " jbyte_disjoint_arraycopy_avx3" , 0 ,
17751779 aligned, false , false );
17761780 }
@@ -1886,7 +1890,7 @@ class StubGenerator: public StubCodeGenerator {
18861890 address generate_conjoint_byte_copy (bool aligned, address nooverlap_target,
18871891 address* entry, const char *name) {
18881892#if COMPILER2_OR_JVMCI
1889- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
1893+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
18901894 return generate_conjoint_copy_avx3_masked (entry, " jbyte_conjoint_arraycopy_avx3" , 0 ,
18911895 nooverlap_target, aligned, false , false );
18921896 }
@@ -1997,7 +2001,7 @@ class StubGenerator: public StubCodeGenerator {
19972001 //
19982002 address generate_disjoint_short_copy (bool aligned, address *entry, const char *name) {
19992003#if COMPILER2_OR_JVMCI
2000- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
2004+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
20012005 return generate_disjoint_copy_avx3_masked (entry, " jshort_disjoint_arraycopy_avx3" , 1 ,
20022006 aligned, false , false );
20032007 }
@@ -2128,7 +2132,7 @@ class StubGenerator: public StubCodeGenerator {
21282132 address generate_conjoint_short_copy (bool aligned, address nooverlap_target,
21292133 address *entry, const char *name) {
21302134#if COMPILER2_OR_JVMCI
2131- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
2135+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
21322136 return generate_conjoint_copy_avx3_masked (entry, " jshort_conjoint_arraycopy_avx3" , 1 ,
21332137 nooverlap_target, aligned, false , false );
21342138 }
@@ -2232,7 +2236,7 @@ class StubGenerator: public StubCodeGenerator {
22322236 address generate_disjoint_int_oop_copy (bool aligned, bool is_oop, address* entry,
22332237 const char *name, bool dest_uninitialized = false ) {
22342238#if COMPILER2_OR_JVMCI
2235- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
2239+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
22362240 return generate_disjoint_copy_avx3_masked (entry, " jint_disjoint_arraycopy_avx3" , 2 ,
22372241 aligned, is_oop, dest_uninitialized);
22382242 }
@@ -2343,7 +2347,7 @@ class StubGenerator: public StubCodeGenerator {
23432347 address *entry, const char *name,
23442348 bool dest_uninitialized = false ) {
23452349#if COMPILER2_OR_JVMCI
2346- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
2350+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
23472351 return generate_conjoint_copy_avx3_masked (entry, " jint_conjoint_arraycopy_avx3" , 2 ,
23482352 nooverlap_target, aligned, is_oop, dest_uninitialized);
23492353 }
@@ -2456,7 +2460,7 @@ class StubGenerator: public StubCodeGenerator {
24562460 address generate_disjoint_long_oop_copy (bool aligned, bool is_oop, address *entry,
24572461 const char *name, bool dest_uninitialized = false ) {
24582462#if COMPILER2_OR_JVMCI
2459- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
2463+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
24602464 return generate_disjoint_copy_avx3_masked (entry, " jlong_disjoint_arraycopy_avx3" , 3 ,
24612465 aligned, is_oop, dest_uninitialized);
24622466 }
@@ -2566,7 +2570,7 @@ class StubGenerator: public StubCodeGenerator {
25662570 address nooverlap_target, address *entry,
25672571 const char *name, bool dest_uninitialized = false ) {
25682572#if COMPILER2_OR_JVMCI
2569- if (VM_Version::supports_avx512vlbw () && MaxVectorSize >= 32 ) {
2573+ if (VM_Version::supports_avx512vlbw () && VM_Version::supports_bmi2 () && MaxVectorSize >= 32 ) {
25702574 return generate_conjoint_copy_avx3_masked (entry, " jlong_conjoint_arraycopy_avx3" , 3 ,
25712575 nooverlap_target, aligned, is_oop, dest_uninitialized);
25722576 }
0 commit comments