diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 4317bb3d01825..a52b9cfc867cd 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1591,23 +1591,18 @@ void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, } } -void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt, - XMMRegister dst, Register base, - Register idx_base, - Register offset, Register mask, - Register mask_idx, Register rtmp, - int vlen_enc) { +void C2_MacroAssembler::vgather8b_masked(BasicType elem_bt, XMMRegister dst, + Register base, Register idx_base, + Register mask, Register mask_idx, + Register rtmp, int vlen_enc) { vpxor(dst, dst, dst, vlen_enc); if (elem_bt == T_SHORT) { for (int i = 0; i < 4; i++) { - // dst[i] = mask[i] ? src[offset + idx_base[i]] : 0 + // dst[i] = mask[i] ? src[idx_base[i]] : 0 Label skip_load; btq(mask, mask_idx); jccb(Assembler::carryClear, skip_load); movl(rtmp, Address(idx_base, i * 4)); - if (offset != noreg) { - addl(rtmp, offset); - } pinsrw(dst, Address(base, rtmp, Address::times_2), i); bind(skip_load); incq(mask_idx); @@ -1615,14 +1610,11 @@ void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt, } else { assert(elem_bt == T_BYTE, ""); for (int i = 0; i < 8; i++) { - // dst[i] = mask[i] ? src[offset + idx_base[i]] : 0 + // dst[i] = mask[i] ? src[idx_base[i]] : 0 Label skip_load; btq(mask, mask_idx); jccb(Assembler::carryClear, skip_load); movl(rtmp, Address(idx_base, i * 4)); - if (offset != noreg) { - addl(rtmp, offset); - } pinsrb(dst, Address(base, rtmp), i); bind(skip_load); incq(mask_idx); @@ -1630,28 +1622,21 @@ void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt, } } -void C2_MacroAssembler::vgather8b_offset(BasicType elem_bt, XMMRegister dst, - Register base, Register idx_base, - Register offset, Register rtmp, - int vlen_enc) { +void C2_MacroAssembler::vgather8b(BasicType elem_bt, XMMRegister dst, + Register base, Register idx_base, + Register rtmp, int vlen_enc) { vpxor(dst, dst, dst, vlen_enc); if (elem_bt == T_SHORT) { for (int i = 0; i < 4; i++) { - // dst[i] = src[offset + idx_base[i]] + // dst[i] = src[idx_base[i]] movl(rtmp, Address(idx_base, i * 4)); - if (offset != noreg) { - addl(rtmp, offset); - } pinsrw(dst, Address(base, rtmp, Address::times_2), i); } } else { assert(elem_bt == T_BYTE, ""); for (int i = 0; i < 8; i++) { - // dst[i] = src[offset + idx_base[i]] + // dst[i] = src[idx_base[i]] movl(rtmp, Address(idx_base, i * 4)); - if (offset != noreg) { - addl(rtmp, offset); - } pinsrb(dst, Address(base, rtmp), i); } } @@ -1680,11 +1665,10 @@ void C2_MacroAssembler::vgather8b_offset(BasicType elem_bt, XMMRegister dst, */ void C2_MacroAssembler::vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, - Register offset, Register mask, - XMMRegister xtmp1, XMMRegister xtmp2, - XMMRegister temp_dst, Register rtmp, - Register mask_idx, Register length, - int vector_len, int vlen_enc) { + Register mask, XMMRegister xtmp1, + XMMRegister xtmp2, XMMRegister temp_dst, + Register rtmp, Register mask_idx, + Register length, int vector_len, int vlen_enc) { Label GATHER8_LOOP; assert(is_subword_type(elem_ty), ""); movl(length, vector_len); @@ -1698,9 +1682,9 @@ void C2_MacroAssembler::vgather_subword(BasicType elem_ty, XMMRegister dst, bind(GATHER8_LOOP); // TMP_VEC_64(temp_dst) = PICK_SUB_WORDS_FROM_GATHER_INDICES if (mask == noreg) { - vgather8b_offset(elem_ty, 
temp_dst, base, idx_base, offset, rtmp, vlen_enc); + vgather8b(elem_ty, temp_dst, base, idx_base, rtmp, vlen_enc); } else { - vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc); + vgather8b_masked(elem_ty, temp_dst, base, idx_base, mask, mask_idx, rtmp, vlen_enc); } // TEMP_PERM_VEC(temp_dst) = PERMUTE TMP_VEC_64(temp_dst) PERM_INDEX(xtmp1) vpermd(temp_dst, xtmp1, temp_dst, vlen_enc == Assembler::AVX_512bit ? vlen_enc : Assembler::AVX_256bit); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 713eb73d68f38..10551293d5963 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -492,15 +492,14 @@ void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); - void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset, - Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, + void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register mask, + XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, Register midx, Register length, int vector_len, int vlen_enc); - void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, - Register offset, Register mask, Register midx, Register rtmp, int vlen_enc); - - void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, - Register offset, Register rtmp, int vlen_enc); + void vgather8b_masked(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, + Register mask, Register midx, Register rtmp, int vlen_enc); + void vgather8b(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, + Register rtmp, int vlen_enc); void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index a281331cb2986..0f25eed103031 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -4023,24 +4023,24 @@ instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRe ins_pipe( pipe_slow ); %} -instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ +instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{ predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); - match(Set dst (LoadVectorGather mem (Binary idx_base offset))); + match(Set dst (LoadVectorGather mem idx_base)); effect(TEMP tmp, TEMP rtmp); format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! 
using $tmp and $rtmp as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); BasicType elem_bt = Matcher::vector_element_basic_type(this); __ lea($tmp$$Register, $mem$$Address); - __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); + __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, +instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp, vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); - match(Set dst (LoadVectorGather mem (Binary idx_base offset))); + match(Set dst (LoadVectorGather mem idx_base)); effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} ins_encode %{ @@ -4049,49 +4049,15 @@ instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, BasicType elem_bt = Matcher::vector_element_basic_type(this); __ lea($tmp$$Register, $mem$$Address); __ movptr($idx_base_temp$$Register, $idx_base$$Register); - __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, + __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ - predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); - match(Set dst (LoadVectorGather mem (Binary idx_base offset))); - effect(TEMP tmp, TEMP rtmp, KILL cr); - format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ lea($tmp$$Register, $mem$$Address); - __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - - -instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, - vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ - predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); - match(Set dst (LoadVectorGather mem (Binary idx_base offset))); - effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); - format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - int vector_len = Matcher::vector_length(this); - BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ lea($tmp$$Register, $mem$$Address); - __ movptr($idx_base_temp$$Register, $idx_base$$Register); - __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, - $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - - -instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ +instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); + match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} ins_encode %{ @@ -4100,15 +4066,15 @@ instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, im __ xorq($mask_idx$$Register, $mask_idx$$Register); __ lea($tmp$$Register, $mem$$Address); __ kmovql($rtmp2$$Register, $mask$$KRegister); - __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); + __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, +instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp, vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); + match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} ins_encode %{ @@ -4119,52 +4085,15 @@ instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, im __ lea($tmp$$Register, $mem$$Address); __ movptr($idx_base_temp$$Register, $idx_base$$Register); __ kmovql($rtmp2$$Register, $mask$$KRegister); - __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, + __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ - predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); - effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); - format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ xorq($mask_idx$$Register, $mask_idx$$Register); - __ lea($tmp$$Register, $mem$$Address); - __ kmovql($rtmp2$$Register, $mask$$KRegister); - __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, - $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - -instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, - vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ - predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); - effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); - format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - int vector_len = Matcher::vector_length(this); - BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ xorq($mask_idx$$Register, $mask_idx$$Register); - __ lea($tmp$$Register, $mem$$Address); - __ movptr($idx_base_temp$$Register, $idx_base$$Register); - __ kmovql($rtmp2$$Register, $mask$$KRegister); - __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, - $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - -instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ +instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); + match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} ins_encode %{ @@ -4177,15 +4106,15 @@ instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, im __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); } __ xorl($mask_idx$$Register, $mask_idx$$Register); - __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); + __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); %} ins_pipe( pipe_slow ); %} -instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, +instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp, vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); + match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask))); effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} ins_encode %{ @@ -4200,53 +4129,7 @@ instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, im __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); } __ xorl($mask_idx$$Register, $mask_idx$$Register); - __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, - $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - -instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); - effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); - format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ lea($tmp$$Register, $mem$$Address); - __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); - if (elem_bt == T_SHORT) { - __ movl($mask_idx$$Register, 0x55555555); - __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); - } - __ xorl($mask_idx$$Register, $mask_idx$$Register); - __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, - $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - -instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, - vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); - match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); - effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); - format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - int vector_len = Matcher::vector_length(this); - BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ xorl($mask_idx$$Register, $mask_idx$$Register); - __ lea($tmp$$Register, $mem$$Address); - __ movptr($idx_base_temp$$Register, $idx_base$$Register); - __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); - if (elem_bt == T_SHORT) { - __ movl($mask_idx$$Register, 0x55555555); - __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); - } - __ xorl($mask_idx$$Register, $mask_idx$$Register); - __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, + __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); %} ins_pipe( pipe_slow ); diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index eeefddfedfc44..4c32c2642899c 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -1269,9 +1269,13 @@ class methodHandle; "Ljava/lang/Class;" \ "I" \ "Ljava/lang/Class;" \ + "I" \ "Ljava/lang/Object;" \ "J" \ "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ + "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ "Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \ "Ljava/lang/Object;" \ "I[II" \ @@ -1286,6 +1290,7 @@ class methodHandle; "Ljava/lang/Class;" \ "I" \ "Ljava/lang/Class;" \ + "I" \ "Ljava/lang/Object;" \ "J" \ "Ljdk/internal/vm/vector/VectorSupport$Vector;" \ diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index 0849b40ad7e38..5cb56019bc144 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -2517,22 +2517,7 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) { n->del_req(3); break; } - case Op_LoadVectorGather: - if (is_subword_type(n->bottom_type()->is_vect()->element_basic_type())) { - Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1)); - n->set_req(MemNode::ValueIn, pair); - n->del_req(MemNode::ValueIn+1); - } - break; - case Op_LoadVectorGatherMasked: - if (is_subword_type(n->bottom_type()->is_vect()->element_basic_type())) { - Node* pair2 = new BinaryNode(n->in(MemNode::ValueIn + 1), n->in(MemNode::ValueIn + 2)); - Node* pair1 = new BinaryNode(n->in(MemNode::ValueIn), pair2); - n->set_req(MemNode::ValueIn, pair1); - n->del_req(MemNode::ValueIn+2); - n->del_req(MemNode::ValueIn+1); - break; - } // fall-through + case Op_LoadVectorGatherMasked: // fall-through case Op_StoreVectorScatter: { Node* pair = new BinaryNode(n->in(MemNode::ValueIn), n->in(MemNode::ValueIn+1)); n->set_req(MemNode::ValueIn, pair); diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 5ff2590a19006..4aa6de2bb4cbd 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -1203,29 +1203,31 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) { return true; } -// , -// W extends Vector, -// S extends 
VectorSpecies, -// M extends VectorMask, -// E> -// V loadWithMap(Class vectorClass, Class maskClass, Class elementType, int length, -// Class> vectorIndexClass, -// Object base, long offset, // Unsafe addressing -// W index_vector, M m, -// C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation -// LoadVectorOperationWithMap defaultImpl) +// +// , +// W extends Vector, +// S extends VectorSpecies, +// M extends VectorMask, +// E> +// V loadWithMap(Class vClass, Class mClass, Class eClass, int length, +// Class> vectorIndexClass, int indexLength, +// Object base, long offset, +// W indexVector1, W indexVector2, W indexVector3, W indexVector4, +// M m, C container, int index, int[] indexMap, int indexM, S s, +// LoadVectorOperationWithMap defaultImpl) // // , // W extends Vector, // M extends VectorMask, // E> -// void storeWithMap(Class vectorClass, Class maskClass, Class elementType, -// int length, Class> vectorIndexClass, Object base, long offset, // Unsafe addressing -// W index_vector, V v, M m, -// C container, int index, int[] indexMap, int indexM, // Arguments for default implementation -// StoreVectorOperationWithMap defaultImpl) +// void storeWithMap(Class vClass, Class mClass, Class eClass, int length, +// Class> vectorIndexClass, int indexLength, +// Object base, long offset, // Unsafe addressing +// W indexVector, V v, M m, +// C container, int index, int[] indexMap, int indexM, // Arguments for default implementation +// StoreVectorOperationWithMap defaultImpl) // bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr(); @@ -1233,16 +1235,19 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr(); const TypeInt* vlen = gvn().type(argument(3))->isa_int(); const TypeInstPtr* vector_idx_klass = gvn().type(argument(4))->isa_instptr(); + const TypeInt* idx_vlen = gvn().type(argument(5))->isa_int(); if (vector_klass == nullptr || vector_klass->const_oop() == nullptr || elem_klass == nullptr || elem_klass->const_oop() == nullptr || vlen == nullptr || !vlen->is_con() || - vector_idx_klass == nullptr || vector_idx_klass->const_oop() == nullptr) { - log_if_needed(" ** missing constant: vclass=%s etype=%s vlen=%s viclass=%s", + vector_idx_klass == nullptr || vector_idx_klass->const_oop() == nullptr || + idx_vlen == nullptr || !idx_vlen->is_con()) { + log_if_needed(" ** missing constant: vclass=%s etype=%s vlen=%s viclass=%s idx_vlen=%s", NodeClassNames[argument(0)->Opcode()], NodeClassNames[argument(2)->Opcode()], NodeClassNames[argument(3)->Opcode()], - NodeClassNames[argument(4)->Opcode()]); + NodeClassNames[argument(4)->Opcode()], + NodeClassNames[argument(5)->Opcode()]); return false; // not enough info for intrinsification } @@ -1259,8 +1264,10 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { BasicType elem_bt = elem_type->basic_type(); int num_elem = vlen->get_con(); + int idx_num_elem = idx_vlen->get_con(); - const Type* vmask_type = gvn().type(is_scatter ? argument(10) : argument(9)); + Node* m = is_scatter ? 
argument(11) : argument(13); + const Type* vmask_type = gvn().type(m); bool is_masked_op = vmask_type != TypePtr::NULL_PTR; if (is_masked_op) { if (mask_klass == nullptr || mask_klass->const_oop() == nullptr) { @@ -1298,23 +1305,33 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { // Check that the vector holding indices is supported by architecture // For sub-word gathers expander receive index array. - if (!is_subword_type(elem_bt) && !arch_supports_vector(Op_LoadVector, num_elem, T_INT, VecMaskNotUsed)) { - log_if_needed(" ** not supported: arity=%d op=%s/loadindex vlen=%d etype=int is_masked_op=%d", - is_scatter, is_scatter ? "scatter" : "gather", - num_elem, is_masked_op ? 1 : 0); - return false; // not supported + if (!is_subword_type(elem_bt) && !arch_supports_vector(Op_LoadVector, idx_num_elem, T_INT, VecMaskNotUsed)) { + log_if_needed(" ** not supported: arity=%d op=%s/loadindex vlen=%d etype=int is_masked_op=%d", + is_scatter, is_scatter ? "scatter" : "gather", + idx_num_elem, is_masked_op ? 1 : 0); + return false; // not supported } - Node* base = argument(5); - Node* offset = ConvL2X(argument(6)); + Node* base = argument(6); + Node* offset = ConvL2X(argument(7)); // Save state and restore on bailout uint old_sp = sp(); SafePointNode* old_map = clone_map(); - Node* addr = make_unsafe_address(base, offset, elem_bt, true); + Node* addr = nullptr; + if (!is_subword_type(elem_bt)) { + addr = make_unsafe_address(base, offset, elem_bt, true); + } else { + assert(!is_scatter, "Only supports gather operation for subword types now"); + uint header = arrayOopDesc::base_offset_in_bytes(elem_bt); + assert(offset->is_Con() && offset->bottom_type()->is_long()->get_con() == header, + "offset must be the array base offset"); + Node* index = argument(15); + addr = array_element_address(base, index, elem_bt); + } - const TypePtr *addr_type = gvn().type(addr)->isa_ptr(); + const TypePtr* addr_type = gvn().type(addr)->isa_ptr(); const TypeAryPtr* arr_type = addr_type->isa_aryptr(); // The array must be consistent with vector type @@ -1336,26 +1353,31 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { return false; } - Node* index_vect = nullptr; + // Get the indexes for gather/scatter. + Node* indexes = nullptr; const TypeInstPtr* vbox_idx_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_idx_klass); - if (!is_subword_type(elem_bt)) { - index_vect = unbox_vector(argument(8), vbox_idx_type, T_INT, num_elem); - if (index_vect == nullptr) { + if (is_subword_type(elem_bt)) { + Node* indexMap = argument(16); + Node* indexM = argument(17); + indexes = array_element_address(indexMap, indexM, T_INT); + } else { + // Get the first index vector. + indexes = unbox_vector(argument(9), vbox_idx_type, T_INT, idx_num_elem); + if (indexes == nullptr) { set_map(old_map); set_sp(old_sp); return false; } } + // Get the vector mask value. Node* mask = nullptr; if (is_masked_op) { ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass(); const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass); - mask = unbox_vector(is_scatter ? argument(10) : argument(9), mbox_type, elem_bt, num_elem); + mask = unbox_vector(m, mbox_type, elem_bt, num_elem); if (mask == nullptr) { - log_if_needed(" ** unbox failed mask=%s", - is_scatter ? 
NodeClassNames[argument(10)->Opcode()] - : NodeClassNames[argument(9)->Opcode()]); + log_if_needed(" ** unbox failed mask=%s", NodeClassNames[m->Opcode()]); set_map(old_map); set_sp(old_sp); return false; @@ -1364,7 +1386,7 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { const TypeVect* vector_type = TypeVect::make(elem_bt, num_elem); if (is_scatter) { - Node* val = unbox_vector(argument(9), vbox_type, elem_bt, num_elem); + Node* val = unbox_vector(argument(10), vbox_type, elem_bt, num_elem); if (val == nullptr) { set_map(old_map); set_sp(old_sp); @@ -1374,37 +1396,23 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { Node* vstore = nullptr; if (mask != nullptr) { - vstore = gvn().transform(new StoreVectorScatterMaskedNode(control(), memory(addr), addr, addr_type, val, index_vect, mask)); + vstore = gvn().transform(new StoreVectorScatterMaskedNode(control(), memory(addr), addr, addr_type, val, indexes, mask)); } else { - vstore = gvn().transform(new StoreVectorScatterNode(control(), memory(addr), addr, addr_type, val, index_vect)); + vstore = gvn().transform(new StoreVectorScatterNode(control(), memory(addr), addr, addr_type, val, indexes)); } set_memory(vstore, addr_type); } else { Node* vload = nullptr; - Node* index = argument(11); - Node* indexMap = argument(12); - Node* indexM = argument(13); if (mask != nullptr) { - if (is_subword_type(elem_bt)) { - Node* index_arr_base = array_element_address(indexMap, indexM, T_INT); - vload = gvn().transform(new LoadVectorGatherMaskedNode(control(), memory(addr), addr, addr_type, vector_type, index_arr_base, mask, index)); - } else { - vload = gvn().transform(new LoadVectorGatherMaskedNode(control(), memory(addr), addr, addr_type, vector_type, index_vect, mask)); - } + vload = gvn().transform(new LoadVectorGatherMaskedNode(control(), memory(addr), addr, addr_type, vector_type, indexes, mask)); } else { - if (is_subword_type(elem_bt)) { - Node* index_arr_base = array_element_address(indexMap, indexM, T_INT); - vload = gvn().transform(new LoadVectorGatherNode(control(), memory(addr), addr, addr_type, vector_type, index_arr_base, index)); - } else { - vload = gvn().transform(new LoadVectorGatherNode(control(), memory(addr), addr, addr_type, vector_type, index_vect)); - } + vload = gvn().transform(new LoadVectorGatherNode(control(), memory(addr), addr, addr_type, vector_type, indexes)); } Node* box = box_vector(vload, vbox_type, elem_bt, num_elem); set_result(box); } destruct_map_clone(old_map); - C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt)))); return true; } diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index e72c3880c7983..3caaf7c59d7d3 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -1117,25 +1117,18 @@ class LoadVectorNode : public LoadNode { // Load Vector from memory via index map class LoadVectorGatherNode : public LoadVectorNode { public: - LoadVectorGatherNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices, Node* offset = nullptr) + LoadVectorGatherNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices) : LoadVectorNode(c, mem, adr, at, vt) { init_class_id(Class_LoadVectorGather); add_req(indices); DEBUG_ONLY(bool is_subword = is_subword_type(vt->element_basic_type())); assert(is_subword || indices->bottom_type()->is_vect(), "indices must be in vector"); - assert(is_subword || !offset, 
""); assert(req() == MemNode::ValueIn + 1, "match_edge expects that index input is in MemNode::ValueIn"); - if (offset) { - add_req(offset); - } } virtual int Opcode() const; virtual uint match_edge(uint idx) const { - return idx == MemNode::Address || - idx == MemNode::ValueIn || - ((is_subword_type(vect_type()->element_basic_type())) && - idx == MemNode::ValueIn + 1); + return idx == MemNode::Address || idx == MemNode::ValueIn; } virtual int store_Opcode() const { // Ensure it is different from any store opcode to avoid folding when indices are used @@ -1254,23 +1247,19 @@ class LoadVectorMaskedNode : public LoadVectorNode { // Load Vector from memory via index map under the influence of a predicate register(mask). class LoadVectorGatherMaskedNode : public LoadVectorNode { public: - LoadVectorGatherMaskedNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices, Node* mask, Node* offset = nullptr) + LoadVectorGatherMaskedNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, Node* indices, Node* mask) : LoadVectorNode(c, mem, adr, at, vt) { init_class_id(Class_LoadVectorGatherMasked); add_req(indices); add_req(mask); assert(req() == MemNode::ValueIn + 2, "match_edge expects that last input is in MemNode::ValueIn+1"); - if (is_subword_type(vt->element_basic_type())) { - add_req(offset); - } + assert(is_subword_type(vt->element_basic_type()) || indices->bottom_type()->is_vect(), "indices must be in vector"); } virtual int Opcode() const; virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn || - idx == MemNode::ValueIn + 1 || - (is_subword_type(vect_type()->is_vect()->element_basic_type()) && - idx == MemNode::ValueIn + 2); } + idx == MemNode::ValueIn + 1; } virtual int store_Opcode() const { // Ensure it is different from any store opcode to avoid folding when indices and mask are used return -1; diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java index 4a8ad79b50c3d..d3705a279ca28 100644 --- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java +++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -491,8 +491,8 @@ public interface LoadVectorOperationWithMap vClass, Class mClass, Class eClass, int length, Class> vectorIndexClass, - Object base, long offset, - W index_vector, + int indexLength, Object base, long offset, + W indexVector1, W indexVector2, W indexVector3, W indexVector4, M m, C container, int index, int[] indexMap, int indexM, S s, LoadVectorOperationWithMap defaultImpl) { assert isNonCapturingLambda(defaultImpl) : defaultImpl; @@ -558,8 +558,8 @@ public interface StoreVectorOperationWithMap vClass, Class mClass, Class eClass, int length, Class> vectorIndexClass, - Object base, long offset, - W index_vector, + int indexLength, Object base, long offset, + W indexVector, V v, M m, C container, int index, int[] indexMap, int indexM, StoreVectorOperationWithMap defaultImpl) { assert isNonCapturingLambda(defaultImpl) : defaultImpl; diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java index 2b1cc879e6638..c788b4e1617d0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java @@ -3120,17 +3120,30 @@ ByteVector fromArray(VectorSpecies species, } // Check indices are within array bounds. - for (int i = 0; i < vsp.length(); i += lsp.length()) { - IntVector vix = IntVector - .fromArray(lsp, indexMap, mapOffset + i) - .add(offset); - VectorIntrinsics.checkIndex(vix, a.length); + IntVector vix0 = IntVector.fromArray(lsp, indexMap, mapOffset).add(offset); + VectorIntrinsics.checkIndex(vix0, a.length); + + int vlen = vsp.length(); + int idx_vlen = lsp.length(); + IntVector vix1 = null; + if (vlen >= idx_vlen * 2) { + vix1 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen).add(offset); + VectorIntrinsics.checkIndex(vix1, a.length); + } + + IntVector vix2 = null; + IntVector vix3 = null; + if (vlen == idx_vlen * 4) { + vix2 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 2).add(offset); + VectorIntrinsics.checkIndex(vix2, a.length); + vix3 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 3).add(offset); + VectorIntrinsics.checkIndex(vix3, a.length); } return VectorSupport.loadWithMap( vectorType, null, byte.class, vsp.laneCount(), - lsp.vectorType(), - a, ARRAY_BASE, null, null, + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, vix2, vix3, null, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); @@ -3882,17 +3895,30 @@ ByteVector fromArray0Template(Class maskClass, byte[] a, int offset, // Check indices are within array bounds. // FIXME: Check index under mask controlling. 
- for (int i = 0; i < vsp.length(); i += lsp.length()) { - IntVector vix = IntVector - .fromArray(lsp, indexMap, mapOffset + i) - .add(offset); - VectorIntrinsics.checkIndex(vix, a.length); + IntVector vix0 = IntVector.fromArray(lsp, indexMap, mapOffset).add(offset); + VectorIntrinsics.checkIndex(vix0, a.length); + + int vlen = vsp.length(); + int idx_vlen = lsp.length(); + IntVector vix1 = null; + if (vlen >= idx_vlen * 2) { + vix1 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen).add(offset); + VectorIntrinsics.checkIndex(vix1, a.length); + } + + IntVector vix2 = null; + IntVector vix3 = null; + if (vlen == idx_vlen * 4) { + vix2 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 2).add(offset); + VectorIntrinsics.checkIndex(vix2, a.length); + vix3 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 3).add(offset); + VectorIntrinsics.checkIndex(vix3, a.length); } return VectorSupport.loadWithMap( vectorType, maskClass, byte.class, vsp.laneCount(), - lsp.vectorType(), - a, ARRAY_BASE, null, m, + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, vix2, vix3, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java index 48446c6fa0129..786cd089ebecb 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java @@ -2937,8 +2937,8 @@ DoubleVector fromArray(VectorSpecies species, return VectorSupport.loadWithMap( vectorType, null, double.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, null, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, null, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); @@ -3228,7 +3228,7 @@ void intoArray(double[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, null, a, offset, indexMap, mapOffset, @@ -3423,8 +3423,8 @@ DoubleVector fromArray0Template(Class maskClass, double[] a, int offset, return VectorSupport.loadWithMap( vectorType, maskClass, double.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, m, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); @@ -3539,7 +3539,7 @@ void intoArray0Template(Class maskClass, double[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, m, a, offset, indexMap, mapOffset, diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java index 1e0829a3b1c0c..b481d5a51d740 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java @@ -2943,8 +2943,8 @@ FloatVector fromArray(VectorSpecies species, return VectorSupport.loadWithMap( vectorType, null, float.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, null, + 
isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, null, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); @@ -3215,7 +3215,7 @@ void intoArray(float[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, null, a, offset, indexMap, mapOffset, @@ -3392,8 +3392,8 @@ FloatVector fromArray0Template(Class maskClass, float[] a, int offset, return VectorSupport.loadWithMap( vectorType, maskClass, float.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, m, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); @@ -3489,7 +3489,7 @@ void intoArray0Template(Class maskClass, float[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, m, a, offset, indexMap, mapOffset, diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java index b691527bec680..85aac460f8b02 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java @@ -3097,8 +3097,8 @@ IntVector fromArray(VectorSpecies species, return VectorSupport.loadWithMap( vectorType, null, int.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, null, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, null, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); @@ -3369,7 +3369,7 @@ void intoArray(int[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, null, a, offset, indexMap, mapOffset, @@ -3546,8 +3546,8 @@ IntVector fromArray0Template(Class maskClass, int[] a, int offset, return VectorSupport.loadWithMap( vectorType, maskClass, int.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, m, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); @@ -3643,7 +3643,7 @@ void intoArray0Template(Class maskClass, int[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, m, a, offset, indexMap, mapOffset, diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java index 9e4dcd23d677a..abd86863165f0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java @@ -2976,8 +2976,8 @@ LongVector fromArray(VectorSpecies species, return VectorSupport.loadWithMap( vectorType, null, long.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, null, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, null, a, offset, indexMap, 
mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); @@ -3267,7 +3267,7 @@ void intoArray(long[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), null, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, null, a, offset, indexMap, mapOffset, @@ -3462,8 +3462,8 @@ LongVector fromArray0Template(Class maskClass, long[] a, int offset, return VectorSupport.loadWithMap( vectorType, maskClass, long.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, m, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); @@ -3578,7 +3578,7 @@ void intoArray0Template(Class maskClass, long[] a, int offset, VectorSupport.storeWithMap( vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, m, a, offset, indexMap, mapOffset, diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java index 46df27309ae3b..464b792e0f7b3 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java @@ -3121,17 +3121,21 @@ ShortVector fromArray(VectorSpecies species, } // Check indices are within array bounds. - for (int i = 0; i < vsp.length(); i += lsp.length()) { - IntVector vix = IntVector - .fromArray(lsp, indexMap, mapOffset + i) - .add(offset); - VectorIntrinsics.checkIndex(vix, a.length); + IntVector vix0 = IntVector.fromArray(lsp, indexMap, mapOffset).add(offset); + VectorIntrinsics.checkIndex(vix0, a.length); + + int vlen = vsp.length(); + int idx_vlen = lsp.length(); + IntVector vix1 = null; + if (vlen >= idx_vlen * 2) { + vix1 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen).add(offset); + VectorIntrinsics.checkIndex(vix1, a.length); } return VectorSupport.loadWithMap( vectorType, null, short.class, vsp.laneCount(), - lsp.vectorType(), - a, ARRAY_BASE, null, null, + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, null, null, null, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); @@ -3868,17 +3872,21 @@ ShortVector fromArray0Template(Class maskClass, short[] a, int offset, // Check indices are within array bounds. // FIXME: Check index under mask controlling. 
- for (int i = 0; i < vsp.length(); i += lsp.length()) { - IntVector vix = IntVector - .fromArray(lsp, indexMap, mapOffset + i) - .add(offset); - VectorIntrinsics.checkIndex(vix, a.length); + IntVector vix0 = IntVector.fromArray(lsp, indexMap, mapOffset).add(offset); + VectorIntrinsics.checkIndex(vix0, a.length); + + int vlen = vsp.length(); + int idx_vlen = lsp.length(); + IntVector vix1 = null; + if (vlen >= idx_vlen * 2) { + vix1 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen).add(offset); + VectorIntrinsics.checkIndex(vix1, a.length); } return VectorSupport.loadWithMap( vectorType, maskClass, short.class, vsp.laneCount(), - lsp.vectorType(), - a, ARRAY_BASE, null, m, + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, null, null, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template index 6d9db65a1ba83..3fa7d1dab6c1c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template @@ -3763,20 +3763,43 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } // Check indices are within array bounds. - for (int i = 0; i < vsp.length(); i += lsp.length()) { - IntVector vix = IntVector - .fromArray(lsp, indexMap, mapOffset + i) - .add(offset); - VectorIntrinsics.checkIndex(vix, a.length); + IntVector vix0 = IntVector.fromArray(lsp, indexMap, mapOffset).add(offset); + VectorIntrinsics.checkIndex(vix0, a.length); + + int vlen = vsp.length(); + int idx_vlen = lsp.length(); + IntVector vix1 = null; + if (vlen >= idx_vlen * 2) { + vix1 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen).add(offset); + VectorIntrinsics.checkIndex(vix1, a.length); + } + +#if[byte] + IntVector vix2 = null; + IntVector vix3 = null; + if (vlen == idx_vlen * 4) { + vix2 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 2).add(offset); + VectorIntrinsics.checkIndex(vix2, a.length); + vix3 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 3).add(offset); + VectorIntrinsics.checkIndex(vix3, a.length); } return VectorSupport.loadWithMap( vectorType, null, $type$.class, vsp.laneCount(), - lsp.vectorType(), - a, ARRAY_BASE, null, null, + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, vix2, vix3, null, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); +#else[byte] + return VectorSupport.loadWithMap( + vectorType, null, $type$.class, vsp.laneCount(), + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, null, null, null, + a, offset, indexMap, mapOffset, vsp, + (c, idx, iMap, idy, s, vm) -> + s.vOp(n -> c[idx + iMap[idy+n]])); +#end[byte] } #else[byteOrShort] @ForceInline @@ -3824,8 +3847,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { return VectorSupport.loadWithMap( vectorType, null, $type$.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, null, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, null, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(n -> c[idx + iMap[idy+n]])); @@ -4450,7 +4473,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { VectorSupport.storeWithMap( vsp.vectorType(), null, 
vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, null, a, offset, indexMap, mapOffset, @@ -4971,20 +4994,43 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // Check indices are within array bounds. // FIXME: Check index under mask controlling. - for (int i = 0; i < vsp.length(); i += lsp.length()) { - IntVector vix = IntVector - .fromArray(lsp, indexMap, mapOffset + i) - .add(offset); - VectorIntrinsics.checkIndex(vix, a.length); + IntVector vix0 = IntVector.fromArray(lsp, indexMap, mapOffset).add(offset); + VectorIntrinsics.checkIndex(vix0, a.length); + + int vlen = vsp.length(); + int idx_vlen = lsp.length(); + IntVector vix1 = null; + if (vlen >= idx_vlen * 2) { + vix1 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen).add(offset); + VectorIntrinsics.checkIndex(vix1, a.length); + } + +#if[byte] + IntVector vix2 = null; + IntVector vix3 = null; + if (vlen == idx_vlen * 4) { + vix2 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 2).add(offset); + VectorIntrinsics.checkIndex(vix2, a.length); + vix3 = IntVector.fromArray(lsp, indexMap, mapOffset + idx_vlen * 3).add(offset); + VectorIntrinsics.checkIndex(vix3, a.length); } return VectorSupport.loadWithMap( vectorType, maskClass, $type$.class, vsp.laneCount(), - lsp.vectorType(), - a, ARRAY_BASE, null, m, + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, vix2, vix3, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); +#else[byte] + return VectorSupport.loadWithMap( + vectorType, maskClass, $type$.class, vsp.laneCount(), + lsp.vectorType(), lsp.length(), + a, ARRAY_BASE, vix0, vix1, null, null, m, + a, offset, indexMap, mapOffset, vsp, + (c, idx, iMap, idy, s, vm) -> + s.vOp(vm, n -> c[idx + iMap[idy+n]])); +#end[byte] } #else[byteOrShort] @ForceInline @@ -5034,8 +5080,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { return VectorSupport.loadWithMap( vectorType, maskClass, $type$.class, vsp.laneCount(), - isp.vectorType(), - a, ARRAY_BASE, vix, m, + isp.vectorType(), isp.length(), + a, ARRAY_BASE, vix, null, null, null, m, a, offset, indexMap, mapOffset, vsp, (c, idx, iMap, idy, s, vm) -> s.vOp(vm, n -> c[idx + iMap[idy+n]])); @@ -5225,7 +5271,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { VectorSupport.storeWithMap( vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), - isp.vectorType(), + isp.vectorType(), isp.length(), a, arrayAddress(a, 0), vix, this, m, a, offset, indexMap, mapOffset,
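
The patch above reworks the subword (byte/short) gather path: on the Java side, VectorSupport.loadWithMap now receives an explicit index-vector length plus up to four precomputed int index vectors (vix0..vix3), and the x86 backend drops the separate offset operand from the masked and unmasked 8-byte gather stubs, folding the array base offset into the address instead. As a point of reference, the snippet below is a minimal, self-contained usage sketch (not part of the patch) of the Java-level gather this path serves: ByteVector.fromArray with an index map. Its semantics match the default lambda in the diff, result[n] = a[offset + indexMap[mapOffset + n]], and lanes under a cleared mask bit are loaded as zero per the Vector API spec. The class name SubwordGatherSketch is made up for illustration; running it requires --add-modules jdk.incubator.vector.

// Hypothetical illustration only; exercises the gather entry points touched by the patch.
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;

public class SubwordGatherSketch {
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_256;

    public static void main(String[] args) {
        byte[] src = new byte[256];
        for (int i = 0; i < src.length; i++) {
            src[i] = (byte) i;
        }

        // Index map: gather every other element of src, starting at src[0].
        int[] indexMap = new int[SPECIES.length()];
        for (int i = 0; i < indexMap.length; i++) {
            indexMap[i] = 2 * i;
        }

        // Unmasked gather: lane n holds src[0 + indexMap[0 + n]].
        ByteVector gathered = ByteVector.fromArray(SPECIES, src, 0, indexMap, 0);

        // Masked gather: lanes whose mask bit is false are set to zero.
        boolean[] maskBits = new boolean[SPECIES.length()];
        for (int i = 0; i < maskBits.length; i += 2) {
            maskBits[i] = true;
        }
        VectorMask<Byte> mask = VectorMask.fromArray(SPECIES, maskBits, 0);
        ByteVector maskedGather = ByteVector.fromArray(SPECIES, src, 0, indexMap, 0, mask);

        System.out.println(gathered);
        System.out.println(maskedGather);
    }
}

The reason loadWithMap grows to four index-vector parameters is visible in the ByteVector hunk: a byte vector can span up to four int index vectors of the index species (vlen == idx_vlen * 4), while a short vector needs at most two, so ByteVector passes vix0..vix3 and ShortVector passes only vix0 and vix1, padding the rest with null.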