Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2413,6 +2413,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_VectorMaskCmp:
if (vlen < 2 || bit_size < 64) {
return false;
}
break;
Comment on lines +2416 to +2420
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, currently min_vector_size for the byte type is lowered to 4 bytes to support vector API shuffle, but we don't have a correct per-opcode size check. E.g. there is no length check for reduce_add8B; do you also see any issue with that? I think we'd better only allow 4 bytes for vector API shuffle-related opcodes (and 2 for mask-related opcodes) in match_rule_supported_vector, while for other opcodes we still only support >= 64 bits.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, currently min_vector_size for the byte type is lowered to 4 bytes to support vector API shuffle, but we don't have a correct per-opcode size check. E.g. there is no length check for reduce_add8B; do you also see any issue with that?

Since SLP does not support subword reduction, I think current match rules are fine. AArch64 part looks good to me.

default:
break;
}
Expand Down
7 changes: 6 additions & 1 deletion src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1496,7 +1496,11 @@ void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int v

void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
if (vlen_in_bytes <= 16) {
if (vlen_in_bytes == 4) {
movdl(dst, addr);
} else if (vlen_in_bytes == 8) {
movq(dst, addr);
} else if (vlen_in_bytes == 16) {
movdqu(dst, addr, scratch);
} else if (vlen_in_bytes == 32) {
vmovdqu(dst, addr, scratch);
Expand All @@ -1505,6 +1509,7 @@ void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int
evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch);
}
}

// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) {
Expand Down
7 changes: 6 additions & 1 deletion src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1835,6 +1835,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_VectorMaskCmp:
if (vlen < 2 || size_in_bits < 32) {
return false;
}
break;
}
return true; // Per default match rules are supported.
}
Expand Down Expand Up @@ -6894,7 +6899,7 @@ instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg kt
instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{
predicate((UseAVX <= 2 || !VM_Version::supports_avx512vl()) &&
!is_unsigned_booltest_pred(n->in(2)->get_int()) &&
vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
Expand Down
3 changes: 0 additions & 3 deletions src/hotspot/share/opto/vectorIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,9 +411,6 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
int num_elem = vlen->get_con();
BasicType elem_bt = T_BYTE;

if (num_elem < 4)
return false;

if (!arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed)) {
return false;
}
Expand Down
58 changes: 58 additions & 0 deletions test/hotspot/jtreg/compiler/vectorapi/TestVectorShuffleIota.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) 2021, Huawei Technologies Co. Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package compiler.vectorapi;

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;
import jdk.incubator.vector.VectorShuffle;

/*
* @test
* @bug 8265907
* @modules jdk.incubator.vector
* @run main/othervm compiler.vectorapi.TestVectorShuffleIota
*/

/**
 * Driver that repeatedly builds an iota shuffle for a 128-bit int species,
 * converts it to a vector, stores the lanes into {@link #ai}, and finally
 * prints the array contents.
 *
 * The class performs no assertions of its own; presumably the test passes
 * as long as the VM runs it to completion -- confirm against the bug report.
 */
public class TestVectorShuffleIota {
    /** Species used for the shuffle: the 128-bit int species. */
    static final VectorSpecies<Integer> SPECIESi = IntVector.SPECIES_128;

    /** Number of times {@link #testShuffleI()} is invoked from {@link #main}. */
    // NOTE(review): presumably large enough to get the method JIT-compiled -- confirm.
    static final int INVOC_COUNT = 50000;

    /** Destination array; its initial contents are overwritten by the test. */
    static int[] ai = {87, 65, 78, 71};

    /**
     * Creates an iota shuffle (arguments: SPECIESi, 0, 2, false), converts it
     * to an {@code IntVector} and writes that vector into {@link #ai} starting
     * at index 0.
     */
    static void testShuffleI() {
        VectorShuffle<Integer> iotaShuffle = VectorShuffle.iota(SPECIESi, 0, 2, false);
        IntVector lanes = (IntVector) iotaShuffle.toVector();
        lanes.intoArray(ai, 0);
    }

    /**
     * Runs {@link #testShuffleI()} {@link #INVOC_COUNT} times, then prints every
     * element of {@link #ai} followed by ", " and terminates the line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int remaining = INVOC_COUNT;
        while (remaining > 0) {
            testShuffleI();
            remaining--;
        }

        for (int value : ai) {
            System.out.print(value + ", ");
        }
        System.out.println();
    }
}