Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 10 additions & 36 deletions src/hotspot/share/opto/superword.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1261,50 +1261,24 @@ bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
//------------------------------isomorphic---------------------------
// Are s1 and s2 similar?
bool SuperWord::isomorphic(Node* s1, Node* s2) {
if (s1->Opcode() != s2->Opcode()) return false;
if (s1->req() != s2->req()) return false;
if (!same_velt_type(s1, s2)) return false;
if (s1->is_Bool() && s1->as_Bool()->_test._test != s2->as_Bool()->_test._test) return false;
if (s1->Opcode() != s2->Opcode() ||
s1->req() != s2->req() ||
!same_velt_type(s1, s2) ||
(s1->is_Bool() && s1->as_Bool()->_test._test != s2->as_Bool()->_test._test)) {
return false;
}

Node* s1_ctrl = s1->in(0);
Node* s2_ctrl = s2->in(0);
// If the control nodes are equivalent, no further checks are required to test for isomorphism.
if (s1_ctrl == s2_ctrl) {
return true;
} else {
bool s1_ctrl_inv = ((s1_ctrl == nullptr) ? true : lpt()->is_invariant(s1_ctrl));
bool s2_ctrl_inv = ((s2_ctrl == nullptr) ? true : lpt()->is_invariant(s2_ctrl));
// If the control nodes are not invariant for the loop, fail isomorphism test.
if (!s1_ctrl_inv || !s2_ctrl_inv) {
return false;
}
if(s1_ctrl != nullptr && s2_ctrl != nullptr) {
if (s1_ctrl->is_Proj()) {
s1_ctrl = s1_ctrl->in(0);
assert(lpt()->is_invariant(s1_ctrl), "must be invariant");
}
if (s2_ctrl->is_Proj()) {
s2_ctrl = s2_ctrl->in(0);
assert(lpt()->is_invariant(s2_ctrl), "must be invariant");
}
if (!s1_ctrl->is_RangeCheck() || !s2_ctrl->is_RangeCheck()) {
return false;
}
}
// Control nodes are invariant. However, we have no way of checking whether they resolve
// in an equivalent manner. But, we know that invariant range checks are guaranteed to
// throw before the loop (if they would have thrown). Thus, the loop would not have been reached.
// Therefore, if the control nodes for both are range checks, we accept them to be isomorphic.
for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
Node* t1 = s1->fast_out(i);
for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
Node* t2 = s2->fast_out(j);
if (VectorNode::is_muladds2i(t1) && VectorNode::is_muladds2i(t2)) {
return true;
}
}
}
const bool s1_ctrl_inv = (s1_ctrl == nullptr) || lpt()->is_invariant(s1_ctrl);
const bool s2_ctrl_inv = (s2_ctrl == nullptr) || lpt()->is_invariant(s2_ctrl);
return s1_ctrl_inv && s2_ctrl_inv;
}
return false;
}

//------------------------------independent---------------------------
Expand Down
16 changes: 16 additions & 0 deletions test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,22 @@ public class IRNode {
beforeMatchingNameRegex(MUL, "Mul(I|L|F|D)");
}

// Placeholder for the scalar "MulAddS2I" node name; registered through
// beforeMatchingNameRegex like the neighbouring scalar node constants.
public static final String MUL_ADD_S2I = PREFIX + "MUL_ADD_S2I" + POSTFIX;
static {
beforeMatchingNameRegex(MUL_ADD_S2I, "MulAddS2I");
}

// Placeholder for the "MulAddVS2VI" node; built with VECTOR_PREFIX and
// registered through vectorNode with TYPE_INT.
// NOTE(review): presumably TYPE_INT names the vector element type of the
// result -- confirm against vectorNode's definition.
public static final String MUL_ADD_VS2VI = VECTOR_PREFIX + "MUL_ADD_VS2VI" + POSTFIX;
static {
vectorNode(MUL_ADD_VS2VI, "MulAddVS2VI", TYPE_INT);
}

// Can only be used if avx512_vnni is available.
// Registered through machOnly under the name "vmuladdaddS2I_reg".
// NOTE(review): presumably this is the matcher (AD file) instruct name and is
// only visible in the mach-level output -- confirm against machOnly.
public static final String MUL_ADD_VS2VI_VNNI = PREFIX + "MUL_ADD_VS2VI_VNNI" + POSTFIX;
static {
machOnly(MUL_ADD_VS2VI_VNNI, "vmuladdaddS2I_reg");
}

public static final String MUL_D = PREFIX + "MUL_D" + POSTFIX;
static {
beforeMatchingNameRegex(MUL_D, "MulD");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ public class IREncodingPrinter {
"avx512dq",
"avx512vl",
"avx512f",
"avx512_vnni",
// AArch64
"sha3",
"asimd",
Expand Down
96 changes: 96 additions & 0 deletions test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

/**
* @test
* @bug 8310886
* @requires os.arch == "x86_64" | os.arch == "aarch64"
* @summary Test MulAddS2I vectorization.
* @library /test/lib /
* @run driver compiler.loopopts.superword.TestMulAddS2I
*/

package compiler.loopopts.superword;

import compiler.lib.ir_framework.*;
import jdk.test.lib.Asserts;
import jdk.test.lib.Platform;

/**
 * IR-framework test for a loop that sums the products of adjacent short pairs
 * into an int array.
 *
 * The {@code @IR} rules on {@link #test()} check for the MUL_ADD_S2I,
 * MUL_ADD_VS2VI and MUL_ADD_VS2VI_VNNI IR nodes under the listed CPU-feature
 * and flag conditions. Each framework-driven run of {@code test()} is compared
 * against {@link #GOLDEN}, which the static initializer computes by calling
 * {@code test()} once on the same input data.
 */
public class TestMulAddS2I {
// Length of the short input arrays.
static final int RANGE = 1024;
// Number of output elements (511). Iteration i reads indices 2*i and 2*i+1,
// so the highest index touched is 2*(ITER-1)+1 = 1021, which is below RANGE.
static final int ITER = RANGE/2 - 1;

// Random short input read by test().
static short[] sArr1 = new short[RANGE];
// NOTE(review): sArr2 is filled in the static initializer but never read
// anywhere in this class; test() multiplies sArr1 by itself. Confirm whether
// one operand of each product was meant to come from sArr2.
static short[] sArr2 = new short[RANGE];
// Reference result: output of test() as computed during class initialization.
static final int[] GOLDEN;

static {
// Fill both inputs with random shorts (the int from nextInt() is narrowed to short).
for (int i = 0; i < RANGE; i++) {
sArr1[i] = (short)(AbstractInfo.getRandom().nextInt());
sArr2[i] = (short)(AbstractInfo.getRandom().nextInt());
}
// Must run after the arrays are filled, since test() reads sArr1.
GOLDEN = test();
}


/**
 * Test entry point. On x86 platforms the framework is run twice, once with
 * -XX:+UseUnalignedLoadStores and once with -XX:-UseUnalignedLoadStores, so
 * that the IR rules conditioned on that flag are exercised for both values.
 * On every other platform it is run once without extra flags.
 */
public static void main(String[] args) {
if (Platform.isX64() || Platform.isX86()) {
TestFramework.runWithFlags("-XX:+UseUnalignedLoadStores");
TestFramework.runWithFlags("-XX:-UseUnalignedLoadStores");
} else {
TestFramework.run();
}
}

/**
 * Custom run method for "test", declared with a warm-up count of 0: invokes
 * test() and checks its result against GOLDEN.
 */
@Run(test = "test")
@Warmup(0)
public static void run() {
compare(test());
}

/**
 * Asserts that the first ITER elements of {@code out} equal the corresponding
 * elements of GOLDEN; the assertion message names the index being checked.
 *
 * @param out result array to verify; must hold at least ITER elements
 */
public static void compare(int[] out) {
for (int i = 0; i < ITER; i++) {
Asserts.assertEQ(out[i], GOLDEN[i], "wrong result for out[" + i + "]");
}
}

/**
 * Loop under test: out[i] = sArr1[2*i]^2 + sArr1[2*i+1]^2 for i in [0, ITER).
 *
 * IR rules, in order:
 *  1. sse2 and UseUnalignedLoadStores=true:  MUL_ADD_S2I > 0 and MUL_ADD_VS2VI > 0.
 *  2. sse2 and UseUnalignedLoadStores=false: MUL_ADD_S2I > 0, MUL_ADD_VS2VI must not appear.
 *  3. asimd and MaxVectorSize=16:            MUL_ADD_S2I > 0 and MUL_ADD_VS2VI > 0.
 *  4. avx512_vnni and UseUnalignedLoadStores=true: MUL_ADD_S2I > 0 and MUL_ADD_VS2VI_VNNI > 0.
 *
 * NOTE(review): the rules match on the compiled shape of this loop, so the
 * body should not be restructured without re-validating the IR expectations.
 *
 * @return a freshly allocated array of ITER sums
 */
@Test
@IR(applyIfCPUFeature = {"sse2", "true"}, applyIf = {"UseUnalignedLoadStores", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"sse2", "true"}, applyIf = {"UseUnalignedLoadStores", "false"},
failOn = {IRNode.MUL_ADD_VS2VI}, // Can only pack LoadS if UseUnalignedLoadStores is true (default if sse4.2)
counts = {IRNode.MUL_ADD_S2I, "> 0"})
@IR(applyIfCPUFeature = {"asimd", "true"}, applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
@IR(applyIfCPUFeature = {"avx512_vnni", "true"}, applyIf = {"UseUnalignedLoadStores", "true"},
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
public static int[] test() {
int[] out = new int[ITER];
// NOTE(review): out2 is written in the loop but never read or returned.
// Its purpose is not evident from this file; confirm before removing, as
// it may be needed for the loop shape the IR rules expect.
int[] out2 = new int[ITER];
for (int i = 0; i < ITER; i++) {
// out is freshly zeroed, so "+=" stores the sum of the two squared
// adjacent shorts (each product is computed in int arithmetic).
out[i] += ((sArr1[2*i] * sArr1[2*i]) + (sArr1[2*i+1] * sArr1[2*i+1]));
out2[i] += out[i];
}
return out;
}
}