From 6108c5d1b8a742db69ed17022344fa99827fb6b2 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Sun, 17 Nov 2024 19:32:53 -0500 Subject: [PATCH 01/12] Subword vectorization --- src/hotspot/cpu/aarch64/matcher_aarch64.hpp | 6 +- src/hotspot/cpu/arm/matcher_arm.hpp | 6 +- src/hotspot/cpu/ppc/matcher_ppc.hpp | 6 +- src/hotspot/cpu/riscv/matcher_riscv.hpp | 6 +- src/hotspot/cpu/s390/matcher_s390.hpp | 6 +- src/hotspot/cpu/x86/matcher_x86.hpp | 21 ++- src/hotspot/share/opto/superword.cpp | 13 +- .../share/opto/superwordVTransformBuilder.cpp | 13 ++ src/hotspot/share/opto/vtransform.cpp | 13 ++ src/hotspot/share/opto/vtransform.hpp | 16 ++ .../superword/TestSubwordVectorization.java | 155 ++++++++++++++++++ .../runner/ArrayTypeConvertTest.java | 38 +++-- .../bench/vm/compiler/VectorSubword.java | 73 +++++++++ 13 files changed, 350 insertions(+), 22 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java create mode 100644 test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java diff --git a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp index 447c5f57a8aa5..ebd5339027719 100644 --- a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -198,6 +198,10 @@ } } + static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { + return false; + } + // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/arm/matcher_arm.hpp b/src/hotspot/cpu/arm/matcher_arm.hpp index a4436b7eab410..252ac261e70a8 100644 --- a/src/hotspot/cpu/arm/matcher_arm.hpp +++ b/src/hotspot/cpu/arm/matcher_arm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -191,6 +191,10 @@ } } + static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { + return false; + } + // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp index aaac79325c421..2ff9705cbf4cc 100644 --- a/src/hotspot/cpu/ppc/matcher_ppc.hpp +++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -200,6 +200,10 @@ } } + static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { + return false; + } + // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp index ed1519ec1503a..c5a6d83819a75 100644 --- a/src/hotspot/cpu/riscv/matcher_riscv.hpp +++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -197,6 +197,10 @@ } } + static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { + return false; + } + // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp index d8b1ae68f6f50..87858b8f7e0e5 100644 --- a/src/hotspot/cpu/s390/matcher_s390.hpp +++ b/src/hotspot/cpu/s390/matcher_s390.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2017, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -194,6 +194,10 @@ } } + static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { + return false; + } + // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp index b311f4144b2bf..22b199beff48a 100644 --- a/src/hotspot/cpu/x86/matcher_x86.hpp +++ b/src/hotspot/cpu/x86/matcher_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -261,6 +261,25 @@ } } + static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { + // Vector casts are only supported on AVX1 and higher + if (UseAVX == 0) { + return false; + } + + switch (from_bt) { + case T_INT: { + return to_bt == T_SHORT || to_bt == T_BYTE; + } + case T_SHORT: { + return to_bt == T_BYTE; + } + default: { + return false; + } + } + } + // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { if (VM_Version::supports_avx512dq()) { diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index ae95c2bb6d8b1..3c9890cb154a6 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2347,8 +2347,17 @@ bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) cons type2aelembytes(use_bt) == 4; } - // Default case: input size of use equals output size of def. - return type2aelembytes(use_bt) == type2aelembytes(def_bt); + // Input size of use equals output size of def + if (type2aelembytes(use_bt) == type2aelembytes(def_bt)) { + return true; + } + + // Input sizes differ, but platform supports a cast to change the def shape to the use shape + if (Matcher::is_vector_cast_supported(def_bt, use_bt)) { + return true; + } + + return false; } // Return nullptr if success, else failure message diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.cpp b/src/hotspot/share/opto/superwordVTransformBuilder.cpp index aee6add2a98ef..15575ea923f4d 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp @@ -186,6 +186,19 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i Node_List* pack_in = _packset.pack_input_at_index_or_null(pack, index); if (pack_in != nullptr) { + Node* in_p0 = pack_in->at(0); + BasicType def_bt = _vloop_analyzer.types().velt_basic_type(in_p0); + BasicType use_bt = _vloop_analyzer.types().velt_basic_type(p0); + + // If the use and def types are different, emit a cast node + if (use_bt != def_bt && !p0->is_Convert() && Matcher::is_vector_cast_supported(def_bt, use_bt)) { + VTransformNode* in = get_vtnode(pack_in->at(0)); + VTransformNode* cast = new (_vtransform.arena()) VTransformCastNode(_vtransform, pack->size(), def_bt, use_bt); + cast->set_req(1, in); + + return cast; + } + // Input is a matching pack -> vtnode already exists. assert(index != 2 || !VectorNode::is_shift(p0), "shift's count cannot be vector"); return get_vtnode(pack_in->at(0)); diff --git a/src/hotspot/share/opto/vtransform.cpp b/src/hotspot/share/opto/vtransform.cpp index 4730f3ac1343b..e7ef9b7767934 100644 --- a/src/hotspot/share/opto/vtransform.cpp +++ b/src/hotspot/share/opto/vtransform.cpp @@ -601,6 +601,15 @@ VTransformApplyResult VTransformStoreVectorNode::apply(const VLoopAnalyzer& vloo return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size()); } +VTransformApplyResult VTransformCastNode::apply(const VLoopAnalyzer& vloop_analyzer, + const GrowableArray& vnode_idx_to_transformed_node) const { + Node* value = find_transformed_input(1, vnode_idx_to_transformed_node); + VectorNode* vn = VectorCastNode::make(VectorCastNode::opcode(-1, _from_bt), value, _to_bt, _vlen); + register_new_node_from_vectorization(vloop_analyzer, vn, value); + + return VTransformApplyResult::make_vector(vn, _vlen, vn->vect_type()->length_in_bytes()); +} + void VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes(const VLoopAnalyzer& vloop_analyzer, Node* vn) const { PhaseIdealLoop* phase = vloop_analyzer.vloop().phase(); Node* first = nodes().at(0); @@ -696,6 +705,10 @@ void VTransformPopulateIndexNode::print_spec() const { tty->print("vlen=%d element_bt=%s", _vlen, type2name(_element_bt)); } +void VTransformCastNode::print_spec() const { + tty->print("vlen=%d from=%s to=%s", _vlen, type2name(_from_bt), type2name(_to_bt)); +} + void VTransformVectorNode::print_spec() const { tty->print("%d-pack[", _nodes.length()); for (int i = 0; i < _nodes.length(); i++) { diff --git a/src/hotspot/share/opto/vtransform.hpp b/src/hotspot/share/opto/vtransform.hpp index 4fc68c7b4dfc2..05d9f37f131ba 100644 --- a/src/hotspot/share/opto/vtransform.hpp +++ b/src/hotspot/share/opto/vtransform.hpp @@ -522,6 +522,22 @@ class VTransformStoreVectorNode : public VTransformMemVectorNode { NOT_PRODUCT(virtual const char* name() const override { return "StoreVector"; };) }; +class VTransformCastNode : public VTransformNode { +private: + uint _vlen; + BasicType _from_bt; + BasicType _to_bt; + +public: + // req = 2 -> [ctrl, input] + VTransformCastNode(VTransform& vtransform, int vlen, BasicType from_bt, BasicType to_bt) : VTransformNode(vtransform, 2), + _vlen(vlen), _from_bt(from_bt), _to_bt(to_bt) {} + virtual VTransformApplyResult apply(const VLoopAnalyzer& vloop_analyzer, + const GrowableArray& vnode_idx_to_transformed_node) const override; + NOT_PRODUCT(virtual const char* name() const override { return "Cast"; };) + NOT_PRODUCT(virtual void print_spec() const override;) +}; + // Invoke callback on all memops, in the order of the schedule. template void VTransformGraph::for_each_memop_in_schedule(Callback callback) const { diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java new file mode 100644 index 0000000000000..d57cd543b53ce --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.loopopts.superword; + +import compiler.lib.ir_framework.*; +import java.util.Random; +import jdk.test.lib.Utils; + +/* + * @test + * @bug 8342095 + * @key randomness + * @summary Ensure that vectorization of conversions between subword types works as expected. + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestSubwordVectorization + */ + +public class TestSubwordVectorization { + private static final Random RANDOM = Utils.getRandomInstance(); + private static final int SIZE = 1024; + + public static void main(String[] args) { + TestFramework.run(); + } + + @Setup + static Object[] setupIntArray() { + int[] res = new int[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = RANDOM.nextInt(); + } + + return new Object[] { res }; + } + + @Setup + static Object[] setupShortArray() { + short[] res = new short[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = (short) RANDOM.nextInt(); + } + + return new Object[] { res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) + @Arguments(setup = "setupIntArray") + public Object[] testIntToShort(int[] ints) { + short[] res = new short[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = (short) ints[i]; + } + + return new Object[] { ints, res }; + } + + @Check(test = "testIntToShort") + public void checkTestIntToShort(Object[] vals) { + int[] ints = (int[]) vals[0]; + short[] res = (short[]) vals[1]; + + for (int i = 0; i < SIZE; i++) { + short value = (short) ints[i]; + + if (res[i] != value) { + throw new IllegalStateException("Int to short test failed: Expected " + value + " but got " + res[i]); + } + } + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) + @Arguments(setup = "setupIntArray") + public Object[] testIntToByte(int[] ints) { + byte[] res = new byte[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = (byte) ints[i]; + } + + return new Object[] { ints, res }; + } + + @Check(test = "testIntToByte") + public void checkTestIntToByte(Object[] vals) { + int[] ints = (int[]) vals[0]; + byte[] res = (byte[]) vals[1]; + + for (int i = 0; i < SIZE; i++) { + byte value = (byte) ints[i]; + + if (res[i] != value) { + throw new IllegalStateException("Int to byte test failed: Expected " + value + " but got " + res[i]); + } + } + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE_ANY, ">0" }) + @Arguments(setup = "setupShortArray") + public Object[] testShortToByte(short[] shorts) { + byte[] res = new byte[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = (byte) shorts[i]; + } + + return new Object[] { shorts, res }; + } + + @Check(test = "testShortToByte") + public void checkTestShortToByte(Object[] vals) { + short[] shorts = (short[]) vals[0]; + byte[] res = (byte[]) vals[1]; + + for (int i = 0; i < SIZE; i++) { + byte value = (byte) shorts[i]; + + if (res[i] != value) { + throw new IllegalStateException("Short to byte test failed: Expected " + value + " but got " + res[i]); + } + } + } +} diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java index 0da101a8fb7a9..c7b864d862ebc 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,7 @@ /* * @test + * @bug 8183390 8340010 8342095 * @summary Vectorization test on array type conversions * @library /test/lib / * @@ -148,10 +149,9 @@ public int[] signExtensionFromByte() { // ---------------- Integer Narrow ---------------- @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) public short[] narrowToSigned() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -161,10 +161,9 @@ public short[] narrowToSigned() { } @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_char)", ">0" }) public char[] narrowToUnsigned() { char[] res = new char[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -174,11 +173,10 @@ public char[] narrowToUnsigned() { } @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. - public byte[] NarrowToByte() { + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) + public byte[] narrowToByte() { byte[] res = new byte[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = (byte) ints[i]; @@ -186,6 +184,18 @@ public byte[] NarrowToByte() { return res; } + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) + public byte[] narrowShortToByte() { + byte[] res = new byte[SIZE]; + for (int i = 0; i < SIZE; i++) { + res[i] = (byte) shorts[i]; + } + return res; + } + // ---------------- Convert I/L to F/D ---------------- @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}, diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java new file mode 100644 index 0000000000000..1b4e5aaa8fa1a --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.*; + +import java.util.concurrent.TimeUnit; +import java.util.Random; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS) +@Fork(value = 3) +public class VectorSubword { + @Param({"1024"}) + public int SIZE; + + private byte[] bytes; + private short[] shorts; + private int[] ints; + + @Setup + public void init() { + bytes = new byte[SIZE]; + shorts = new short[SIZE]; + ints = new int[SIZE]; + } + + @Benchmark + public void intToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = (short) ints[i]; + } + } + + @Benchmark + public void intToByte() { + for (int i = 0; i < SIZE; i++) { + bytes[i] = (byte) ints[i]; + } + } + + @Benchmark + public void shortToByte() { + for (int i = 0; i < SIZE; i++) { + bytes[i] = (byte) shorts[i]; + } + } +} From 3b5447f6b275f9579b4390cc5412f083aa08ebc1 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Thu, 6 Feb 2025 12:29:58 -0500 Subject: [PATCH 02/12] Implement widening and address comments from review --- src/hotspot/cpu/x86/matcher_x86.hpp | 14 ++- .../share/opto/superwordVTransformBuilder.cpp | 2 +- src/hotspot/share/opto/vtransform.cpp | 4 +- src/hotspot/share/opto/vtransform.hpp | 4 +- .../superword/TestSubwordVectorization.java | 117 +++++++++++++++++- .../runner/ArrayTypeConvertTest.java | 32 +++-- 6 files changed, 146 insertions(+), 27 deletions(-) diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp index 22b199beff48a..3a188cfcce28d 100644 --- a/src/hotspot/cpu/x86/matcher_x86.hpp +++ b/src/hotspot/cpu/x86/matcher_x86.hpp @@ -267,12 +267,16 @@ return false; } + // Cannot cast to own type + if (to_bt == from_bt) { + return false; + } + switch (from_bt) { - case T_INT: { - return to_bt == T_SHORT || to_bt == T_BYTE; - } - case T_SHORT: { - return to_bt == T_BYTE; + case T_INT: + case T_SHORT: + case T_BYTE: { + return to_bt == T_INT || to_bt == T_SHORT || to_bt == T_BYTE; } default: { return false; diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.cpp b/src/hotspot/share/opto/superwordVTransformBuilder.cpp index 15575ea923f4d..03f2a34d0c8f3 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp @@ -193,7 +193,7 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i // If the use and def types are different, emit a cast node if (use_bt != def_bt && !p0->is_Convert() && Matcher::is_vector_cast_supported(def_bt, use_bt)) { VTransformNode* in = get_vtnode(pack_in->at(0)); - VTransformNode* cast = new (_vtransform.arena()) VTransformCastNode(_vtransform, pack->size(), def_bt, use_bt); + VTransformNode* cast = new (_vtransform.arena()) VTransformCastVectorNode(_vtransform, pack->size(), def_bt, use_bt); cast->set_req(1, in); return cast; diff --git a/src/hotspot/share/opto/vtransform.cpp b/src/hotspot/share/opto/vtransform.cpp index e7ef9b7767934..65e5b7562f59c 100644 --- a/src/hotspot/share/opto/vtransform.cpp +++ b/src/hotspot/share/opto/vtransform.cpp @@ -601,7 +601,7 @@ VTransformApplyResult VTransformStoreVectorNode::apply(const VLoopAnalyzer& vloo return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size()); } -VTransformApplyResult VTransformCastNode::apply(const VLoopAnalyzer& vloop_analyzer, +VTransformApplyResult VTransformCastVectorNode::apply(const VLoopAnalyzer& vloop_analyzer, const GrowableArray& vnode_idx_to_transformed_node) const { Node* value = find_transformed_input(1, vnode_idx_to_transformed_node); VectorNode* vn = VectorCastNode::make(VectorCastNode::opcode(-1, _from_bt), value, _to_bt, _vlen); @@ -705,7 +705,7 @@ void VTransformPopulateIndexNode::print_spec() const { tty->print("vlen=%d element_bt=%s", _vlen, type2name(_element_bt)); } -void VTransformCastNode::print_spec() const { +void VTransformCastVectorNode::print_spec() const { tty->print("vlen=%d from=%s to=%s", _vlen, type2name(_from_bt), type2name(_to_bt)); } diff --git a/src/hotspot/share/opto/vtransform.hpp b/src/hotspot/share/opto/vtransform.hpp index 05d9f37f131ba..5f2ff9fb2b766 100644 --- a/src/hotspot/share/opto/vtransform.hpp +++ b/src/hotspot/share/opto/vtransform.hpp @@ -522,7 +522,7 @@ class VTransformStoreVectorNode : public VTransformMemVectorNode { NOT_PRODUCT(virtual const char* name() const override { return "StoreVector"; };) }; -class VTransformCastNode : public VTransformNode { +class VTransformCastVectorNode : public VTransformNode { private: uint _vlen; BasicType _from_bt; @@ -530,7 +530,7 @@ class VTransformCastNode : public VTransformNode { public: // req = 2 -> [ctrl, input] - VTransformCastNode(VTransform& vtransform, int vlen, BasicType from_bt, BasicType to_bt) : VTransformNode(vtransform, 2), + VTransformCastVectorNode(VTransform& vtransform, int vlen, BasicType from_bt, BasicType to_bt) : VTransformNode(vtransform, 2), _vlen(vlen), _from_bt(from_bt), _to_bt(to_bt) {} virtual VTransformApplyResult apply(const VLoopAnalyzer& vloop_analyzer, const GrowableArray& vnode_idx_to_transformed_node) const override; diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java index d57cd543b53ce..32a99fa14e3a4 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java @@ -23,6 +23,8 @@ package compiler.loopopts.superword; + +import compiler.lib.generators.*; import compiler.lib.ir_framework.*; import java.util.Random; import jdk.test.lib.Utils; @@ -37,7 +39,8 @@ */ public class TestSubwordVectorization { - private static final Random RANDOM = Utils.getRandomInstance(); + private static final Generator G = Generators.G.ints(); + private static final int SIZE = 1024; public static void main(String[] args) { @@ -49,7 +52,7 @@ static Object[] setupIntArray() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { - res[i] = RANDOM.nextInt(); + res[i] = G.next(); } return new Object[] { res }; @@ -60,16 +63,29 @@ static Object[] setupShortArray() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { - res[i] = (short) RANDOM.nextInt(); + res[i] = G.next().shortValue(); + } + + return new Object[] { res }; + } + + @Setup + static Object[] setupByteArray() { + byte[] res = new byte[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = G.next().byteValue(); } return new Object[] { res }; } + // Narrowing + @Test @IR(applyIfCPUFeature = { "avx", "true" }, applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) @Arguments(setup = "setupIntArray") public Object[] testIntToShort(int[] ints) { short[] res = new short[SIZE]; @@ -98,7 +114,7 @@ public void checkTestIntToShort(Object[] vals) { @Test @IR(applyIfCPUFeature = { "avx", "true" }, applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) + counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) @Arguments(setup = "setupIntArray") public Object[] testIntToByte(int[] ints) { byte[] res = new byte[SIZE]; @@ -127,7 +143,7 @@ public void checkTestIntToByte(Object[] vals) { @Test @IR(applyIfCPUFeature = { "avx", "true" }, applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE_ANY, ">0" }) + counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortToByte(short[] shorts) { byte[] res = new byte[SIZE]; @@ -152,4 +168,93 @@ public void checkTestShortToByte(Object[] vals) { } } } + + // Widening + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_short, max_int)", ">0" }) + @Arguments(setup = "setupShortArray") + public Object[] testShortToInt(short[] shorts) { + int[] res = new int[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = shorts[i]; + } + + return new Object[] { shorts, res }; + } + + @Check(test = "testShortToInt") + public void checkTestShortToInt(Object[] vals) { + short[] shorts = (short[]) vals[0]; + int[] res = (int[]) vals[1]; + + for (int i = 0; i < SIZE; i++) { + int value = shorts[i]; + + if (res[i] != value) { + throw new IllegalStateException("Short to int test failed: Expected " + value + " but got " + res[i]); + } + } + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_byte, max_int)", ">0" }) + @Arguments(setup = "setupByteArray") + public Object[] testByteToInt(byte[] bytes) { + int[] res = new int[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Check(test = "testByteToInt") + public void checkTestByteToInt(Object[] vals) { + byte[] bytes = (byte[]) vals[0]; + int[] res = (int[]) vals[1]; + + for (int i = 0; i < SIZE; i++) { + int value = bytes[i]; + + if (res[i] != value) { + throw new IllegalStateException("Byte to int test failed: Expected " + value + " but got " + res[i]); + } + } + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_short)", ">0" }) + @Arguments(setup = "setupByteArray") + public Object[] testByteToShort(byte[] bytes) { + short[] res = new short[SIZE]; + + for (int i = 0; i < SIZE; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Check(test = "testByteToShort") + public void checkTestByteToShort(Object[] vals) { + byte[] bytes = (byte[]) vals[0]; + short[] res = (short[]) vals[1]; + + for (int i = 0; i < SIZE; i++) { + short value = bytes[i]; + + if (res[i] != value) { + throw new IllegalStateException("Byte to short test failed: Expected " + value + " but got " + res[i]); + } + } + } } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java index c7b864d862ebc..6f391173729b0 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java @@ -109,10 +109,9 @@ public ArrayTypeConvertTest() { // ---------------- Integer Extension ---------------- @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) public int[] signExtension() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -123,7 +122,7 @@ public int[] signExtension() { @Test @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. + // Subword vector casts with char do not work currently, see JDK-8349562. // Assert the vectorization failure so that we are reminded to update // the test when this limitation is addressed in the future. public int[] zeroExtension() { @@ -135,10 +134,9 @@ public int[] zeroExtension() { } @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) public int[] signExtensionFromByte() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -147,6 +145,18 @@ public int[] signExtensionFromByte() { return res; } + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) + public short[] signExtensionFromByteToShort() { + short[] res = new short[SIZE]; + for (int i = 0; i < SIZE; i++) { + res[i] = bytes[i]; + } + return res; + } + // ---------------- Integer Narrow ---------------- @Test @IR(applyIfCPUFeature = { "avx", "true" }, @@ -275,7 +285,7 @@ public double[] convertShortToDouble() { @Test @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. + // Subword vector casts with char do not work currently, see JDK-8349562. // Assert the vectorization failure so that we are reminded to update // the test when this limitation is addressed in the future. public float[] convertCharToFloat() { @@ -288,7 +298,7 @@ public float[] convertCharToFloat() { @Test @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. + // Subword vector casts with char do not work currently, see JDK-8349562. // Assert the vectorization failure so that we are reminded to update // the test when this limitation is addressed in the future. public double[] convertCharToDouble() { From cf75b269cdb6a5914dbbbdea64c097ae0fc4b9eb Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Thu, 6 Feb 2025 14:47:44 -0500 Subject: [PATCH 03/12] Fix some tests that now vectorize --- .../loopopts/superword/TestCompatibleUseDefTypeSize.java | 8 ++++---- .../compiler/vectorization/runner/ArrayShiftOpTest.java | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java index 43580f4dee246..95c074762075b 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -327,12 +327,12 @@ static Object[] test0(byte[] src, char[] dst) { } @Test - @IR(counts = {IRNode.STORE_VECTOR, "= 0"}, + @IR(counts = {IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + applyIf = {"AlignVector", "false"}, + applyIfCPUFeature = {"avx", "true"}) // "inflate" method: 1 byte -> 2 byte. // Java scalar code has no explicit conversion. - // Vector code would need a conversion. We may add this in the future. static Object[] test1(byte[] src, char[] dst) { for (int i = 0; i < src.length; i++) { dst[i] = (char)(src[i]); diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java index ec1e3f998ca12..a11c1fdbbe332 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -252,9 +252,8 @@ public short[] vectorUnsignedShiftRight() { } @Test - // Note that right shift operations on subword expressions cannot be - // vectorized since precise type info about signedness is missing. - @IR(failOn = {IRNode.STORE_VECTOR}) + @IR(applyIfCPUFeature = {"avx", "true"}, + counts = {IRNode.RSHIFT_VI, ">0"}) public short[] subwordExpressionRightShift() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { From 6daa8acec8747a6f645895e0a3dfd0e50a507acb Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Sun, 9 Feb 2025 00:58:37 -0500 Subject: [PATCH 04/12] Add new conversions to benchmark --- .../bench/vm/compiler/VectorSubword.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java index 1b4e5aaa8fa1a..0f0c6d9f7249b 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java @@ -70,4 +70,25 @@ public void shortToByte() { bytes[i] = (byte) shorts[i]; } } + + @Benchmark + public void shortToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = shorts[i]; + } + } + + @Benchmark + public void byteToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = bytes[i]; + } + } + + @Benchmark + public void byteToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = bytes[i]; + } + } } From 8920454dd3fd6aa7be3e44f26cca0b31f34e0613 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Sun, 16 Feb 2025 23:51:10 -0500 Subject: [PATCH 05/12] Address comments from review, refactor test --- src/hotspot/cpu/x86/matcher_x86.hpp | 6 +- src/hotspot/share/opto/vtransform.hpp | 2 +- .../TestCompatibleUseDefTypeSize.java | 90 +++++- .../superword/TestSubwordVectorization.java | 260 ------------------ 4 files changed, 92 insertions(+), 266 deletions(-) delete mode 100644 test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp index 3a188cfcce28d..6e19c0f068ef9 100644 --- a/src/hotspot/cpu/x86/matcher_x86.hpp +++ b/src/hotspot/cpu/x86/matcher_x86.hpp @@ -267,11 +267,9 @@ return false; } - // Cannot cast to own type - if (to_bt == from_bt) { - return false; - } + assert(to_bt != from_bt, "Must call with different from_bt and to_bt"); + // T_CHAR is not supported yet due to the backend not implementing vector casts to and from char. switch (from_bt) { case T_INT: case T_SHORT: diff --git a/src/hotspot/share/opto/vtransform.hpp b/src/hotspot/share/opto/vtransform.hpp index 5f2ff9fb2b766..08534ed9cb3e8 100644 --- a/src/hotspot/share/opto/vtransform.hpp +++ b/src/hotspot/share/opto/vtransform.hpp @@ -534,7 +534,7 @@ class VTransformCastVectorNode : public VTransformNode { _vlen(vlen), _from_bt(from_bt), _to_bt(to_bt) {} virtual VTransformApplyResult apply(const VLoopAnalyzer& vloop_analyzer, const GrowableArray& vnode_idx_to_transformed_node) const override; - NOT_PRODUCT(virtual const char* name() const override { return "Cast"; };) + NOT_PRODUCT(virtual const char* name() const override { return "CastVector"; };) NOT_PRODUCT(virtual void print_spec() const override;) }; diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java index 95c074762075b..e5ba811fd8f27 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java @@ -105,6 +105,12 @@ public TestCompatibleUseDefTypeSize() { tests.put("test9", () -> { return test9(aL.clone(), bD.clone()); }); tests.put("test10", () -> { return test10(aL.clone(), bD.clone()); }); tests.put("test11", () -> { return test11(aC.clone()); }); + tests.put("testIntToShort", () -> { return testIntToShort(aI.clone(), bS.clone()); }); + tests.put("testIntToByte", () -> { return testIntToByte(aI.clone(), bB.clone()); }); + tests.put("testShortToByte", () -> { return testShortToByte(aS.clone(), bB.clone()); }); + tests.put("testShortToInt", () -> { return testShortToInt(aS.clone(), bI.clone()); }); + tests.put("testByteToInt", () -> { return testByteToInt(aB.clone(), bI.clone()); }); + tests.put("testByteToShort", () -> { return testByteToShort(aB.clone(), bS.clone()); }); // Compute gold value for all test methods before compilation for (Map.Entry entry : tests.entrySet()) { @@ -127,7 +133,13 @@ public TestCompatibleUseDefTypeSize() { "test8", "test9", "test10", - "test11"}) + "test11", + "testIntToShort", + "testIntToByte", + "testShortToByte", + "testShortToInt", + "testByteToInt", + "testByteToShort"}) public void runTests() { for (Map.Entry entry : tests.entrySet()) { String name = entry.getKey(); @@ -473,4 +485,80 @@ static Object[] test11(char[] a) { } return new Object[]{ a, new char[] { m } }; } + + // Narrowing + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) + public Object[] testIntToShort(int[] ints, short[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = (short) ints[i]; + } + + return new Object[] { ints, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) + public Object[] testIntToByte(int[] ints, byte[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = (byte) ints[i]; + } + + return new Object[] { ints, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) + public Object[] testShortToByte(short[] shorts, byte[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = (byte) shorts[i]; + } + + return new Object[] { shorts, res }; + } + + // Widening + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_short, max_int)", ">0" }) + public Object[] testShortToInt(short[] shorts, int[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = shorts[i]; + } + + return new Object[] { shorts, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_byte, max_int)", ">0" }) + public Object[] testByteToInt(byte[] bytes, int[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_short)", ">0" }) + public Object[] testByteToShort(byte[] bytes, short[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java deleted file mode 100644 index 32a99fa14e3a4..0000000000000 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestSubwordVectorization.java +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package compiler.loopopts.superword; - - -import compiler.lib.generators.*; -import compiler.lib.ir_framework.*; -import java.util.Random; -import jdk.test.lib.Utils; - -/* - * @test - * @bug 8342095 - * @key randomness - * @summary Ensure that vectorization of conversions between subword types works as expected. - * @library /test/lib / - * @run driver compiler.loopopts.superword.TestSubwordVectorization - */ - -public class TestSubwordVectorization { - private static final Generator G = Generators.G.ints(); - - private static final int SIZE = 1024; - - public static void main(String[] args) { - TestFramework.run(); - } - - @Setup - static Object[] setupIntArray() { - int[] res = new int[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = G.next(); - } - - return new Object[] { res }; - } - - @Setup - static Object[] setupShortArray() { - short[] res = new short[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = G.next().shortValue(); - } - - return new Object[] { res }; - } - - @Setup - static Object[] setupByteArray() { - byte[] res = new byte[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = G.next().byteValue(); - } - - return new Object[] { res }; - } - - // Narrowing - - @Test - @IR(applyIfCPUFeature = { "avx", "true" }, - applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) - @Arguments(setup = "setupIntArray") - public Object[] testIntToShort(int[] ints) { - short[] res = new short[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = (short) ints[i]; - } - - return new Object[] { ints, res }; - } - - @Check(test = "testIntToShort") - public void checkTestIntToShort(Object[] vals) { - int[] ints = (int[]) vals[0]; - short[] res = (short[]) vals[1]; - - for (int i = 0; i < SIZE; i++) { - short value = (short) ints[i]; - - if (res[i] != value) { - throw new IllegalStateException("Int to short test failed: Expected " + value + " but got " + res[i]); - } - } - } - - @Test - @IR(applyIfCPUFeature = { "avx", "true" }, - applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) - @Arguments(setup = "setupIntArray") - public Object[] testIntToByte(int[] ints) { - byte[] res = new byte[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = (byte) ints[i]; - } - - return new Object[] { ints, res }; - } - - @Check(test = "testIntToByte") - public void checkTestIntToByte(Object[] vals) { - int[] ints = (int[]) vals[0]; - byte[] res = (byte[]) vals[1]; - - for (int i = 0; i < SIZE; i++) { - byte value = (byte) ints[i]; - - if (res[i] != value) { - throw new IllegalStateException("Int to byte test failed: Expected " + value + " but got " + res[i]); - } - } - } - - @Test - @IR(applyIfCPUFeature = { "avx", "true" }, - applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) - @Arguments(setup = "setupShortArray") - public Object[] testShortToByte(short[] shorts) { - byte[] res = new byte[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = (byte) shorts[i]; - } - - return new Object[] { shorts, res }; - } - - @Check(test = "testShortToByte") - public void checkTestShortToByte(Object[] vals) { - short[] shorts = (short[]) vals[0]; - byte[] res = (byte[]) vals[1]; - - for (int i = 0; i < SIZE; i++) { - byte value = (byte) shorts[i]; - - if (res[i] != value) { - throw new IllegalStateException("Short to byte test failed: Expected " + value + " but got " + res[i]); - } - } - } - - // Widening - - @Test - @IR(applyIfCPUFeature = { "avx", "true" }, - applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_short, max_int)", ">0" }) - @Arguments(setup = "setupShortArray") - public Object[] testShortToInt(short[] shorts) { - int[] res = new int[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = shorts[i]; - } - - return new Object[] { shorts, res }; - } - - @Check(test = "testShortToInt") - public void checkTestShortToInt(Object[] vals) { - short[] shorts = (short[]) vals[0]; - int[] res = (int[]) vals[1]; - - for (int i = 0; i < SIZE; i++) { - int value = shorts[i]; - - if (res[i] != value) { - throw new IllegalStateException("Short to int test failed: Expected " + value + " but got " + res[i]); - } - } - } - - @Test - @IR(applyIfCPUFeature = { "avx", "true" }, - applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_byte, max_int)", ">0" }) - @Arguments(setup = "setupByteArray") - public Object[] testByteToInt(byte[] bytes) { - int[] res = new int[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = bytes[i]; - } - - return new Object[] { bytes, res }; - } - - @Check(test = "testByteToInt") - public void checkTestByteToInt(Object[] vals) { - byte[] bytes = (byte[]) vals[0]; - int[] res = (int[]) vals[1]; - - for (int i = 0; i < SIZE; i++) { - int value = bytes[i]; - - if (res[i] != value) { - throw new IllegalStateException("Byte to int test failed: Expected " + value + " but got " + res[i]); - } - } - } - - @Test - @IR(applyIfCPUFeature = { "avx", "true" }, - applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, - counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_short)", ">0" }) - @Arguments(setup = "setupByteArray") - public Object[] testByteToShort(byte[] bytes) { - short[] res = new short[SIZE]; - - for (int i = 0; i < SIZE; i++) { - res[i] = bytes[i]; - } - - return new Object[] { bytes, res }; - } - - @Check(test = "testByteToShort") - public void checkTestByteToShort(Object[] vals) { - byte[] bytes = (byte[]) vals[0]; - short[] res = (short[]) vals[1]; - - for (int i = 0; i < SIZE; i++) { - short value = bytes[i]; - - if (res[i] != value) { - throw new IllegalStateException("Byte to short test failed: Expected " + value + " but got " + res[i]); - } - } - } -} From 482ddbc4fdb5670eeb5309c4aee778e4363b7c37 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Wed, 2 Apr 2025 09:56:57 -0400 Subject: [PATCH 06/12] Implement patch with VectorCastNode::implemented --- src/hotspot/cpu/aarch64/matcher_aarch64.hpp | 4 ---- src/hotspot/cpu/arm/matcher_arm.hpp | 4 ---- src/hotspot/cpu/ppc/matcher_ppc.hpp | 4 ---- src/hotspot/cpu/riscv/matcher_riscv.hpp | 4 ---- src/hotspot/cpu/s390/matcher_s390.hpp | 4 ---- src/hotspot/cpu/x86/matcher_x86.hpp | 21 ------------------- src/hotspot/share/opto/superword.cpp | 7 ++++--- src/hotspot/share/opto/superword.hpp | 2 +- .../share/opto/superwordVTransformBuilder.cpp | 3 ++- 9 files changed, 7 insertions(+), 46 deletions(-) diff --git a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp index ebd5339027719..aa8135ee2d6e0 100644 --- a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp @@ -198,10 +198,6 @@ } } - static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { - return false; - } - // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/arm/matcher_arm.hpp b/src/hotspot/cpu/arm/matcher_arm.hpp index 252ac261e70a8..d970c74fb7edc 100644 --- a/src/hotspot/cpu/arm/matcher_arm.hpp +++ b/src/hotspot/cpu/arm/matcher_arm.hpp @@ -191,10 +191,6 @@ } } - static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { - return false; - } - // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp index 5b0f5899ab2c4..441339b94c61b 100644 --- a/src/hotspot/cpu/ppc/matcher_ppc.hpp +++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp @@ -202,10 +202,6 @@ } } - static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { - return false; - } - // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp index c5a6d83819a75..9a0aab5266f58 100644 --- a/src/hotspot/cpu/riscv/matcher_riscv.hpp +++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp @@ -197,10 +197,6 @@ } } - static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { - return false; - } - // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp index 87858b8f7e0e5..e43e8f6f30fbb 100644 --- a/src/hotspot/cpu/s390/matcher_s390.hpp +++ b/src/hotspot/cpu/s390/matcher_s390.hpp @@ -194,10 +194,6 @@ } } - static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { - return false; - } - // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { return false; diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp index 6e19c0f068ef9..77a88d9024ca1 100644 --- a/src/hotspot/cpu/x86/matcher_x86.hpp +++ b/src/hotspot/cpu/x86/matcher_x86.hpp @@ -261,27 +261,6 @@ } } - static bool is_vector_cast_supported(BasicType from_bt, BasicType to_bt) { - // Vector casts are only supported on AVX1 and higher - if (UseAVX == 0) { - return false; - } - - assert(to_bt != from_bt, "Must call with different from_bt and to_bt"); - - // T_CHAR is not supported yet due to the backend not implementing vector casts to and from char. - switch (from_bt) { - case T_INT: - case T_SHORT: - case T_BYTE: { - return to_bt == T_INT || to_bt == T_SHORT || to_bt == T_BYTE; - } - default: { - return false; - } - } - } - // Is SIMD sort supported for this CPU? static bool supports_simd_sort(BasicType bt) { if (VM_Version::supports_avx512dq()) { diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index c56ec76e0bdf2..0f6eead404c49 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2255,7 +2255,7 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) const { return true; } - if (!is_velt_basic_type_compatible_use_def(use, def)) { + if (!is_velt_basic_type_compatible_use_def(use, def, d_pk->size())) { return false; } @@ -2321,7 +2321,7 @@ Node_List* PackSet::strided_pack_input_at_index_or_null(const Node_List* pack, c // Check if the output type of def is compatible with the input type of use, i.e. if the // types have the same size. -bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) const { +bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint def_size) const { assert(in_bb(def) && in_bb(use), "both use and def are in loop"); // Conversions are trivially compatible. @@ -2353,7 +2353,8 @@ bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) cons } // Input sizes differ, but platform supports a cast to change the def shape to the use shape - if (Matcher::is_vector_cast_supported(def_bt, use_bt)) { + + if ((is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, def_size, def_bt, use_bt)) { return true; } diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 57a403b449843..a5a4f67dd4ac2 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -653,7 +653,7 @@ class SuperWord : public ResourceObj { // Is use->in(u_idx) a vector use? bool is_vector_use(Node* use, int u_idx) const; - bool is_velt_basic_type_compatible_use_def(Node* use, Node* def) const; + bool is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint def_size) const; bool schedule_and_apply() const; }; diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.cpp b/src/hotspot/share/opto/superwordVTransformBuilder.cpp index 03f2a34d0c8f3..470ef40c1e3db 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp @@ -191,7 +191,8 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i BasicType use_bt = _vloop_analyzer.types().velt_basic_type(p0); // If the use and def types are different, emit a cast node - if (use_bt != def_bt && !p0->is_Convert() && Matcher::is_vector_cast_supported(def_bt, use_bt)) { + if (use_bt != def_bt && !p0->is_Convert() + && (is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack->size(), def_bt, use_bt)) { VTransformNode* in = get_vtnode(pack_in->at(0)); VTransformNode* cast = new (_vtransform.arena()) VTransformCastVectorNode(_vtransform, pack->size(), def_bt, use_bt); cast->set_req(1, in); From fc7be77ca63ac5becaed0bb87725371732469fd8 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:09:06 -0400 Subject: [PATCH 07/12] Fix copyright --- src/hotspot/cpu/aarch64/matcher_aarch64.hpp | 2 +- src/hotspot/cpu/arm/matcher_arm.hpp | 2 +- src/hotspot/cpu/ppc/matcher_ppc.hpp | 2 +- src/hotspot/cpu/s390/matcher_s390.hpp | 2 +- src/hotspot/cpu/x86/matcher_x86.hpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp index 5e96af539907c..a6cd055775870 100644 --- a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/arm/matcher_arm.hpp b/src/hotspot/cpu/arm/matcher_arm.hpp index 6c818e1f20db4..66fe8ac330eb5 100644 --- a/src/hotspot/cpu/arm/matcher_arm.hpp +++ b/src/hotspot/cpu/arm/matcher_arm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp index 666bec9e0c802..4e8147a0ca380 100644 --- a/src/hotspot/cpu/ppc/matcher_ppc.hpp +++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp index 99461e33e3c87..e4c277c63a8b9 100644 --- a/src/hotspot/cpu/s390/matcher_s390.hpp +++ b/src/hotspot/cpu/s390/matcher_s390.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2017, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp index 22fac69b75a78..78591989b5b76 100644 --- a/src/hotspot/cpu/x86/matcher_x86.hpp +++ b/src/hotspot/cpu/x86/matcher_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it From 36f598a60b86ccb64ea6ce70a0866081ee49d5db Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:10:18 -0400 Subject: [PATCH 08/12] Fix copyright after merge --- src/hotspot/cpu/ppc/matcher_ppc.hpp | 2 +- src/hotspot/cpu/riscv/matcher_riscv.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp index 4e8147a0ca380..666bec9e0c802 100644 --- a/src/hotspot/cpu/ppc/matcher_ppc.hpp +++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp index 3d27a25e3338a..1b490a07f92a6 100644 --- a/src/hotspot/cpu/riscv/matcher_riscv.hpp +++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * From 8c00ef84efa2d634769a07931f22c21dccfeaadd Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Fri, 2 May 2025 00:40:46 -0400 Subject: [PATCH 09/12] Address more comments, make test and benchmark more exhaustive --- src/hotspot/share/opto/superword.cpp | 6 +- src/hotspot/share/opto/superword.hpp | 2 +- .../share/opto/superwordVTransformBuilder.cpp | 5 +- .../TestCompatibleUseDefTypeSize.java | 157 +++++++++++++++++- .../bench/vm/compiler/VectorSubword.java | 123 +++++++++++++- 5 files changed, 271 insertions(+), 22 deletions(-) diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 7de99cae28ba5..8a872afa8230d 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2326,7 +2326,7 @@ Node_List* PackSet::strided_pack_input_at_index_or_null(const Node_List* pack, c // Check if the output type of def is compatible with the input type of use, i.e. if the // types have the same size. -bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint def_size) const { +bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint pack_size) const { assert(in_bb(def) && in_bb(use), "both use and def are in loop"); // Conversions are trivially compatible. @@ -2357,9 +2357,9 @@ bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, cons return true; } - // Input sizes differ, but platform supports a cast to change the def shape to the use shape + // Subword cast: Element sizes differ, but the platform supports a cast to change the def shape to the use shape. - if ((is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, def_size, def_bt, use_bt)) { + if ((is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack_size, def_bt, use_bt)) { return true; } diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index a5a4f67dd4ac2..903f9b2829a4c 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -653,7 +653,7 @@ class SuperWord : public ResourceObj { // Is use->in(u_idx) a vector use? bool is_vector_use(Node* use, int u_idx) const; - bool is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint def_size) const; + bool is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint pack_size) const; bool schedule_and_apply() const; }; diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.cpp b/src/hotspot/share/opto/superwordVTransformBuilder.cpp index 4a2da072531f3..4b97352658827 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp @@ -194,8 +194,9 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i BasicType use_bt = _vloop_analyzer.types().velt_basic_type(p0); // If the use and def types are different, emit a cast node - if (use_bt != def_bt && !p0->is_Convert() - && (is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack->size(), def_bt, use_bt)) { + if (use_bt != def_bt && !p0->is_Convert() && + (is_subword_type(def_bt) || is_subword_type(use_bt)) && + VectorCastNode::implemented(-1, pack->size(), def_bt, use_bt)) { VTransformNode* in = get_vtnode(pack_in->at(0)); VTransformNode* cast = new (_vtransform.arena()) VTransformCastVectorNode(_vtransform, pack->size(), def_bt, use_bt); cast->set_req(1, in); diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java index 375d4805bc50a..5919489a847a8 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java @@ -105,12 +105,22 @@ public TestCompatibleUseDefTypeSize() { tests.put("test9", () -> { return test9(aL.clone(), bD.clone()); }); tests.put("test10", () -> { return test10(aL.clone(), bD.clone()); }); tests.put("test11", () -> { return test11(aC.clone()); }); - tests.put("testIntToShort", () -> { return testIntToShort(aI.clone(), bS.clone()); }); - tests.put("testIntToByte", () -> { return testIntToByte(aI.clone(), bB.clone()); }); - tests.put("testShortToByte", () -> { return testShortToByte(aS.clone(), bB.clone()); }); - tests.put("testShortToInt", () -> { return testShortToInt(aS.clone(), bI.clone()); }); tests.put("testByteToInt", () -> { return testByteToInt(aB.clone(), bI.clone()); }); tests.put("testByteToShort", () -> { return testByteToShort(aB.clone(), bS.clone()); }); + tests.put("testByteToChar", () -> { return testByteToChar(aB.clone(), bC.clone()); }); + tests.put("testByteToLong", () -> { return testByteToLong(aB.clone(), bL.clone()); }); + tests.put("testShortToByte", () -> { return testShortToByte(aS.clone(), bB.clone()); }); + tests.put("testShortToChar", () -> { return testShortToChar(aS.clone(), bC.clone()); }); + tests.put("testShortToInt", () -> { return testShortToInt(aS.clone(), bI.clone()); }); + tests.put("testShortToLong", () -> { return testShortToLong(aS.clone(), bL.clone()); }); + tests.put("testIntToShort", () -> { return testIntToShort(aI.clone(), bS.clone()); }); + tests.put("testIntToChar", () -> { return testIntToChar(aI.clone(), bC.clone()); }); + tests.put("testIntToByte", () -> { return testIntToByte(aI.clone(), bB.clone()); }); + tests.put("testIntToLong", () -> { return testIntToLong(aI.clone(), bL.clone()); }); + tests.put("testLongToByte", () -> { return testLongToByte(aL.clone(), bB.clone()); }); + tests.put("testLongToShort", () -> { return testLongToShort(aL.clone(), bS.clone()); }); + tests.put("testLongToChar", () -> { return testLongToChar(aL.clone(), bC.clone()); }); + tests.put("testLongToInt", () -> { return testLongToInt(aL.clone(), bI.clone()); }); // Compute gold value for all test methods before compilation for (Map.Entry entry : tests.entrySet()) { @@ -134,12 +144,22 @@ public TestCompatibleUseDefTypeSize() { "test9", "test10", "test11", - "testIntToShort", - "testIntToByte", + "testByteToInt", + "testByteToShort", + "testByteToChar", + "testByteToLong", "testShortToByte", + "testShortToChar", "testShortToInt", - "testByteToInt", - "testByteToShort"}) + "testShortToLong", + "testIntToShort", + "testIntToChar", + "testIntToByte", + "testIntToLong", + "testLongToByte", + "testLongToShort", + "testLongToChar", + "testLongToInt"}) public void runTests() { for (Map.Entry entry : tests.entrySet()) { String name = entry.getKey(); @@ -500,6 +520,19 @@ public Object[] testIntToShort(int[] ints, short[] res) { return new Object[] { ints, res }; } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_char)", ">0" }) + public Object[] testIntToChar(int[] ints, char[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = (char) ints[i]; + } + + return new Object[] { ints, res }; + } + @Test @IR(applyIfCPUFeature = { "avx", "true" }, applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, @@ -524,6 +557,66 @@ public Object[] testShortToByte(short[] shorts, byte[] res) { return new Object[] { shorts, res }; } + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2B, IRNode.VECTOR_SIZE + "min(max_long, max_byte)", ">0" }) + public Object[] testLongToByte(long[] longs, byte[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (byte) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2S, IRNode.VECTOR_SIZE + "min(max_long, max_short)", ">0" }) + public Object[] testLongToShort(long[] longs, short[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (short) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2S, IRNode.VECTOR_SIZE + "min(max_long, max_char)", ">0" }) + public Object[] testLongToChar(long[] longs, char[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (char) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2I, IRNode.VECTOR_SIZE + "min(max_long, max_int)", ">0" }) + public Object[] testLongToInt(long[] longs, int[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (int) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.STORE_VECTOR, ">0" }) + public Object[] testShortToChar(short[] shorts, char[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = (char) shorts[i]; + } + + return new Object[] { shorts, res }; + } + // Widening @Test @@ -561,4 +654,52 @@ public Object[] testByteToShort(byte[] bytes, short[] res) { return new Object[] { bytes, res }; } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_char)", ">0" }) + public Object[] testByteToChar(byte[] bytes, char[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = (char) bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2L, IRNode.VECTOR_SIZE + "min(max_byte, max_long)", ">0" }) + public Object[] testByteToLong(byte[] bytes, long[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2L, IRNode.VECTOR_SIZE + "min(max_short, max_long)", ">0" }) + public Object[] testShortToLong(short[] shorts, long[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = shorts[i]; + } + + return new Object[] { shorts, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", ">0" }) + public Object[] testIntToLong(int[] ints, long[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = ints[i]; + } + + return new Object[] { ints, res }; + } } diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java index 0f0c6d9f7249b..9bfcc57ce4256 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java @@ -41,19 +41,46 @@ public class VectorSubword { private byte[] bytes; private short[] shorts; + private char[] chars; private int[] ints; + private long[] longs; @Setup public void init() { bytes = new byte[SIZE]; shorts = new short[SIZE]; + chars = new char[SIZE]; ints = new int[SIZE]; + longs = new long[SIZE]; } + // Narrowing + @Benchmark - public void intToShort() { + public void shortToByte() { for (int i = 0; i < SIZE; i++) { - shorts[i] = (short) ints[i]; + bytes[i] = (byte) shorts[i]; + } + } + + @Benchmark + public void shortToChar() { + for (int i = 0; i < SIZE; i++) { + chars[i] = (char) shorts[i]; + } + } + + @Benchmark + public void charToByte() { + for (int i = 0; i < SIZE; i++) { + bytes[i] = (byte) chars[i]; + } + } + + @Benchmark + public void charToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = (short) chars[i]; } } @@ -65,30 +92,110 @@ public void intToByte() { } @Benchmark - public void shortToByte() { + public void intToShort() { for (int i = 0; i < SIZE; i++) { - bytes[i] = (byte) shorts[i]; + shorts[i] = (short) ints[i]; } } @Benchmark - public void shortToInt() { + public void intToChar() { for (int i = 0; i < SIZE; i++) { - ints[i] = shorts[i]; + chars[i] = (char) ints[i]; } } @Benchmark - public void byteToInt() { + public void longToByte() { for (int i = 0; i < SIZE; i++) { - ints[i] = bytes[i]; + bytes[i] = (byte) longs[i]; + } + } + + @Benchmark + public void longToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = (short) longs[i]; + } + } + + @Benchmark + public void longToChar() { + for (int i = 0; i < SIZE; i++) { + chars[i] = (char) longs[i]; } } + @Benchmark + public void longToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = (int) longs[i]; + } + } + + // Widening + @Benchmark public void byteToShort() { for (int i = 0; i < SIZE; i++) { shorts[i] = bytes[i]; } } + + @Benchmark + public void byteToChar() { + for (int i = 0; i < SIZE; i++) { + chars[i] = (char) bytes[i]; + } + } + + @Benchmark + public void byteToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = bytes[i]; + } + } + + @Benchmark + public void byteToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = bytes[i]; + } + } + + @Benchmark + public void shortToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = shorts[i]; + } + } + + @Benchmark + public void shortToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = shorts[i]; + } + } + + @Benchmark + public void charToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = chars[i]; + } + } + + @Benchmark + public void charToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = chars[i]; + } + } + + @Benchmark + public void intToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = ints[i]; + } + } + } From 03ee11541b8159e779bd91717d0febd639d5cb21 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Sat, 3 May 2025 13:09:37 -0400 Subject: [PATCH 10/12] Whitespace and benchmark tweak --- src/hotspot/share/opto/superword.cpp | 1 - test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 8a872afa8230d..43d8f9de26e20 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2358,7 +2358,6 @@ bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, cons } // Subword cast: Element sizes differ, but the platform supports a cast to change the def shape to the use shape. - if ((is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack_size, def_bt, use_bt)) { return true; } diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java index 9bfcc57ce4256..424220f789631 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java @@ -32,8 +32,8 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) @State(Scope.Thread) -@Warmup(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @Fork(value = 3) public class VectorSubword { @Param({"1024"}) From 78934c9630d414be6f18de3ddde39091a7032454 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Sun, 11 May 2025 23:07:58 -0400 Subject: [PATCH 11/12] Check for AVX2 for byte/long conversions --- .../loopopts/superword/TestCompatibleUseDefTypeSize.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java index 5919489a847a8..284afe635814d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java @@ -558,7 +558,7 @@ public Object[] testShortToByte(short[] shorts, byte[] res) { } @Test - @IR(applyIfCPUFeature = { "avx", "true" }, + @IR(applyIfCPUFeature = { "avx2", "true" }, applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, counts = { IRNode.VECTOR_CAST_L2B, IRNode.VECTOR_SIZE + "min(max_long, max_byte)", ">0" }) public Object[] testLongToByte(long[] longs, byte[] res) { @@ -668,7 +668,7 @@ public Object[] testByteToChar(byte[] bytes, char[] res) { } @Test - @IR(applyIfCPUFeature = { "avx", "true" }, + @IR(applyIfCPUFeature = { "avx2", "true" }, applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, counts = { IRNode.VECTOR_CAST_B2L, IRNode.VECTOR_SIZE + "min(max_byte, max_long)", ">0" }) public Object[] testByteToLong(byte[] bytes, long[] res) { From aabaafba5fdb27a943895ec53a71b74ad98af789 Mon Sep 17 00:00:00 2001 From: Jasmine Karthikeyan <25208576+jaskarth@users.noreply.github.com> Date: Wed, 30 Jul 2025 23:31:30 -0400 Subject: [PATCH 12/12] Update tests, cleanup logic --- src/hotspot/share/opto/superword.cpp | 9 ++++++++- src/hotspot/share/opto/superword.hpp | 3 +++ .../share/opto/superwordVTransformBuilder.cpp | 4 +--- .../jtreg/compiler/c2/TestMinMaxSubword.java | 14 +++++++------- .../vectorization/TestSubwordTruncation.java | 16 ++++++++-------- .../vectorization/runner/BasicShortOpTest.java | 10 ++++------ 6 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index a4f5764a0e0d0..b01d63a5b0aba 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2407,13 +2407,20 @@ bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, cons } // Subword cast: Element sizes differ, but the platform supports a cast to change the def shape to the use shape. - if ((is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack_size, def_bt, use_bt)) { + if (is_supported_subword_cast(def_bt, use_bt, pack_size)) { return true; } return false; } +bool SuperWord::is_supported_subword_cast(BasicType def_bt, BasicType use_bt, const uint pack_size) { + assert(def_bt != use_bt, "use and def types must be different"); + + // Opcode is only required to disambiguate half float, so we pass -1 as it can't be encountered here. + return (is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack_size, def_bt, use_bt); +} + // Return nullptr if success, else failure message VStatus VLoopBody::construct() { assert(_body.is_empty(), "body is empty"); diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 903f9b2829a4c..ba2c200692619 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -656,6 +656,9 @@ class SuperWord : public ResourceObj { bool is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint pack_size) const; bool schedule_and_apply() const; + +public: + static bool is_supported_subword_cast(BasicType def_bt, BasicType use_bt, const uint pack_size); }; #endif // SHARE_OPTO_SUPERWORD_HPP diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.cpp b/src/hotspot/share/opto/superwordVTransformBuilder.cpp index 4b97352658827..ed7f475a50281 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp @@ -194,9 +194,7 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i BasicType use_bt = _vloop_analyzer.types().velt_basic_type(p0); // If the use and def types are different, emit a cast node - if (use_bt != def_bt && !p0->is_Convert() && - (is_subword_type(def_bt) || is_subword_type(use_bt)) && - VectorCastNode::implemented(-1, pack->size(), def_bt, use_bt)) { + if (use_bt != def_bt && !p0->is_Convert() && SuperWord::is_supported_subword_cast(def_bt, use_bt, pack->size())) { VTransformNode* in = get_vtnode(pack_in->at(0)); VTransformNode* cast = new (_vtransform.arena()) VTransformCastVectorNode(_vtransform, pack->size(), def_bt, use_bt); cast->set_req(1, in); diff --git a/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java b/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java index 955aa4058f046..fa30c8f76fc3b 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java +++ b/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java @@ -57,11 +57,11 @@ public class TestMinMaxSubword { } } - // Ensure vector max/min instructions are not generated for integer subword types - // as Java APIs for Math.min/max do not support integer subword types and superword - // should not generate vectorized Min/Max nodes for them. + // Ensure that casts to/from subword types are emitted, as java APIs for Math.min/max do not support integer subword + // types and superword should generate int versions and then cast between them. + @Test - @IR(failOn = {IRNode.MIN_VI, IRNode.MIN_VF, IRNode.MIN_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMinShort() { for (int i = 0; i < LENGTH; i++) { sb[i] = (short) Math.min(sa[i], val); @@ -77,7 +77,7 @@ public static void testMinShort_runner() { } @Test - @IR(failOn = {IRNode.MAX_VI, IRNode.MAX_VF, IRNode.MAX_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMaxShort() { for (int i = 0; i < LENGTH; i++) { sb[i] = (short) Math.max(sa[i], val); @@ -92,7 +92,7 @@ public static void testMaxShort_runner() { } @Test - @IR(failOn = {IRNode.MIN_VI, IRNode.MIN_VF, IRNode.MIN_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMinByte() { for (int i = 0; i < LENGTH; i++) { bb[i] = (byte) Math.min(ba[i], val); @@ -108,7 +108,7 @@ public static void testMinByte_runner() { } @Test - @IR(failOn = {IRNode.MAX_VI, IRNode.MAX_VF, IRNode.MAX_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMaxByte() { for (int i = 0; i < LENGTH; i++) { bb[i] = (byte) Math.max(ba[i], val); diff --git a/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java b/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java index 5985367b26572..ce053201d2feb 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java @@ -73,7 +73,7 @@ static Object[] setupCharArray() { // Shorts @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortLeadingZeros(short[] in) { short[] res = new short[SIZE]; @@ -98,7 +98,7 @@ public void checkTestShortLeadingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortTrailingZeros(short[] in) { short[] res = new short[SIZE]; @@ -123,7 +123,7 @@ public void checkTestShortTrailingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortReverse(short[] in) { short[] res = new short[SIZE]; @@ -148,7 +148,7 @@ public void checkTestShortReverse(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortBitCount(short[] in) { short[] res = new short[SIZE]; @@ -277,7 +277,7 @@ public void checkTestCharBitCount(Object[] vals) { // Bytes @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteLeadingZeros(byte[] in) { byte[] res = new byte[SIZE]; @@ -302,7 +302,7 @@ public void checkTestByteLeadingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteTrailingZeros(byte[] in) { byte[] res = new byte[SIZE]; @@ -327,7 +327,7 @@ public void checkTestByteTrailingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteReverse(byte[] in) { byte[] res = new byte[SIZE]; @@ -352,7 +352,7 @@ public void checkTestByteReverse(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteBitCount(byte[] in) { byte[] res = new byte[SIZE]; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java index 63739584558be..b81d90484f31b 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java @@ -210,10 +210,10 @@ public short[] vectorSignedShiftRight() { return res; } + // Min/Max vectorization requires a cast from subword to int and back to subword, to avoid losing the higher order bits. + @Test - // Note that min operations on subword types cannot be vectorized - // because higher bits will be lost. - @IR(failOn = {IRNode.STORE_VECTOR}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public short[] vectorMin() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -223,9 +223,7 @@ public short[] vectorMin() { } @Test - // Note that max operations on subword types cannot be vectorized - // because higher bits will be lost. - @IR(failOn = {IRNode.STORE_VECTOR}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public short[] vectorMax() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) {