diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 510cb1b51de0e..b01d63a5b0aba 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2309,7 +2309,7 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) const { return true; } - if (!is_velt_basic_type_compatible_use_def(use, def)) { + if (!is_velt_basic_type_compatible_use_def(use, def, d_pk->size())) { return false; } @@ -2375,7 +2375,7 @@ Node_List* PackSet::strided_pack_input_at_index_or_null(const Node_List* pack, c // Check if the output type of def is compatible with the input type of use, i.e. if the // types have the same size. -bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) const { +bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint pack_size) const { assert(in_bb(def) && in_bb(use), "both use and def are in loop"); // Conversions are trivially compatible. @@ -2401,8 +2401,24 @@ bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) cons type2aelembytes(use_bt) == 4; } - // Default case: input size of use equals output size of def. - return type2aelembytes(use_bt) == type2aelembytes(def_bt); + // Input size of use equals output size of def + if (type2aelembytes(use_bt) == type2aelembytes(def_bt)) { + return true; + } + + // Subword cast: Element sizes differ, but the platform supports a cast to change the def shape to the use shape. + if (is_supported_subword_cast(def_bt, use_bt, pack_size)) { + return true; + } + + return false; +} + +bool SuperWord::is_supported_subword_cast(BasicType def_bt, BasicType use_bt, const uint pack_size) { + assert(def_bt != use_bt, "use and def types must be different"); + + // Opcode is only required to disambiguate half float, so we pass -1 as it can't be encountered here. + return (is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack_size, def_bt, use_bt); } // Return nullptr if success, else failure message diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 57a403b449843..ba2c200692619 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -653,9 +653,12 @@ class SuperWord : public ResourceObj { // Is use->in(u_idx) a vector use? bool is_vector_use(Node* use, int u_idx) const; - bool is_velt_basic_type_compatible_use_def(Node* use, Node* def) const; + bool is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint pack_size) const; bool schedule_and_apply() const; + +public: + static bool is_supported_subword_cast(BasicType def_bt, BasicType use_bt, const uint pack_size); }; #endif // SHARE_OPTO_SUPERWORD_HPP diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.cpp b/src/hotspot/share/opto/superwordVTransformBuilder.cpp index 83496f9d0be0a..ed7f475a50281 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp @@ -189,6 +189,19 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i Node_List* pack_in = _packset.pack_input_at_index_or_null(pack, index); if (pack_in != nullptr) { + Node* in_p0 = pack_in->at(0); + BasicType def_bt = _vloop_analyzer.types().velt_basic_type(in_p0); + BasicType use_bt = _vloop_analyzer.types().velt_basic_type(p0); + + // If the use and def types are different, emit a cast node + if (use_bt != def_bt && !p0->is_Convert() && SuperWord::is_supported_subword_cast(def_bt, use_bt, pack->size())) { + VTransformNode* in = get_vtnode(pack_in->at(0)); + VTransformNode* cast = new (_vtransform.arena()) VTransformCastVectorNode(_vtransform, pack->size(), def_bt, use_bt); + cast->set_req(1, in); + + return cast; + } + // Input is a matching pack -> vtnode already exists. assert(index != 2 || !VectorNode::is_shift(p0), "shift's count cannot be vector"); return get_vtnode(pack_in->at(0)); diff --git a/src/hotspot/share/opto/vtransform.cpp b/src/hotspot/share/opto/vtransform.cpp index 8a9d4aed13e7f..7be392fd80719 100644 --- a/src/hotspot/share/opto/vtransform.cpp +++ b/src/hotspot/share/opto/vtransform.cpp @@ -695,6 +695,15 @@ VTransformApplyResult VTransformStoreVectorNode::apply(const VLoopAnalyzer& vloo return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size()); } +VTransformApplyResult VTransformCastVectorNode::apply(const VLoopAnalyzer& vloop_analyzer, + const GrowableArray& vnode_idx_to_transformed_node) const { + Node* value = find_transformed_input(1, vnode_idx_to_transformed_node); + VectorNode* vn = VectorCastNode::make(VectorCastNode::opcode(-1, _from_bt), value, _to_bt, _vlen); + register_new_node_from_vectorization(vloop_analyzer, vn, value); + + return VTransformApplyResult::make_vector(vn, _vlen, vn->vect_type()->length_in_bytes()); +} + void VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes(const VLoopAnalyzer& vloop_analyzer, Node* vn) const { PhaseIdealLoop* phase = vloop_analyzer.vloop().phase(); Node* first = nodes().at(0); @@ -790,6 +799,10 @@ void VTransformPopulateIndexNode::print_spec() const { tty->print("vlen=%d element_bt=%s", _vlen, type2name(_element_bt)); } +void VTransformCastVectorNode::print_spec() const { + tty->print("vlen=%d from=%s to=%s", _vlen, type2name(_from_bt), type2name(_to_bt)); +} + void VTransformVectorNode::print_spec() const { tty->print("%d-pack[", _nodes.length()); for (int i = 0; i < _nodes.length(); i++) { diff --git a/src/hotspot/share/opto/vtransform.hpp b/src/hotspot/share/opto/vtransform.hpp index 555f565360d2e..4ac3eb56f7517 100644 --- a/src/hotspot/share/opto/vtransform.hpp +++ b/src/hotspot/share/opto/vtransform.hpp @@ -530,6 +530,22 @@ class VTransformStoreVectorNode : public VTransformMemVectorNode { NOT_PRODUCT(virtual const char* name() const override { return "StoreVector"; };) }; +class VTransformCastVectorNode : public VTransformNode { +private: + uint _vlen; + BasicType _from_bt; + BasicType _to_bt; + +public: + // req = 2 -> [ctrl, input] + VTransformCastVectorNode(VTransform& vtransform, int vlen, BasicType from_bt, BasicType to_bt) : VTransformNode(vtransform, 2), + _vlen(vlen), _from_bt(from_bt), _to_bt(to_bt) {} + virtual VTransformApplyResult apply(const VLoopAnalyzer& vloop_analyzer, + const GrowableArray& vnode_idx_to_transformed_node) const override; + NOT_PRODUCT(virtual const char* name() const override { return "CastVector"; };) + NOT_PRODUCT(virtual void print_spec() const override;) +}; + // Invoke callback on all memops, in the order of the schedule. template void VTransformGraph::for_each_memop_in_schedule(Callback callback) const { diff --git a/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java b/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java index 955aa4058f046..fa30c8f76fc3b 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java +++ b/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java @@ -57,11 +57,11 @@ public class TestMinMaxSubword { } } - // Ensure vector max/min instructions are not generated for integer subword types - // as Java APIs for Math.min/max do not support integer subword types and superword - // should not generate vectorized Min/Max nodes for them. + // Ensure that casts to/from subword types are emitted, as java APIs for Math.min/max do not support integer subword + // types and superword should generate int versions and then cast between them. + @Test - @IR(failOn = {IRNode.MIN_VI, IRNode.MIN_VF, IRNode.MIN_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMinShort() { for (int i = 0; i < LENGTH; i++) { sb[i] = (short) Math.min(sa[i], val); @@ -77,7 +77,7 @@ public static void testMinShort_runner() { } @Test - @IR(failOn = {IRNode.MAX_VI, IRNode.MAX_VF, IRNode.MAX_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMaxShort() { for (int i = 0; i < LENGTH; i++) { sb[i] = (short) Math.max(sa[i], val); @@ -92,7 +92,7 @@ public static void testMaxShort_runner() { } @Test - @IR(failOn = {IRNode.MIN_VI, IRNode.MIN_VF, IRNode.MIN_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMinByte() { for (int i = 0; i < LENGTH; i++) { bb[i] = (byte) Math.min(ba[i], val); @@ -108,7 +108,7 @@ public static void testMinByte_runner() { } @Test - @IR(failOn = {IRNode.MAX_VI, IRNode.MAX_VF, IRNode.MAX_VD}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) public static void testMaxByte() { for (int i = 0; i < LENGTH; i++) { bb[i] = (byte) Math.max(ba[i], val); diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java index f5445f3106b52..284afe635814d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -105,6 +105,22 @@ public TestCompatibleUseDefTypeSize() { tests.put("test9", () -> { return test9(aL.clone(), bD.clone()); }); tests.put("test10", () -> { return test10(aL.clone(), bD.clone()); }); tests.put("test11", () -> { return test11(aC.clone()); }); + tests.put("testByteToInt", () -> { return testByteToInt(aB.clone(), bI.clone()); }); + tests.put("testByteToShort", () -> { return testByteToShort(aB.clone(), bS.clone()); }); + tests.put("testByteToChar", () -> { return testByteToChar(aB.clone(), bC.clone()); }); + tests.put("testByteToLong", () -> { return testByteToLong(aB.clone(), bL.clone()); }); + tests.put("testShortToByte", () -> { return testShortToByte(aS.clone(), bB.clone()); }); + tests.put("testShortToChar", () -> { return testShortToChar(aS.clone(), bC.clone()); }); + tests.put("testShortToInt", () -> { return testShortToInt(aS.clone(), bI.clone()); }); + tests.put("testShortToLong", () -> { return testShortToLong(aS.clone(), bL.clone()); }); + tests.put("testIntToShort", () -> { return testIntToShort(aI.clone(), bS.clone()); }); + tests.put("testIntToChar", () -> { return testIntToChar(aI.clone(), bC.clone()); }); + tests.put("testIntToByte", () -> { return testIntToByte(aI.clone(), bB.clone()); }); + tests.put("testIntToLong", () -> { return testIntToLong(aI.clone(), bL.clone()); }); + tests.put("testLongToByte", () -> { return testLongToByte(aL.clone(), bB.clone()); }); + tests.put("testLongToShort", () -> { return testLongToShort(aL.clone(), bS.clone()); }); + tests.put("testLongToChar", () -> { return testLongToChar(aL.clone(), bC.clone()); }); + tests.put("testLongToInt", () -> { return testLongToInt(aL.clone(), bI.clone()); }); // Compute gold value for all test methods before compilation for (Map.Entry entry : tests.entrySet()) { @@ -127,7 +143,23 @@ public TestCompatibleUseDefTypeSize() { "test8", "test9", "test10", - "test11"}) + "test11", + "testByteToInt", + "testByteToShort", + "testByteToChar", + "testByteToLong", + "testShortToByte", + "testShortToChar", + "testShortToInt", + "testShortToLong", + "testIntToShort", + "testIntToChar", + "testIntToByte", + "testIntToLong", + "testLongToByte", + "testLongToShort", + "testLongToChar", + "testLongToInt"}) public void runTests() { for (Map.Entry entry : tests.entrySet()) { String name = entry.getKey(); @@ -327,12 +359,12 @@ static Object[] test0(byte[] src, char[] dst) { } @Test - @IR(counts = {IRNode.STORE_VECTOR, "= 0"}, + @IR(counts = {IRNode.STORE_VECTOR, "> 0"}, applyIfPlatform = {"64-bit", "true"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"}) + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"}) // "inflate" method: 1 byte -> 2 byte. // Java scalar code has no explicit conversion. - // Vector code would need a conversion. We may add this in the future. static Object[] test1(byte[] src, char[] dst) { for (int i = 0; i < src.length; i++) { dst[i] = (char)(src[i]); @@ -473,4 +505,201 @@ static Object[] test11(char[] a) { } return new Object[]{ a, new char[] { m } }; } + + // Narrowing + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) + public Object[] testIntToShort(int[] ints, short[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = (short) ints[i]; + } + + return new Object[] { ints, res }; + } + + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_char)", ">0" }) + public Object[] testIntToChar(int[] ints, char[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = (char) ints[i]; + } + + return new Object[] { ints, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) + public Object[] testIntToByte(int[] ints, byte[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = (byte) ints[i]; + } + + return new Object[] { ints, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) + public Object[] testShortToByte(short[] shorts, byte[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = (byte) shorts[i]; + } + + return new Object[] { shorts, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx2", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2B, IRNode.VECTOR_SIZE + "min(max_long, max_byte)", ">0" }) + public Object[] testLongToByte(long[] longs, byte[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (byte) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2S, IRNode.VECTOR_SIZE + "min(max_long, max_short)", ">0" }) + public Object[] testLongToShort(long[] longs, short[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (short) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2S, IRNode.VECTOR_SIZE + "min(max_long, max_char)", ">0" }) + public Object[] testLongToChar(long[] longs, char[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (char) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_L2I, IRNode.VECTOR_SIZE + "min(max_long, max_int)", ">0" }) + public Object[] testLongToInt(long[] longs, int[] res) { + for (int i = 0; i < longs.length; i++) { + res[i] = (int) longs[i]; + } + + return new Object[] { longs, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.STORE_VECTOR, ">0" }) + public Object[] testShortToChar(short[] shorts, char[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = (char) shorts[i]; + } + + return new Object[] { shorts, res }; + } + + // Widening + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_short, max_int)", ">0" }) + public Object[] testShortToInt(short[] shorts, int[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = shorts[i]; + } + + return new Object[] { shorts, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_byte, max_int)", ">0" }) + public Object[] testByteToInt(byte[] bytes, int[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_short)", ">0" }) + public Object[] testByteToShort(byte[] bytes, short[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_char)", ">0" }) + public Object[] testByteToChar(byte[] bytes, char[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = (char) bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx2", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2L, IRNode.VECTOR_SIZE + "min(max_byte, max_long)", ">0" }) + public Object[] testByteToLong(byte[] bytes, long[] res) { + for (int i = 0; i < bytes.length; i++) { + res[i] = bytes[i]; + } + + return new Object[] { bytes, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2L, IRNode.VECTOR_SIZE + "min(max_short, max_long)", ">0" }) + public Object[] testShortToLong(short[] shorts, long[] res) { + for (int i = 0; i < shorts.length; i++) { + res[i] = shorts[i]; + } + + return new Object[] { shorts, res }; + } + + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", ">0" }) + public Object[] testIntToLong(int[] ints, long[] res) { + for (int i = 0; i < ints.length; i++) { + res[i] = ints[i]; + } + + return new Object[] { ints, res }; + } } diff --git a/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java b/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java index 5985367b26572..ce053201d2feb 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java @@ -73,7 +73,7 @@ static Object[] setupCharArray() { // Shorts @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortLeadingZeros(short[] in) { short[] res = new short[SIZE]; @@ -98,7 +98,7 @@ public void checkTestShortLeadingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortTrailingZeros(short[] in) { short[] res = new short[SIZE]; @@ -123,7 +123,7 @@ public void checkTestShortTrailingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortReverse(short[] in) { short[] res = new short[SIZE]; @@ -148,7 +148,7 @@ public void checkTestShortReverse(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupShortArray") public Object[] testShortBitCount(short[] in) { short[] res = new short[SIZE]; @@ -277,7 +277,7 @@ public void checkTestCharBitCount(Object[] vals) { // Bytes @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteLeadingZeros(byte[] in) { byte[] res = new byte[SIZE]; @@ -302,7 +302,7 @@ public void checkTestByteLeadingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteTrailingZeros(byte[] in) { byte[] res = new byte[SIZE]; @@ -327,7 +327,7 @@ public void checkTestByteTrailingZeros(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteReverse(byte[] in) { byte[] res = new byte[SIZE]; @@ -352,7 +352,7 @@ public void checkTestByteReverse(Object[] vals) { } @Test - @IR(counts = { IRNode.STORE_VECTOR, "=0" }) + @IR(applyIfCPUFeature = { "avx2", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE_ANY, ">0" }) @Arguments(setup = "setupByteArray") public Object[] testByteBitCount(byte[] in) { byte[] res = new byte[SIZE]; diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java index bcf5196d694b5..a0434411da780 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2025, Rivos Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -247,9 +247,8 @@ public short[] vectorUnsignedShiftRight() { } @Test - // Note that right shift operations on subword expressions cannot be - // vectorized since precise type info about signedness is missing. - @IR(failOn = {IRNode.STORE_VECTOR}) + @IR(applyIfCPUFeature = {"avx", "true"}, + counts = {IRNode.RSHIFT_VI, ">0"}) public short[] subwordExpressionRightShift() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java index 3fa636b42f78e..157d8d5c2f712 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java @@ -24,6 +24,7 @@ /* * @test + * @bug 8183390 8340010 8342095 * @summary Vectorization test on array type conversions * @library /test/lib / * @@ -108,10 +109,9 @@ public ArrayTypeConvertTest() { // ---------------- Integer Extension ---------------- @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) public int[] signExtension() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -122,7 +122,7 @@ public int[] signExtension() { @Test @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. + // Subword vector casts with char do not work currently, see JDK-8349562. // Assert the vectorization failure so that we are reminded to update // the test when this limitation is addressed in the future. public int[] zeroExtension() { @@ -134,10 +134,9 @@ public int[] zeroExtension() { } @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) public int[] signExtensionFromByte() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -146,12 +145,23 @@ public int[] signExtensionFromByte() { return res; } + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) + public short[] signExtensionFromByteToShort() { + short[] res = new short[SIZE]; + for (int i = 0; i < SIZE; i++) { + res[i] = bytes[i]; + } + return res; + } + // ---------------- Integer Narrow ---------------- @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" }) public short[] narrowToSigned() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -161,10 +171,9 @@ public short[] narrowToSigned() { } @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_char)", ">0" }) public char[] narrowToUnsigned() { char[] res = new char[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -174,11 +183,10 @@ public char[] narrowToUnsigned() { } @Test - @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. - // Assert the vectorization failure so that we are reminded to update - // the test when this limitation is addressed in the future. - public byte[] NarrowToByte() { + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" }) + public byte[] narrowToByte() { byte[] res = new byte[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = (byte) ints[i]; @@ -186,6 +194,18 @@ public byte[] NarrowToByte() { return res; } + @Test + @IR(applyIfCPUFeature = { "avx", "true" }, + applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"}, + counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" }) + public byte[] narrowShortToByte() { + byte[] res = new byte[SIZE]; + for (int i = 0; i < SIZE; i++) { + res[i] = (byte) shorts[i]; + } + return res; + } + // ---------------- Convert I/L to F/D ---------------- @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "avx", "true", "rvv", "true"}, @@ -268,7 +288,7 @@ public double[] convertShortToDouble() { @Test @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. + // Subword vector casts with char do not work currently, see JDK-8349562. // Assert the vectorization failure so that we are reminded to update // the test when this limitation is addressed in the future. public float[] convertCharToFloat() { @@ -281,7 +301,7 @@ public float[] convertCharToFloat() { @Test @IR(failOn = {IRNode.STORE_VECTOR}) - // Subword vector casts do not work currently, see JDK-8342095. + // Subword vector casts with char do not work currently, see JDK-8349562. // Assert the vectorization failure so that we are reminded to update // the test when this limitation is addressed in the future. public double[] convertCharToDouble() { diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java index 63739584558be..b81d90484f31b 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java @@ -210,10 +210,10 @@ public short[] vectorSignedShiftRight() { return res; } + // Min/Max vectorization requires a cast from subword to int and back to subword, to avoid losing the higher order bits. + @Test - // Note that min operations on subword types cannot be vectorized - // because higher bits will be lost. - @IR(failOn = {IRNode.STORE_VECTOR}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public short[] vectorMin() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -223,9 +223,7 @@ public short[] vectorMin() { } @Test - // Note that max operations on subword types cannot be vectorized - // because higher bits will be lost. - @IR(failOn = {IRNode.STORE_VECTOR}) + @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE_ANY, ">0" }) public short[] vectorMax() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java new file mode 100644 index 0000000000000..424220f789631 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.*; + +import java.util.concurrent.TimeUnit; +import java.util.Random; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(value = 3) +public class VectorSubword { + @Param({"1024"}) + public int SIZE; + + private byte[] bytes; + private short[] shorts; + private char[] chars; + private int[] ints; + private long[] longs; + + @Setup + public void init() { + bytes = new byte[SIZE]; + shorts = new short[SIZE]; + chars = new char[SIZE]; + ints = new int[SIZE]; + longs = new long[SIZE]; + } + + // Narrowing + + @Benchmark + public void shortToByte() { + for (int i = 0; i < SIZE; i++) { + bytes[i] = (byte) shorts[i]; + } + } + + @Benchmark + public void shortToChar() { + for (int i = 0; i < SIZE; i++) { + chars[i] = (char) shorts[i]; + } + } + + @Benchmark + public void charToByte() { + for (int i = 0; i < SIZE; i++) { + bytes[i] = (byte) chars[i]; + } + } + + @Benchmark + public void charToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = (short) chars[i]; + } + } + + @Benchmark + public void intToByte() { + for (int i = 0; i < SIZE; i++) { + bytes[i] = (byte) ints[i]; + } + } + + @Benchmark + public void intToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = (short) ints[i]; + } + } + + @Benchmark + public void intToChar() { + for (int i = 0; i < SIZE; i++) { + chars[i] = (char) ints[i]; + } + } + + @Benchmark + public void longToByte() { + for (int i = 0; i < SIZE; i++) { + bytes[i] = (byte) longs[i]; + } + } + + @Benchmark + public void longToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = (short) longs[i]; + } + } + + @Benchmark + public void longToChar() { + for (int i = 0; i < SIZE; i++) { + chars[i] = (char) longs[i]; + } + } + + @Benchmark + public void longToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = (int) longs[i]; + } + } + + // Widening + + @Benchmark + public void byteToShort() { + for (int i = 0; i < SIZE; i++) { + shorts[i] = bytes[i]; + } + } + + @Benchmark + public void byteToChar() { + for (int i = 0; i < SIZE; i++) { + chars[i] = (char) bytes[i]; + } + } + + @Benchmark + public void byteToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = bytes[i]; + } + } + + @Benchmark + public void byteToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = bytes[i]; + } + } + + @Benchmark + public void shortToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = shorts[i]; + } + } + + @Benchmark + public void shortToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = shorts[i]; + } + } + + @Benchmark + public void charToInt() { + for (int i = 0; i < SIZE; i++) { + ints[i] = chars[i]; + } + } + + @Benchmark + public void charToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = chars[i]; + } + } + + @Benchmark + public void intToLong() { + for (int i = 0; i < SIZE; i++) { + longs[i] = ints[i]; + } + } + +}