Skip to content
Permalink
Browse files
8271368: [BACKOUT] JDK-8266054 VectorAPI rotate operation optimization
Reviewed-by: dholmes, iklam
  • Loading branch information
Vladimir Kozlov committed Jul 28, 2021
1 parent ecd4455 commit d7b5cb688956ce79443ef3cd080c36028fcfb19d
Showing with 219 additions and 4,380 deletions.
  1. +0 −5 src/hotspot/cpu/aarch64/matcher_aarch64.hpp
  2. +0 −5 src/hotspot/cpu/arm/matcher_arm.hpp
  3. +0 −5 src/hotspot/cpu/ppc/matcher_ppc.hpp
  4. +0 −5 src/hotspot/cpu/s390/matcher_s390.hpp
  5. +0 −5 src/hotspot/cpu/x86/matcher_x86.hpp
  6. +0 −3 src/hotspot/cpu/x86/x86.ad
  7. +0 −1 src/hotspot/share/opto/library_call.hpp
  8. +2 −1 src/hotspot/share/opto/superword.cpp
  9. +11 −86 src/hotspot/share/opto/vectorIntrinsics.cpp
  10. +23 −67 src/hotspot/share/opto/vectornode.cpp
  11. +0 −2 src/hotspot/share/opto/vectornode.hpp
  12. +0 −20 src/hotspot/share/prims/vectorSupport.cpp
  13. +0 −4 src/hotspot/share/prims/vectorSupport.hpp
  14. +0 −4 src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
  15. +11 −21 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
  16. +0 −1 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
  17. +0 −1 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
  18. +11 −21 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
  19. +11 −21 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
  20. +11 −21 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
  21. +2 −2 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorOperators.java
  22. +11 −31 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
  23. +6 −186 test/jdk/jdk/incubator/vector/Byte128VectorTests.java
  24. +6 −186 test/jdk/jdk/incubator/vector/Byte256VectorTests.java
  25. +6 −186 test/jdk/jdk/incubator/vector/Byte512VectorTests.java
  26. +6 −186 test/jdk/jdk/incubator/vector/Byte64VectorTests.java
  27. +6 −186 test/jdk/jdk/incubator/vector/ByteMaxVectorTests.java
  28. +0 −8 test/jdk/jdk/incubator/vector/Double128VectorTests.java
  29. +0 −8 test/jdk/jdk/incubator/vector/Double256VectorTests.java
  30. +0 −8 test/jdk/jdk/incubator/vector/Double512VectorTests.java
  31. +0 −8 test/jdk/jdk/incubator/vector/Double64VectorTests.java
  32. +0 −8 test/jdk/jdk/incubator/vector/DoubleMaxVectorTests.java
  33. +0 −8 test/jdk/jdk/incubator/vector/Float128VectorTests.java
  34. +0 −8 test/jdk/jdk/incubator/vector/Float256VectorTests.java
  35. +0 −8 test/jdk/jdk/incubator/vector/Float512VectorTests.java
  36. +0 −8 test/jdk/jdk/incubator/vector/Float64VectorTests.java
  37. +0 −8 test/jdk/jdk/incubator/vector/FloatMaxVectorTests.java
  38. +6 −186 test/jdk/jdk/incubator/vector/Int128VectorTests.java
  39. +6 −186 test/jdk/jdk/incubator/vector/Int256VectorTests.java
  40. +6 −186 test/jdk/jdk/incubator/vector/Int512VectorTests.java
  41. +6 −186 test/jdk/jdk/incubator/vector/Int64VectorTests.java
  42. +6 −186 test/jdk/jdk/incubator/vector/IntMaxVectorTests.java
  43. +6 −186 test/jdk/jdk/incubator/vector/Long128VectorTests.java
  44. +6 −186 test/jdk/jdk/incubator/vector/Long256VectorTests.java
  45. +6 −186 test/jdk/jdk/incubator/vector/Long512VectorTests.java
  46. +6 −186 test/jdk/jdk/incubator/vector/Long64VectorTests.java
  47. +6 −186 test/jdk/jdk/incubator/vector/LongMaxVectorTests.java
  48. +6 −186 test/jdk/jdk/incubator/vector/Short128VectorTests.java
  49. +6 −186 test/jdk/jdk/incubator/vector/Short256VectorTests.java
  50. +6 −186 test/jdk/jdk/incubator/vector/Short512VectorTests.java
  51. +6 −186 test/jdk/jdk/incubator/vector/Short64VectorTests.java
  52. +6 −186 test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java
  53. +4 −8 test/jdk/jdk/incubator/vector/gen-template.sh
  54. +1 −1 test/jdk/jdk/incubator/vector/templates/Unit-Shift-Masked-op.template
  55. +1 −1 test/jdk/jdk/incubator/vector/templates/Unit-Shift-op.template
  56. +0 −24 test/jdk/jdk/incubator/vector/templates/Unit-header.template
  57. +0 −214 test/micro/org/openjdk/bench/jdk/incubator/vector/RotateBenchmark.java
@@ -138,11 +138,6 @@
return false;
}

// Reports whether the CPU provides vector rotate instructions that accept a
// constant rotation count. This implementation answers "no" for every count.
static constexpr bool supports_vector_constant_rotates(int shift) {
  return false;
}

// Does the CPU support vector unsigned comparison instructions?
static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
// Not supported on SVE yet.
@@ -131,11 +131,6 @@
return false; // not supported
}

// Reports whether the CPU provides vector rotate instructions that accept a
// constant rotation count. This implementation answers "no" for every count.
static constexpr bool supports_vector_constant_rotates(int shift) {
  return false;
}

// Does the CPU support vector unsigned comparison instructions?
static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
return false;
@@ -138,11 +138,6 @@
return false;
}

// Reports whether the CPU provides vector rotate instructions that accept a
// constant rotation count. This implementation answers "no" for every count.
static constexpr bool supports_vector_constant_rotates(int shift) {
  return false;
}

// Does the CPU support vector unsigned comparison instructions?
static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
return false;
@@ -128,11 +128,6 @@
return false;
}

// Reports whether the CPU provides vector rotate instructions that accept a
// constant rotation count. This implementation answers "no" for every count.
static constexpr bool supports_vector_constant_rotates(int shift) {
  return false;
}

// Does the CPU support vector unsigned comparison instructions?
static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
return false;
@@ -158,11 +158,6 @@
return true;
}

// Reports whether `shift` is usable as a constant rotation count for the
// CPU's vector rotate instructions. The count is accepted only when it lies
// in the half-open range [-0x80, 0x80).
static constexpr bool supports_vector_constant_rotates(int shift) {
  return !(shift < -0x80 || shift >= 0x80);
}

// Does the CPU support vector unsigned comparison instructions?
static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
int vlen_in_bytes = vlen * type2aelembytes(bt);
@@ -1638,9 +1638,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_RotateRightV:
case Op_RotateLeftV:
if (bt != T_INT && bt != T_LONG) {
return false;
} // fallthrough
case Op_MacroLogicV:
if (!VM_Version::supports_evex() ||
((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
@@ -336,7 +336,6 @@ class LibraryCallKit : public GraphKit {
};

bool arch_supports_vector(int op, int num_elem, BasicType type, VectorMaskUseType mask_use_type, bool has_scalar_args = false);
bool arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, bool has_scalar_args = false);

void clear_upper_avx() {
#ifdef X86
@@ -2488,8 +2488,9 @@ void SuperWord::output() {
} else if (VectorNode::is_scalar_rotate(n)) {
Node* in1 = low_adr->in(1);
Node* in2 = p->at(0)->in(2);
assert(in2->bottom_type()->isa_int(), "Shift must always be an int value");
// If the rotation count is non-constant or does not fit in a signed 8-bit value, create a vector.
if (!in2->is_Con() || !Matcher::supports_vector_constant_rotates(in2->get_int())) {
if (!in2->is_Con() || -0x80 > in2->get_int() || in2->get_int() >= 0x80) {
in2 = vector_opd(p, 2);
}
vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
@@ -59,48 +59,6 @@ static bool check_vbox(const TypeInstPtr* vbox_type) {
}
#endif

// Decides whether a vector rotate `opc` over `num_elem` lanes of `elem_bt`
// can be intrinsified.
//
// The rotate is accepted when either
//   (a) the matcher has a rule for the rotate itself and, if the rotation
//       count is a non-constant scalar (has_scalar_args == true) that must be
//       broadcast, the corresponding replicate operation is supported too; or
//   (b) the left-shift, unsigned-right-shift and OR vector operations for this
//       element type are all supported.
//
// Only T_BYTE, T_SHORT, T_INT and T_LONG are expected. Any other element type
// trips the assert and, where asserts are compiled out, is rejected instead
// of reading uninitialized shift opcodes.
bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, bool has_scalar_args) {
  // has_scalar_args flag is true only for non-constant scalar shift count,
  // since in this case shift needs to be broadcasted.
  bool is_supported =
      Matcher::match_rule_supported_vector(opc, num_elem, elem_bt) &&
      (!has_scalar_args ||
       arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed));

  // Scalar shift opcodes from which the fallback vector opcodes are derived.
  int lshiftopc = 0;
  int rshiftopc = 0;
  switch (elem_bt) {
    case T_BYTE:
      lshiftopc = Op_LShiftI;
      rshiftopc = Op_URShiftB;
      break;
    case T_SHORT:
      lshiftopc = Op_LShiftI;
      rshiftopc = Op_URShiftS;
      break;
    case T_INT:
      lshiftopc = Op_LShiftI;
      rshiftopc = Op_URShiftI;
      break;
    case T_LONG:
      lshiftopc = Op_LShiftL;
      rshiftopc = Op_URShiftL;
      break;
    default:
      assert(false, "Unexpected type");
      // Without this return the opcodes below would be used uninitialized
      // whenever the assert is compiled out.
      return false;
  }

  if (is_supported) {
    return true;
  }

  // No usable rotate rule: check whether the shift/shift/or fallback works.
  int lshiftvopc = VectorNode::opcode(lshiftopc, elem_bt);
  int rshiftvopc = VectorNode::opcode(rshiftopc, elem_bt);
  return arch_supports_vector(lshiftvopc, num_elem, elem_bt, VecMaskNotUsed) &&
         arch_supports_vector(rshiftvopc, num_elem, elem_bt, VecMaskNotUsed) &&
         arch_supports_vector(Op_OrV, num_elem, elem_bt, VecMaskNotUsed);
}

Node* GraphKit::box_vector(Node* vector, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool deoptimize_on_exception) {
assert(EnableVectorSupport, "");

@@ -154,29 +112,17 @@ bool LibraryCallKit::arch_supports_vector(int sopc, int num_elem, BasicType type
return false;
}

if (VectorNode::is_vector_rotate(sopc)) {
if(!arch_supports_vector_rotate(sopc, num_elem, type, has_scalar_args)) {
// Check that architecture supports this op-size-type combination.
if (!Matcher::match_rule_supported_vector(sopc, num_elem, type)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
tty->print_cr(" ** Rejected vector op (%s,%s,%d) because architecture does not support variable vector shifts",
NodeClassNames[sopc], type2name(type), num_elem);
}
#endif
return false;
if (C->print_intrinsics()) {
tty->print_cr(" ** Rejected vector op (%s,%s,%d) because architecture does not support it",
NodeClassNames[sopc], type2name(type), num_elem);
}
} else {
// Check that architecture supports this op-size-type combination.
if (!Matcher::match_rule_supported_vector(sopc, num_elem, type)) {
#ifndef PRODUCT
if (C->print_intrinsics()) {
tty->print_cr(" ** Rejected vector op (%s,%s,%d) because architecture does not support it",
NodeClassNames[sopc], type2name(type), num_elem);
}
#endif
return false;
} else {
assert(Matcher::match_rule_supported(sopc), "must be supported");
}
return false;
} else {
assert(Matcher::match_rule_supported(sopc), "must be supported");
}

if (num_elem == 1) {
@@ -1554,9 +1500,7 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
BasicType elem_bt = elem_type->basic_type();
int num_elem = vlen->get_con();
int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
bool is_shift = VectorNode::is_shift_opcode(opc);
bool is_rotate = VectorNode::is_rotate_opcode(opc);
if (opc == 0 || (!is_shift && !is_rotate)) {
if (opc == 0 || !VectorNode::is_shift_opcode(opc)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** operation not supported: op=%d bt=%s", opr->get_con(), type2name(elem_bt));
}
@@ -1569,37 +1513,18 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
}
return false; // operation not supported
}
Node* cnt = argument(5);
ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
const TypeInt* cnt_type = cnt->bottom_type()->isa_int();

// If CPU supports vector constant rotate instructions pass it directly
bool is_const_rotate = is_rotate && cnt_type && cnt_type->is_con() &&
Matcher::supports_vector_constant_rotates(cnt_type->get_con());
bool has_scalar_args = is_rotate ? !is_const_rotate : true;
if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args)) {
if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed, true /*has_scalar_args*/)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** not supported: arity=0 op=int/%d vlen=%d etype=%s ismask=no",
sopc, num_elem, type2name(elem_bt));
}
return false; // not supported
}
Node* opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
Node* opd2 = NULL;
if (is_shift) {
opd2 = vector_shift_count(cnt, opc, elem_bt, num_elem);
} else {
assert(is_rotate, "unexpected operation");
if (!is_const_rotate) {
const Type * type_bt = Type::get_const_basic_type(elem_bt);
cnt = elem_bt == T_LONG ? gvn().transform(new ConvI2LNode(cnt)) : cnt;
opd2 = gvn().transform(VectorNode::scalar2vector(cnt, num_elem, type_bt));
} else {
// Constant shift value.
opd2 = cnt;
}
}
Node* opd2 = vector_shift_count(argument(5), opc, elem_bt, num_elem);
if (opd1 == NULL || opd2 == NULL) {
return false;
}
@@ -142,9 +142,9 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case Op_RoundDoubleMode:
return (bt == T_DOUBLE ? Op_RoundDoubleModeV : 0);
case Op_RotateLeft:
return (is_integral_type(bt) ? Op_RotateLeftV : 0);
return (bt == T_LONG || bt == T_INT ? Op_RotateLeftV : 0);
case Op_RotateRight:
return (is_integral_type(bt) ? Op_RotateRightV : 0);
return (bt == T_LONG || bt == T_INT ? Op_RotateRightV : 0);
case Op_SqrtF:
return (bt == T_FLOAT ? Op_SqrtVF : 0);
case Op_SqrtD:
@@ -261,7 +261,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
// For rotate operation we will do a lazy de-generation into
// OrV/LShiftV/URShiftV pattern if the target does not support
// vector rotation instruction.
if (VectorNode::is_vector_rotate(vopc)) {
if (vopc == Op_RotateLeftV || vopc == Op_RotateRightV) {
return is_vector_rotate_supported(vopc, vlen, bt);
}
return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt);
@@ -295,8 +295,15 @@ bool VectorNode::is_roundopD(Node* n) {
return false;
}

// True when node `n` is a scalar rotate, i.e. its opcode is Op_RotateLeft or
// Op_RotateRight.
bool VectorNode::is_scalar_rotate(Node* n) {
  const int node_opc = n->Opcode();
  return node_opc == Op_RotateLeft || node_opc == Op_RotateRight;
}

bool VectorNode::is_vector_rotate_supported(int vopc, uint vlen, BasicType bt) {
assert(VectorNode::is_vector_rotate(vopc), "wrong opcode");
assert(vopc == Op_RotateLeftV || vopc == Op_RotateRightV, "wrong opcode");

// If target defines vector rotation patterns then no
// need for degeneration.
@@ -340,23 +347,6 @@ bool VectorNode::is_shift(Node* n) {
return is_shift_opcode(n->Opcode());
}

// True when `opc` is one of the scalar rotate opcodes (Op_RotateRight or
// Op_RotateLeft); false for every other opcode.
bool VectorNode::is_rotate_opcode(int opc) {
  return opc == Op_RotateRight || opc == Op_RotateLeft;
}

// True when the opcode of node `n` is classified as a rotate by
// is_rotate_opcode().
bool VectorNode::is_scalar_rotate(Node* n) {
  return is_rotate_opcode(n->Opcode());
}

bool VectorNode::is_vshift_cnt(Node* n) {
switch (n->Opcode()) {
case Op_LShiftCntV:
@@ -588,16 +578,6 @@ VectorNode* VectorNode::shift_count(int opc, Node* cnt, uint vlen, BasicType bt)
}
}

// True when `opc` is one of the vector rotate opcodes (Op_RotateLeftV or
// Op_RotateRightV); false for every other opcode.
bool VectorNode::is_vector_rotate(int opc) {
  return opc == Op_RotateLeftV || opc == Op_RotateRightV;
}

bool VectorNode::is_vector_shift(int opc) {
assert(opc > _last_machine_leaf && opc < _last_opcode, "invalid opcode");
switch (opc) {
@@ -1151,66 +1131,42 @@ MacroLogicVNode* MacroLogicVNode::make(PhaseGVN& gvn, Node* in1, Node* in2, Node

Node* VectorNode::degenerate_vector_rotate(Node* src, Node* cnt, bool is_rotate_left,
int vlen, BasicType bt, PhaseGVN* phase) {
assert(is_integral_type(bt), "sanity");
assert(bt == T_INT || bt == T_LONG, "sanity");
const TypeVect* vt = TypeVect::make(bt, vlen);

int shift_mask = (type2aelembytes(bt) * 8) - 1;
int shiftLOpc = (bt == T_LONG) ? Op_LShiftL : Op_LShiftI;
auto urshiftopc = [=]() {
switch(bt) {
case T_INT: return Op_URShiftI;
case T_LONG: return Op_URShiftL;
case T_BYTE: return Op_URShiftB;
case T_SHORT: return Op_URShiftS;
default: return (Opcodes)0;
}
};
int shiftROpc = urshiftopc();
int shift_mask = (bt == T_INT) ? 0x1F : 0x3F;
int shiftLOpc = (bt == T_INT) ? Op_LShiftI : Op_LShiftL;
int shiftROpc = (bt == T_INT) ? Op_URShiftI: Op_URShiftL;

// Compute shift values for right rotation and
// later swap them in case of left rotation.
Node* shiftRCnt = NULL;
Node* shiftLCnt = NULL;
const TypeInt* cnt_type = cnt->bottom_type()->isa_int();
bool is_binary_vector_op = false;
if (cnt_type && cnt_type->is_con()) {
// Constant shift.
int shift = cnt_type->get_con() & shift_mask;
if (cnt->is_Con() && cnt->bottom_type()->isa_int()) {
// Constant shift case.
int shift = cnt->get_int() & shift_mask;
shiftRCnt = phase->intcon(shift);
shiftLCnt = phase->intcon(shift_mask + 1 - shift);
} else if (VectorNode::is_invariant_vector(cnt)) {
// Scalar variable shift, handle replicates generated by auto vectorizer.
} else {
// Variable shift case.
assert(VectorNode::is_invariant_vector(cnt), "Broadcast expected");
cnt = cnt->in(1);
if (bt == T_LONG) {
// Shift count vector for Rotate vector has long elements too.
assert(cnt->Opcode() == Op_ConvI2L, "ConvI2L expected");
cnt = cnt->in(1);
}
shiftRCnt = cnt;
shiftRCnt = phase->transform(new AndINode(cnt, phase->intcon(shift_mask)));
shiftLCnt = phase->transform(new SubINode(phase->intcon(shift_mask + 1), shiftRCnt));
} else {
// Vector variable shift.
assert(cnt->bottom_type()->isa_vect(), "Unexpected shift");
const Type* elem_ty = Type::get_const_basic_type(bt);
Node* shift_mask_node = (bt == T_LONG) ? (Node*)(phase->longcon(shift_mask + 1L)) :
(Node*)(phase->intcon(shift_mask + 1));
Node* vector_mask = phase->transform(VectorNode::scalar2vector(shift_mask_node,vlen, elem_ty));
int subVopc = VectorNode::opcode((bt == T_LONG) ? Op_SubL : Op_SubI, bt);
shiftRCnt = cnt;
shiftLCnt = phase->transform(VectorNode::make(subVopc, vector_mask, shiftRCnt, vt));
is_binary_vector_op = true;
}

// Swap the computed left and right shift counts.
if (is_rotate_left) {
swap(shiftRCnt,shiftLCnt);
}

if (!is_binary_vector_op) {
shiftLCnt = phase->transform(new LShiftCntVNode(shiftLCnt, vt));
shiftRCnt = phase->transform(new RShiftCntVNode(shiftRCnt, vt));
}
shiftLCnt = phase->transform(new LShiftCntVNode(shiftLCnt, vt));
shiftRCnt = phase->transform(new RShiftCntVNode(shiftRCnt, vt));

return new OrVNode(phase->transform(VectorNode::make(shiftLOpc, src, shiftLCnt, vlen, bt)),
phase->transform(VectorNode::make(shiftROpc, src, shiftRCnt, vlen, bt)),

1 comment on commit d7b5cb6

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on d7b5cb6 Jul 28, 2021

Please sign in to comment.