Skip to content
Permalink
Browse files

8241040: Support for AVX-512 Ternary Logic Instruction

A new pass has been added which folds expression trees involving vector boolean logic operations into a MacroLogic node.

Reviewed-by: vlivanov, neliasso
  • Loading branch information
Jatin Bhateja committed Apr 2, 2020
1 parent fb56759 commit 5532b27d221aa234f0cb6843f69ba797021782bd
@@ -6216,6 +6216,30 @@ void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vecto
emit_operand(dst, src);
}

// Emits the register-register form of VPTERNLOGD: dst, src2, src3 with an
// 8-bit immediate selecting the ternary logic function.
//
//   dst        - destination register (passed first to the prefix encoder)
//   imm8       - immediate byte emitted last, after the ModRM byte
//   src2, src3 - the two remaining register operands
//   vector_len - vector length encoding; anything other than AVX_512bit
//                requires AVX512VL
void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
  // The instruction is EVEX-only; sub-512-bit lengths additionally need VL.
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");

  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();

  // The prefix (66 / 0F 3A opcode map) is emitted here; the returned value
  // carries the register bits that go into the ModRM byte.
  const int reg_fields = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  // 0xC0 sets the two high bits of ModRM, i.e. the register-direct form.
  const unsigned char modrm = (unsigned char)(0xC0 | reg_fields);

  emit_int8(0x25);   // opcode byte
  emit_int8(modrm);
  emit_int8(imm8);   // trailing immediate
}

// Emits the register-memory form of VPTERNLOGD: dst, src2, [src3] with an
// 8-bit immediate selecting the ternary logic function.
//
//   dst        - destination register; must not be xnoreg
//   imm8       - immediate byte emitted last, after the memory operand
//   src2       - second register operand
//   src3       - memory operand
//   vector_len - vector length encoding; anything other than AVX_512bit
//                requires AVX512VL
void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len) {
// The instruction is EVEX-only; sub-512-bit lengths additionally need VL.
assert(VM_Version::supports_evex(), "requires EVEX support");
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
// NOTE(review): the input size is given as EVEX_64bit although this is the
// dword ('d') form and vex_w is false -- confirm this is intended.
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
// Prefix: 66 / 0F 3A opcode map.
vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
// Opcode byte.
emit_int8(0x25);
// NOTE(review): an imm8 byte follows the memory operand; if src3 can be
// RIP-relative, verify that emit_operand accounts for the trailing byte.
emit_operand(dst, src3);
// Trailing immediate.
emit_int8(imm8);
}

// vinserti forms

@@ -2198,6 +2198,9 @@ class Assembler : public AbstractAssembler {
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Ternary logic instruction (VPTERNLOGD).
// imm8 is the immediate byte selecting the logic function; dst is the
// destination, src2 is a register operand, and src3 is either a register
// or a memory operand. vector_len is the vector length encoding.
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);

// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
@@ -3757,6 +3757,14 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file,
BLOCK_COMMENT("} verify_oop");
}

// Sets every bit of dst to one for the given vector length.
//
// When AVX-512 is in use (UseAVX > 2) and the length is either 512 bits or
// AVX512VL is available, a single vpternlogd with immediate 0xFF is emitted
// (per the Intel SDM, a truth table of all ones yields 1 for every input
// combination). Otherwise the code falls back to comparing dst with itself
// byte-wise, which requires at least AVX.
void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
  const bool can_use_ternlog =
      UseAVX > 2 &&
      (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl());

  if (!can_use_ternlog) {
    // Fallback: each byte compares equal to itself, so the result is all ones.
    assert(UseAVX > 0, "");
    vpcmpeqb(dst, dst, dst, vector_len);
    return;
  }

  vpternlogd(dst, 0xFF, dst, dst, vector_len);
}

RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
Register tmp,
@@ -1721,6 +1721,8 @@ class MacroAssembler: public Assembler {
void cache_wb(Address line);
void cache_wbsync(bool is_pre);
#endif // _LP64

// Sets all bits of dst to one for the given vector length encoding.
void vallones(XMMRegister dst, int vector_len);
};

/**
@@ -1363,6 +1363,11 @@ const bool Matcher::match_rule_supported(int opcode) {
return false; // 128bit vroundpd is not available
}
break;
case Op_MacroLogicV:
if (UseAVX < 3 || !UseVectorMacroLogic) {
return false;
}
break;
#ifndef _LP64
case Op_AddReductionVF:
case Op_AddReductionVD:
@@ -1408,6 +1413,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
// * implementation limitations
// * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
// * 128bit vroundpd instruction is present only in AVX1
int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
switch (opcode) {
case Op_AbsVF:
case Op_NegVF:
@@ -1426,6 +1432,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false; // implementation limitation (only vcmov8F_reg is present)
}
break;
case Op_MacroLogicV:
if (!VM_Version::supports_evex() ||
((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
return false;
}
break;
case Op_CMoveVD:
if (vlen != 4) {
return false; // implementation limitation (only vcmov4D_reg is present)
@@ -3356,6 +3368,20 @@ instruct ReplI_zero(vec dst, immI0 zero) %{
ins_pipe( fpu_reg_reg );
%}

// Replicate of an immI_M1 constant (presumably the int constant -1 -- confirm
// against the operand definition) into byte, short or int lanes.
// Matches ReplicateB/ReplicateS/ReplicateI of that constant and, instead of
// materializing and broadcasting the value, emits MacroAssembler::vallones on
// dst. Only selected when UseAVX > 0.
instruct ReplI_M1(vec dst, immI_M1 con) %{
predicate(UseAVX > 0);
match(Set dst (ReplicateB con));
match(Set dst (ReplicateS con));
match(Set dst (ReplicateI con));
effect(TEMP dst);
format %{ "vallones $dst" %}
ins_encode %{
// The vector length encoding is derived from this node.
int vector_len = vector_length_encoding(this);
__ vallones($dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}

// ====================ReplicateL=======================================

#ifdef _LP64
@@ -3488,6 +3514,18 @@ instruct ReplL_zero(vec dst, immL0 zero) %{
ins_pipe( fpu_reg_reg );
%}

// Replicate of an immL_M1 constant (presumably the long constant -1 -- confirm
// against the operand definition) into long lanes.
// Matches ReplicateL of that constant and emits MacroAssembler::vallones on
// dst rather than broadcasting a materialized value. Only selected when
// UseAVX > 0.
instruct ReplL_M1(vec dst, immL_M1 con) %{
predicate(UseAVX > 0);
match(Set dst (ReplicateL con));
effect(TEMP dst);
format %{ "vallones $dst" %}
ins_encode %{
// The vector length encoding is derived from this node.
int vector_len = vector_length_encoding(this);
__ vallones($dst$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}

// ====================ReplicateF=======================================

instruct ReplF_reg(vec dst, vlRegF src) %{
@@ -5154,3 +5192,27 @@ instruct vpopcountI(vec dst, vec src) %{
%}
ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------

// Register form of the vector ternary logic rule.
// Matches a MacroLogicV node whose inputs are (dst, src2) and (src3, func):
// dst is both the first input and the result, and func is an unsigned 8-bit
// immediate passed through as the instruction's imm8.
instruct vpternlogdB(vec dst, vec src2, vec src3, immU8 func) %{
match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
effect(TEMP dst);
format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
ins_encode %{
// The vector length encoding is derived from this node.
int vector_len = vector_length_encoding(this);
__ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}

// Memory form of the vector ternary logic rule.
// Same shape as the register rule, except that the third input is a
// LoadVector from src3, which is folded into the instruction as a memory
// operand. dst is both the first input and the result; func is an unsigned
// 8-bit immediate passed through as the instruction's imm8.
instruct vpternlogdB_mem(vec dst, vec src2, memory src3, immU8 func) %{
match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
effect(TEMP dst);
format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
ins_encode %{
// The vector length encoding is derived from this node.
int vector_len = vector_length_encoding(this);
__ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -4168,7 +4168,7 @@ bool MatchRule::is_vector() const {
"MulReductionVF", "MulReductionVD",
"MaxReductionV", "MinReductionV",
"AndReductionV", "OrReductionV", "XorReductionV",
"MulAddVS2VI",
"MulAddVS2VI", "MacroLogicV",
"LShiftCntV","RShiftCntV",
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
@@ -186,6 +186,9 @@
notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \
"Trace what Superword Level Parallelism analysis applies") \
\
diagnostic(bool, UseVectorMacroLogic, true, \
"Use ternary macro logic instructions") \
\
product(intx, LoopUnrollMin, 4, \
"Minimum number of unroll loop bodies before checking progress" \
"of rounds of unroll,optimize,..") \
@@ -312,6 +312,7 @@ macro(SubI)
macro(SubL)
macro(TailCall)
macro(TailJump)
macro(MacroLogicV)
macro(ThreadLocal)
macro(Unlock)
macro(URShiftI)

0 comments on commit 5532b27

Please sign in to comment.