Skip to content

Commit

Permalink
[AArch64][SVE] Add support for using reverse forms of SVE2 shifts
Browse files Browse the repository at this point in the history
When using and ACLE intrinsic for an SVE2 shift, if the predicate passed
has all relevant lanes active, then use a reversed version of the
instruction if beneficial.
  • Loading branch information
brads55 committed Jun 4, 2021
1 parent fd3f251 commit e42ee2d
Show file tree
Hide file tree
Showing 3 changed files with 434 additions and 17 deletions.
31 changes: 19 additions & 12 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2518,18 +2518,25 @@ let Predicates = [HasSVE2] in {
defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>;

// SVE2 saturating/rounding bitwise shift left (predicated)
defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl>;
defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl>;
defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>;
defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>;
defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl>;
defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl>;
defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl>;
defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl>;
defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>;
defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>;
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>;
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;
defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl, "SRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SRSHLR_ZPmZ">;
defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl, "URSHL_ZPZZ", DestructiveBinaryCommWithRev, "URSHLR_ZPmZ">;
defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag, "SRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SRSHL_ZPmZ", /*isReverseInstr*/ 1>;
defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag, "URSHLR_ZPZZ", DestructiveBinaryCommWithRev, "URSHL_ZPmZ", /*isReverseInstr*/ 1>;
defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl, "SQSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQSHLR_ZPmZ">;
defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl, "UQSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQSHLR_ZPmZ">;
defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl, "SQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHLR_ZPmZ">;
defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl, "UQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHLR_ZPmZ">;
defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag, "SQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQSHL_ZPmZ", /*isReverseInstr*/ 1>;
defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag, "UQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQSHL_ZPmZ", /*isReverseInstr*/ 1>;
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag, "SQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHL_ZPmZ", /*isReverseInstr*/ 1>;
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag, "UQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHL_ZPmZ", /*isReverseInstr*/ 1>;

defm SRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_srshl>;
defm URSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_urshl>;
defm SQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqshl>;
defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>;
defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>;
defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>;

let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
Expand Down
37 changes: 32 additions & 5 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,12 @@ class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
: Pat<(vtd (op (pt (SVEAllActive)), vt1:$Op1, vt2:$Op2)),
(inst $Op1, $Op2)>;

class SVE_2_Op_Pred_All_Active_Pt<ValueType vtd, SDPatternOperator op,
ValueType pt, ValueType vt1, ValueType vt2,
Instruction inst>
: Pat<(vtd (op (pt (SVEAllActive:$Op1)), vt1:$Op2, vt2:$Op3)),
(inst $Op1, $Op2, $Op3)>;

class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
Expand Down Expand Up @@ -3167,11 +3173,20 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
let ElementSize = zprty.ElementSize;
}

multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op> {
def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op,
string Ps = "",
DestructiveInstTypeEnum flags=DestructiveOther,
string revname="", bit isReverseInstr=0> {
let DestructiveInstType = flags in {
def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>,
SVEPseudo2Instr<Ps # _B, 1>, SVEInstr2Rev<NAME # _B, revname # _B, isReverseInstr>;
def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>,
SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isReverseInstr>;
def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>,
SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>,
SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
}

def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
Expand Down Expand Up @@ -8139,3 +8154,15 @@ multiclass sve_int_shift_pred_bhsd<SDPatternOperator op,
def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>;
def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>;
}

multiclass sve_int_bin_pred_all_active_bhsd<SDPatternOperator op> {
def _UNDEF_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
def _UNDEF_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;

def : SVE_2_Op_Pred_All_Active_Pt<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
def : SVE_2_Op_Pred_All_Active_Pt<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
def : SVE_2_Op_Pred_All_Active_Pt<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
Loading

0 comments on commit e42ee2d

Please sign in to comment.