Skip to content

Commit

Permalink
[ARM] Add patterns for select(p, BinOp(x, y), z) -> BinOpT(x, y, p, z)
Browse files Browse the repository at this point in the history
Most MVE instructions can be predicated to fold a select into the
instruction, using the predicate and the select's else operand as a passthrough.
This adds tablegen patterns for most two operand instructions using the
newly added TwoOpPattern from 1030e82.

Differential Revision: https://reviews.llvm.org/D83222
  • Loading branch information
davemgreen committed Jul 22, 2020
1 parent ce6de37 commit 3533e0a
Show file tree
Hide file tree
Showing 7 changed files with 1,588 additions and 1,433 deletions.
26 changes: 24 additions & 2 deletions llvm/lib/Target/ARM/ARMInstrMVE.td
Expand Up @@ -325,7 +325,18 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt,
// A plain Op on two MQPR vector operands ($Qm, $Qn) selects Inst with the
// same two operands and no predicate operands.
def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;

// Predicated
// Predicated with select
// Fold a vselect into the instruction: when the select's true operand is
// Op($Qm, $Qn), emit Inst with the ARMVCCThen predicate on $mask and pass the
// select's false operand ($inactive) as the instruction's trailing operand.
// NOTE(review): the guard excludes VTI.Size == 0b11; presumably that encodes
// the 64-bit element size - confirm against MVEVectorVTInfo.
if !ne(VTI.Size, 0b11) then {
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
(VTI.Vec MQPR:$Qn))),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
ARMVCCThen, (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive)))>;
}

// Predicated with intrinsic
def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)),
PredOperands,
(? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
Expand All @@ -340,7 +351,18 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredIn
// An Op whose second operand is an ARMvdup of a GPR selects Inst taking the
// vector $Qm and the scalar register $Rn directly, with no predicate operands.
def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>;

// Predicated
// Predicated with select
// Fold a vselect into the instruction: when the select's true operand is
// Op($Qm, ARMvdup($Rn)), emit Inst on $Qm and the scalar $Rn with the
// ARMVCCThen predicate on $mask, passing the select's false operand
// ($inactive) as the instruction's trailing operand.
// NOTE(review): the guard excludes VTI.Size == 0b11; presumably that encodes
// the 64-bit element size - confirm against MVEVectorVTInfo.
if !ne(VTI.Size, 0b11) then {
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask),
(VTI.Vec (Op (VTI.Vec MQPR:$Qm),
(VTI.Vec (ARMvdup rGPR:$Rn)))),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
ARMVCCThen, (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive)))>;
}

// Predicated with intrinsic
def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))),
PredOperands,
(? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
Expand Down
Expand Up @@ -99,8 +99,9 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a,
; CHECK-LABEL: vpsel_mul_reduce_add_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: ldr.w r12, [sp, #20]
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: ldr.w r12, [sp, #40]
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: beq .LBB1_4
; CHECK-NEXT: @ %bb.1: @ %vector.ph
Expand All @@ -116,19 +117,17 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a,
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r12
; CHECK-NEXT: and r5, r4, #15
; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrwt.u32 q1, [r3], #16
; CHECK-NEXT: vldrwt.u32 q2, [r2], #16
; CHECK-NEXT: vdup.32 q3, r5
; CHECK-NEXT: vsub.i32 q1, q2, q1
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrwt.u32 q2, [r1], #16
; CHECK-NEXT: vcmp.i32 eq, q3, zr
; CHECK-NEXT: vpsttt
; CHECK-NEXT: vldrwt.u32 q1, [r1], #16
; CHECK-NEXT: vldrwt.u32 q2, [r3], #16
; CHECK-NEXT: vldrwt.u32 q3, [r2], #16
; CHECK-NEXT: vdup.32 q4, r5
; CHECK-NEXT: vpt.i32 eq, q4, zr
; CHECK-NEXT: vsubt.i32 q1, q3, q2
; CHECK-NEXT: adds r4, #4
; CHECK-NEXT: vpsel q1, q1, q2
; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrwt.u32 q2, [r0], #16
; CHECK-NEXT: vmul.i32 q1, q1, q2
Expand All @@ -138,11 +137,12 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a,
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: b .LBB1_5
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r7, pc}
i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) {
entry:
Expand Down

0 comments on commit 3533e0a

Please sign in to comment.