Skip to content

Commit

Permalink
[ARM] Sink splat to ICmp
Browse files Browse the repository at this point in the history
This adds ICmp to the list of instructions that we sink a splat to in a
loop, allowing the register forms of instructions to be selected more
often. It does not add FCmp yet, as the results look a little odd, trying
to keep the register in a float reg and having to move it back to a GPR.

Differential Revision: https://reviews.llvm.org/D70997
  • Loading branch information
davemgreen committed Dec 30, 2019
1 parent a5a1415 commit b4abe7a
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 190 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -14913,6 +14913,7 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Mul:
case Instruction::ICmp:
return true;
case Instruction::Sub:
case Instruction::Shl:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/MVETailPredication.cpp
Expand Up @@ -209,7 +209,7 @@ bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
// The vector icmp
if (!match(I, m_ICmp(Pred, m_Instruction(Induction),
m_Instruction(Shuffle))) ||
Pred != ICmpInst::ICMP_ULE || !L->isLoopInvariant(Shuffle))
Pred != ICmpInst::ICMP_ULE)
return false;

// First find the stuff outside the loop which is setting up the limit
Expand All @@ -231,7 +231,7 @@ bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
if (!match(BECount, m_Add(m_Value(TripCount), m_AllOnes())))
return false;

if (TripCount != NumElements)
if (TripCount != NumElements || !L->isLoopInvariant(BECount))
return false;

// Now back to searching inside the loop body...
Expand Down
Expand Up @@ -450,25 +450,24 @@ define dso_local arm_aapcs_vfpcc void @range_test(i32* noalias nocapture %arg, i
; CHECK-NEXT: add.w r12, r3, #3
; CHECK-NEXT: mov.w lr, #1
; CHECK-NEXT: bic r12, r12, #3
; CHECK-NEXT: vdup.32 q0, r2
; CHECK-NEXT: sub.w r12, r12, #4
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: add.w lr, lr, r12, lsr #2
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB5_1: @ %bb12
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrwt.u32 q1, [r2], #16
; CHECK-NEXT: vpttt.s32 ge, q0, q1
; CHECK-NEXT: vcmpt.i32 ne, q1, zr
; CHECK-NEXT: vldrwt.u32 q0, [r12], #16
; CHECK-NEXT: vpttt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q0, r2
; CHECK-NEXT: vctpt.32 r3
; CHECK-NEXT: vldrwt.u32 q2, [r1], #16
; CHECK-NEXT: vldrwt.u32 q1, [r1], #16
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vmul.i32 q1, q2, q1
; CHECK-NEXT: vmul.i32 q0, q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [r0]
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: vstrwt.32 q0, [r0]
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: le lr, .LBB5_1
; CHECK-NEXT: @ %bb.2: @ %bb32
; CHECK-NEXT: pop {r7, pc}
Expand Down

0 comments on commit b4abe7a

Please sign in to comment.