Skip to content

Commit

Permalink
[ARM] Expand cannotInsertWDLSTPBetween to the last instruction
Browse files Browse the repository at this point in the history
9d9a11c added this check for predicatable instructions between the
D/WLSTP and the loop's start, but it skipped the last instruction in the
block. Pass iterators instead, so the checked range runs up to the block's end().

Differential Revision: https://reviews.llvm.org/D88354
  • Loading branch information
davemgreen committed Sep 28, 2020
1 parent 070a1d5 commit e4b9867
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 19 deletions.
8 changes: 3 additions & 5 deletions llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
Expand Up @@ -629,17 +629,15 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
// width, the Loop Start instruction will immediately generate one or more
// false lane masks, which can incorrectly affect the subsequent MVE
// instructions in the preheader.
auto cannotInsertWDLSTPBetween = [](MachineInstr *Begin,
MachineInstr *End) {
auto I = MachineBasicBlock::iterator(Begin);
auto E = MachineBasicBlock::iterator(End);
auto cannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
for (; I != E; ++I)
if (shouldInspect(*I))
return true;
return false;
};

if (cannotInsertWDLSTPBetween(StartInsertPt, &InsertBB->back()))
if (cannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end()))
return false;

// Especially in the case of while loops, InsertBB may not be the
Expand Down
29 changes: 20 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
Expand Up @@ -46,6 +46,7 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
; CHECK: tCMPi8 renamable $r1, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: renamable $r12 = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: tBcc %bb.2, 2 /* CC::hs */, killed $cpsr
; CHECK: bb.1:
; CHECK: liveins: $r2
Expand All @@ -54,24 +55,31 @@ body: |
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: liveins: $r0, $r1, $r2
; CHECK: liveins: $r0, $r1, $r2, $r12
; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
; CHECK: tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: $r12 = tMOVr renamable $r1, 11 /* CC::lt */, killed $cpsr, implicit killed renamable $r12, implicit killed $itstate
; CHECK: renamable $r3 = t2SUBrr renamable $r1, killed renamable $r12, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
; CHECK: $r12 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: renamable $r4 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
; CHECK: bb.3:
; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4
; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r3, 0, 0, $noreg
; CHECK: renamable $q0 = nnan ninf nsz MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.3
; CHECK: bb.4:
; CHECK: successors: %bb.5(0x80000000)
; CHECK: liveins: $q0, $r0, $r1, $r2
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: $lr = t2DLS killed $r4
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg
; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0
; CHECK: $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg
Expand All @@ -80,13 +88,16 @@ body: |
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
; CHECK: bb.5:
; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $s4
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $s4
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: $r4 = VMOVRS $s4, 14 /* CC::al */, $noreg
; CHECK: renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
; CHECK: renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 0, $noreg, undef renamable $q2
; CHECK: renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg
; CHECK: MVE_VPST 2, implicit $vpr
; CHECK: renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr
; CHECK: renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 1, renamable $vpr, undef renamable $q2
; CHECK: renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 1, killed renamable $vpr
; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.5
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.5
; CHECK: bb.6:
; CHECK: liveins: $q0, $r1, $r2
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
Expand Down
22 changes: 17 additions & 5 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
Expand Up @@ -6,10 +6,19 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: cmp r1, #4
; CHECK-NEXT: it ge
; CHECK-NEXT: movge r3, #4
; CHECK-NEXT: mov.w r12, #1
; CHECK-NEXT: subs r3, r1, r3
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: adds r3, #3
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dlstp.32 lr, r3
; CHECK-NEXT: mov r4, lr
; CHECK-NEXT: .LBB0_1: @ %do.body.i
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r12], #16
Expand All @@ -18,18 +27,21 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: dlstp.32 lr, r3
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vadd.f32 s0, s3, s3
; CHECK-NEXT: vcvt.f32.u32 s4, s4
; CHECK-NEXT: vdiv.f32 s0, s0, s4
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB0_3: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vsub.f32 q1, q1, r12
; CHECK-NEXT: vfma.f32 q0, q1, q1
; CHECK-NEXT: letp lr, .LBB0_3
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vpsttt
; CHECK-NEXT: vldrwt.u32 q1, [r0], #16
; CHECK-NEXT: vsubt.f32 q1, q1, r12
; CHECK-NEXT: vfmat.f32 q0, q1, q1
; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end
; CHECK-NEXT: subs r0, r1, #1
; CHECK-NEXT: vadd.f32 s0, s3, s3
Expand Down

0 comments on commit e4b9867

Please sign in to comment.