Skip to content

Commit

Permalink
[ARM] Fix CPSR liveness in tMOVCCr_pseudo lowering.
Browse files Browse the repository at this point in the history
The lowering was missing live-ins in certain cases, like a sequence of
multiple tMOVCCr_pseudo instructions.  This would lead to a verifier
failure, and on pre-v6 Thumb CPSR would be incorrectly clobbered.

For reasons I don't completely understand, it's hard to get a sequence
of multiple tMOVCCr_pseudo instructions; the issue only seems to show up
with 64-bit comparisons where the result is zero-extended. I added some
extra testcases in case that changes in the future. Probably some
optimization opportunities here if anyone is interested. (@test_slt_not
is the case that was getting miscompiled.)

The code to check the liveness of CPSR was stolen from
X86ISelLowering.cpp; maybe it could be refactored into common helper,
but I have no idea where to put it.

Differential Revision: https://reviews.llvm.org/D54192

llvm-svn: 346355
  • Loading branch information
Eli Friedman committed Nov 7, 2018
1 parent 3c5d239 commit 7d7d41d
Show file tree
Hide file tree
Showing 3 changed files with 290 additions and 5 deletions.
44 changes: 44 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -9160,6 +9160,42 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
return ContBB;
}

// The CPSR operand of SelectItr might be missing a kill marker
// because there were multiple uses of CPSR, and ISel didn't know
// which to mark. Figure out whether SelectItr should have had a
// kill marker, and set it if it should. Returns the correct kill
// marker value.
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock* BB,
const TargetRegisterInfo* TRI) {
// Scan forward through BB for a use/def of CPSR.
MachineBasicBlock::iterator miI(std::next(SelectItr));
for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
const MachineInstr& mi = *miI;
if (mi.readsRegister(ARM::CPSR))
return false;
if (mi.definesRegister(ARM::CPSR))
break; // Should have kill-flag - update below.
}

// If we hit the end of the block, check whether CPSR is live into a
// successor.
if (miI == BB->end()) {
for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
sEnd = BB->succ_end();
sItr != sEnd; ++sItr) {
MachineBasicBlock* succ = *sItr;
if (succ->isLiveIn(ARM::CPSR))
return false;
}
}

// We found a def, or hit the end of the basic block and CPSR wasn't live
// out. SelectMI should have a kill flag on CPSR.
SelectItr->addRegisterKilled(ARM::CPSR, TRI);
return true;
}

MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
Expand Down Expand Up @@ -9259,6 +9295,14 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);

// Check whether CPSR is live past the tMOVCCr_pseudo.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (!MI.killsRegister(ARM::CPSR) &&
!checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
copy0MBB->addLiveIn(ARM::CPSR);
sinkMBB->addLiveIn(ARM::CPSR);
}

// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
Expand Down
226 changes: 223 additions & 3 deletions llvm/test/CodeGen/ARM/wide-compares.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7-unknown-linux < %s | FileCheck --check-prefix=CHECK-ARM %s
; RUN: llc -mtriple=thumbv6-unknown-linux < %s | FileCheck --check-prefix=CHECK-THUMB1 %s
; RUN: llc -mtriple=thumbv7-unknown-linux < %s | FileCheck --check-prefix=CHECK-THUMB2 %s
; RUN: llc -mtriple=armv7-unknown-linux < %s -verify-machineinstrs | FileCheck --check-prefix=CHECK-ARM %s
; RUN: llc -mtriple=thumb-eabi < %s -verify-machineinstrs | FileCheck --check-prefix=CHECK-THUMB1-NOMOV %s
; RUN: llc -mtriple=thumbv6-unknown-linux < %s -verify-machineinstrs | FileCheck --check-prefix=CHECK-THUMB1 %s
; RUN: llc -mtriple=thumbv7-unknown-linux < %s -verify-machineinstrs | FileCheck --check-prefix=CHECK-THUMB2 %s

define i32 @test_slt1(i64 %a, i64 %b) {
; CHECK-ARM-LABEL: test_slt1:
Expand All @@ -13,6 +14,18 @@ define i32 @test_slt1(i64 %a, i64 %b) {
; CHECK-ARM-NEXT: mov r0, r12
; CHECK-ARM-NEXT: bx lr
;
; CHECK-THUMB1-NOMOV-LABEL: test_slt1:
; CHECK-THUMB1-NOMOV: @ %bb.0: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: subs r0, r0, r2
; CHECK-THUMB1-NOMOV-NEXT: sbcs r1, r3
; CHECK-THUMB1-NOMOV-NEXT: bge .LBB0_2
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %bb1
; CHECK-THUMB1-NOMOV-NEXT: movs r0, #1
; CHECK-THUMB1-NOMOV-NEXT: bx lr
; CHECK-THUMB1-NOMOV-NEXT: .LBB0_2: @ %bb2
; CHECK-THUMB1-NOMOV-NEXT: movs r0, #2
; CHECK-THUMB1-NOMOV-NEXT: bx lr
;
; CHECK-THUMB1-LABEL: test_slt1:
; CHECK-THUMB1: @ %bb.0: @ %entry
; CHECK-THUMB1-NEXT: subs r0, r0, r2
Expand Down Expand Up @@ -57,6 +70,23 @@ define void @test_slt2(i64 %a, i64 %b) {
; CHECK-ARM-NEXT: bl g
; CHECK-ARM-NEXT: pop {r11, pc}
;
; CHECK-THUMB1-NOMOV-LABEL: test_slt2:
; CHECK-THUMB1-NOMOV: @ %bb.0: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: .save {r7, lr}
; CHECK-THUMB1-NOMOV-NEXT: push {r7, lr}
; CHECK-THUMB1-NOMOV-NEXT: subs r0, r0, r2
; CHECK-THUMB1-NOMOV-NEXT: sbcs r1, r3
; CHECK-THUMB1-NOMOV-NEXT: bge .LBB1_2
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %bb1
; CHECK-THUMB1-NOMOV-NEXT: bl f
; CHECK-THUMB1-NOMOV-NEXT: b .LBB1_3
; CHECK-THUMB1-NOMOV-NEXT: .LBB1_2: @ %bb2
; CHECK-THUMB1-NOMOV-NEXT: bl g
; CHECK-THUMB1-NOMOV-NEXT: .LBB1_3: @ %bb1
; CHECK-THUMB1-NOMOV-NEXT: pop {r7}
; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
; CHECK-THUMB1-NOMOV-NEXT: bx r0
;
; CHECK-THUMB1-LABEL: test_slt2:
; CHECK-THUMB1: @ %bb.0: @ %entry
; CHECK-THUMB1-NEXT: push {r7, lr}
Expand Down Expand Up @@ -95,3 +125,193 @@ bb2:

declare void @f()
declare void @g()

define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) {
; CHECK-ARM-LABEL: test_slt_select:
; CHECK-ARM: @ %bb.0: @ %entry
; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-ARM-NEXT: ldr r12, [sp, #32]
; CHECK-ARM-NEXT: mov r6, #0
; CHECK-ARM-NEXT: ldr lr, [sp, #24]
; CHECK-ARM-NEXT: ldr r7, [sp, #36]
; CHECK-ARM-NEXT: ldr r5, [sp, #28]
; CHECK-ARM-NEXT: subs r4, lr, r12
; CHECK-ARM-NEXT: sbcs r7, r5, r7
; CHECK-ARM-NEXT: movwlo r6, #1
; CHECK-ARM-NEXT: cmp r6, #0
; CHECK-ARM-NEXT: moveq r0, r2
; CHECK-ARM-NEXT: moveq r1, r3
; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-THUMB1-NOMOV-LABEL: test_slt_select:
; CHECK-THUMB1-NOMOV: @ %bb.0: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-THUMB1-NOMOV-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-THUMB1-NOMOV-NEXT: .pad #4
; CHECK-THUMB1-NOMOV-NEXT: sub sp, #4
; CHECK-THUMB1-NOMOV-NEXT: ldr r4, [sp, #36]
; CHECK-THUMB1-NOMOV-NEXT: ldr r5, [sp, #28]
; CHECK-THUMB1-NOMOV-NEXT: ldr r6, [sp, #32]
; CHECK-THUMB1-NOMOV-NEXT: ldr r7, [sp, #24]
; CHECK-THUMB1-NOMOV-NEXT: subs r6, r7, r6
; CHECK-THUMB1-NOMOV-NEXT: sbcs r5, r4
; CHECK-THUMB1-NOMOV-NEXT: blo .LBB2_2
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: movs r4, #0
; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0
; CHECK-THUMB1-NOMOV-NEXT: beq .LBB2_3
; CHECK-THUMB1-NOMOV-NEXT: b .LBB2_4
; CHECK-THUMB1-NOMOV-NEXT: .LBB2_2:
; CHECK-THUMB1-NOMOV-NEXT: movs r4, #1
; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0
; CHECK-THUMB1-NOMOV-NEXT: bne .LBB2_4
; CHECK-THUMB1-NOMOV-NEXT: .LBB2_3: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: movs r0, r2
; CHECK-THUMB1-NOMOV-NEXT: .LBB2_4: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0
; CHECK-THUMB1-NOMOV-NEXT: bne .LBB2_6
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.5: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: movs r1, r3
; CHECK-THUMB1-NOMOV-NEXT: .LBB2_6: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: add sp, #4
; CHECK-THUMB1-NOMOV-NEXT: pop {r4, r5, r6, r7}
; CHECK-THUMB1-NOMOV-NEXT: pop {r2}
; CHECK-THUMB1-NOMOV-NEXT: bx r2
;
; CHECK-THUMB1-LABEL: test_slt_select:
; CHECK-THUMB1: @ %bb.0: @ %entry
; CHECK-THUMB1-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-THUMB1-NEXT: sub sp, #4
; CHECK-THUMB1-NEXT: ldr r4, [sp, #36]
; CHECK-THUMB1-NEXT: ldr r5, [sp, #28]
; CHECK-THUMB1-NEXT: ldr r6, [sp, #32]
; CHECK-THUMB1-NEXT: ldr r7, [sp, #24]
; CHECK-THUMB1-NEXT: subs r6, r7, r6
; CHECK-THUMB1-NEXT: sbcs r5, r4
; CHECK-THUMB1-NEXT: blo .LBB2_2
; CHECK-THUMB1-NEXT: @ %bb.1: @ %entry
; CHECK-THUMB1-NEXT: movs r4, #0
; CHECK-THUMB1-NEXT: cmp r4, #0
; CHECK-THUMB1-NEXT: beq .LBB2_3
; CHECK-THUMB1-NEXT: b .LBB2_4
; CHECK-THUMB1-NEXT: .LBB2_2:
; CHECK-THUMB1-NEXT: movs r4, #1
; CHECK-THUMB1-NEXT: cmp r4, #0
; CHECK-THUMB1-NEXT: bne .LBB2_4
; CHECK-THUMB1-NEXT: .LBB2_3: @ %entry
; CHECK-THUMB1-NEXT: mov r0, r2
; CHECK-THUMB1-NEXT: .LBB2_4: @ %entry
; CHECK-THUMB1-NEXT: cmp r4, #0
; CHECK-THUMB1-NEXT: beq .LBB2_6
; CHECK-THUMB1-NEXT: @ %bb.5: @ %entry
; CHECK-THUMB1-NEXT: add sp, #4
; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-THUMB1-NEXT: .LBB2_6: @ %entry
; CHECK-THUMB1-NEXT: mov r1, r3
; CHECK-THUMB1-NEXT: add sp, #4
; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
;
; CHECK-THUMB2-LABEL: test_slt_select:
; CHECK-THUMB2: @ %bb.0: @ %entry
; CHECK-THUMB2-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-THUMB2-NEXT: sub sp, #4
; CHECK-THUMB2-NEXT: ldrd r12, r7, [sp, #32]
; CHECK-THUMB2-NEXT: movs r6, #0
; CHECK-THUMB2-NEXT: ldrd lr, r5, [sp, #24]
; CHECK-THUMB2-NEXT: subs.w r4, lr, r12
; CHECK-THUMB2-NEXT: sbcs.w r7, r5, r7
; CHECK-THUMB2-NEXT: it lo
; CHECK-THUMB2-NEXT: movlo r6, #1
; CHECK-THUMB2-NEXT: cmp r6, #0
; CHECK-THUMB2-NEXT: itt eq
; CHECK-THUMB2-NEXT: moveq r0, r2
; CHECK-THUMB2-NEXT: moveq r1, r3
; CHECK-THUMB2-NEXT: add sp, #4
; CHECK-THUMB2-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%cmp = icmp ult i64 %a, %b
%r1 = select i1 %cmp, i64 %c, i64 %d
ret i64 %r1
}

define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) {
; CHECK-ARM-LABEL: test_slt_not:
; CHECK-ARM: @ %bb.0: @ %entry
; CHECK-ARM-NEXT: ldr r12, [sp]
; CHECK-ARM-NEXT: mov r1, #0
; CHECK-ARM-NEXT: ldr r0, [sp, #4]
; CHECK-ARM-NEXT: subs r2, r2, r12
; CHECK-ARM-NEXT: sbcs r0, r3, r0
; CHECK-ARM-NEXT: mov r0, #0
; CHECK-ARM-NEXT: movwge r1, #1
; CHECK-ARM-NEXT: movwlt r0, #1
; CHECK-ARM-NEXT: bx lr
;
; CHECK-THUMB1-NOMOV-LABEL: test_slt_not:
; CHECK-THUMB1-NOMOV: @ %bb.0: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: .save {r4, r5, r7, lr}
; CHECK-THUMB1-NOMOV-NEXT: push {r4, r5, r7, lr}
; CHECK-THUMB1-NOMOV-NEXT: movs r1, #1
; CHECK-THUMB1-NOMOV-NEXT: movs r4, #0
; CHECK-THUMB1-NOMOV-NEXT: ldr r0, [sp, #20]
; CHECK-THUMB1-NOMOV-NEXT: ldr r5, [sp, #16]
; CHECK-THUMB1-NOMOV-NEXT: subs r2, r2, r5
; CHECK-THUMB1-NOMOV-NEXT: sbcs r3, r0
; CHECK-THUMB1-NOMOV-NEXT: push {r1}
; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
; CHECK-THUMB1-NOMOV-NEXT: blt .LBB3_2
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: push {r4}
; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
; CHECK-THUMB1-NOMOV-NEXT: .LBB3_2: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: bge .LBB3_4
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.3: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: movs r1, r4
; CHECK-THUMB1-NOMOV-NEXT: .LBB3_4: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: pop {r4, r5, r7}
; CHECK-THUMB1-NOMOV-NEXT: pop {r2}
; CHECK-THUMB1-NOMOV-NEXT: bx r2
;
; CHECK-THUMB1-LABEL: test_slt_not:
; CHECK-THUMB1: @ %bb.0: @ %entry
; CHECK-THUMB1-NEXT: push {r4, r5, r7, lr}
; CHECK-THUMB1-NEXT: movs r1, #1
; CHECK-THUMB1-NEXT: movs r4, #0
; CHECK-THUMB1-NEXT: ldr r0, [sp, #20]
; CHECK-THUMB1-NEXT: ldr r5, [sp, #16]
; CHECK-THUMB1-NEXT: subs r2, r2, r5
; CHECK-THUMB1-NEXT: sbcs r3, r0
; CHECK-THUMB1-NEXT: mov r0, r1
; CHECK-THUMB1-NEXT: bge .LBB3_3
; CHECK-THUMB1-NEXT: @ %bb.1: @ %entry
; CHECK-THUMB1-NEXT: blt .LBB3_4
; CHECK-THUMB1-NEXT: .LBB3_2: @ %entry
; CHECK-THUMB1-NEXT: pop {r4, r5, r7, pc}
; CHECK-THUMB1-NEXT: .LBB3_3: @ %entry
; CHECK-THUMB1-NEXT: mov r0, r4
; CHECK-THUMB1-NEXT: bge .LBB3_2
; CHECK-THUMB1-NEXT: .LBB3_4: @ %entry
; CHECK-THUMB1-NEXT: mov r1, r4
; CHECK-THUMB1-NEXT: pop {r4, r5, r7, pc}
;
; CHECK-THUMB2-LABEL: test_slt_not:
; CHECK-THUMB2: @ %bb.0: @ %entry
; CHECK-THUMB2-NEXT: ldr.w r12, [sp]
; CHECK-THUMB2-NEXT: movs r1, #0
; CHECK-THUMB2-NEXT: ldr r0, [sp, #4]
; CHECK-THUMB2-NEXT: subs.w r2, r2, r12
; CHECK-THUMB2-NEXT: sbcs.w r0, r3, r0
; CHECK-THUMB2-NEXT: mov.w r0, #0
; CHECK-THUMB2-NEXT: ite lt
; CHECK-THUMB2-NEXT: movlt r0, #1
; CHECK-THUMB2-NEXT: movge r1, #1
; CHECK-THUMB2-NEXT: bx lr
entry:
%cmp = icmp slt i64 %a, %b
%not = xor i1 %cmp, true
%r1 = zext i1 %cmp to i32
%r2 = zext i1 %not to i32
%z = insertvalue { i32, i32 } undef, i32 %r1, 0
%z2 = insertvalue { i32, i32 } %z, i32 %r2, 1
ret { i32, i32 } %z2
}
25 changes: 23 additions & 2 deletions llvm/test/CodeGen/Thumb/select.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=thumb-pc-linux-gnueabi | FileCheck -check-prefix=CHECK-EABI %s
; RUN: llc < %s -mtriple=thumb-apple-darwin -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=thumb-pc-linux-gnueabi -verify-machineinstrs | FileCheck -check-prefix=CHECK-EABI %s

define i32 @f1(i32 %a.s) {
entry:
Expand Down Expand Up @@ -80,3 +80,24 @@ define double @f7(double %a, double %b) {
; CHECK-EABI: __aeabi_dcmplt
; CHECK-EABI: {{bne|beq}}
; CHECK-EABI: {{bne|beq}}

define {i32, i32} @f8(i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
%cmp = icmp slt i32 %a, %b
%r1 = select i1 %cmp, i32 %c, i32 %a
%r2 = select i1 %cmp, i32 %d, i32 %b
%z = insertvalue { i32, i32 } undef, i32 %r1, 0
%z2 = insertvalue { i32, i32 } %z, i32 %r2, 1
ret { i32, i32 } %z2
}

; CHECK-LABEL: f8:
; CHECK: cmp r0, r1
; CHECK: blt
; CHECK: movs
; CHECK: cmp r0, r1
; CHECK: blt
; CHECK: movs
; CHECK: movs
; CHECK: movs
; CHECK: bx lr

0 comments on commit 7d7d41d

Please sign in to comment.