Skip to content

Commit

Permalink
Fix ARM peephole optimizeCompare to avoid optimizing unsigned cmp to 0.
Browse files Browse the repository at this point in the history
Summary:
Previously it only avoided optimizing signed comparisons to 0.
Sometimes the DAGCombiner will optimize the unsigned comparisons
to 0 before it gets to the peephole pass, but sometimes it doesn't.

Fix for PR22373.

Test Plan: test/CodeGen/ARM/sub-cmp-peephole.ll

Reviewers: jfb, manmanren

Subscribers: aemerson, llvm-commits

Differential Revision: http://reviews.llvm.org/D7274

llvm-svn: 227809
  • Loading branch information
Jan Wen Voung committed Feb 2, 2015
1 parent 526e092 commit d21194f
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 11 deletions.
34 changes: 23 additions & 11 deletions llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Expand Up @@ -2400,7 +2400,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri, we need to check Sub, thus we can't return here.
// For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
// Thus we cannot return here.
if (CmpInstr->getOpcode() == ARM::CMPri ||
CmpInstr->getOpcode() == ARM::t2CMPri)
MI = nullptr;
Expand Down Expand Up @@ -2479,8 +2480,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
case ARM::t2EORrr:
case ARM::t2EORri: {
// Scan forward for the use of CPSR
// When checking against MI: if it's a conditional code requires
// checking of V bit, then this is not safe to do.
// When checking against MI: if it's a conditional code that requires
// checking of the V bit or C bit, then this is not safe to do.
// It is safe to remove CmpInstr if CPSR is redefined or killed.
// If we are done with the basic block, we need to check whether CPSR is
// live-out.
Expand Down Expand Up @@ -2547,19 +2548,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
OperandsToUpdate.push_back(
std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
} else
} else {
// No Sub, so this is x = <op> y, z; cmp x, 0.
switch (CC) {
default:
case ARMCC::EQ: // Z
case ARMCC::NE: // Z
case ARMCC::MI: // N
case ARMCC::PL: // N
case ARMCC::AL: // none
// CPSR can be used multiple times, we should continue.
break;
case ARMCC::VS:
case ARMCC::VC:
case ARMCC::GE:
case ARMCC::LT:
case ARMCC::GT:
case ARMCC::LE:
case ARMCC::HS: // C
case ARMCC::LO: // C
case ARMCC::VS: // V
case ARMCC::VC: // V
case ARMCC::HI: // C Z
case ARMCC::LS: // C Z
case ARMCC::GE: // N V
case ARMCC::LT: // N V
case ARMCC::GT: // Z N V
case ARMCC::LE: // Z N V
// The instruction uses the V bit or C bit which is not safe.
return false;
}
}
}
}

Expand Down
60 changes: 60 additions & 0 deletions llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
Expand Up @@ -88,6 +88,19 @@ if.end11: ; preds = %num2long.exit
ret i32 23
}

; When considering the producer of cmp's src as the subsuming instruction,
; only consider that when the comparison is to 0.
define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_src_nonzero:
; CHECK: sub
; CHECK: cmp
%sub = sub i32 %a, %b
%cmp = icmp eq i32 %sub, 17
%ret = select i1 %cmp, i32 %x, i32 %y
ret i32 %ret
}

define float @float_sel(i32 %a, i32 %b, float %x, float %y) {
entry:
; CHECK-LABEL: float_sel:
Expand Down Expand Up @@ -144,3 +157,50 @@ entry:
store i32 %sub, i32* @t
ret double %ret
}

declare void @abort()
declare void @exit(i32)

; If the comparison uses the V bit (signed overflow/underflow), we can't
; omit the comparison.
define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_slt0
; CHECK: sub
; CHECK: cmp
; CHECK: bge
%load = load i32* @t, align 4
%sub = sub i32 %load, 17
%cmp = icmp slt i32 %sub, 0
br i1 %cmp, label %if.then, label %if.else

if.then:
call void @abort()
unreachable

if.else:
call void @exit(i32 0)
unreachable
}

; Same for the C bit. (Note the ult X, 0 is trivially
; false, so the DAG combiner may or may not optimize it).
define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_ult0
; CHECK: sub
; CHECK: cmp
; CHECK: bhs
%load = load i32* @t, align 4
%sub = sub i32 %load, 17
%cmp = icmp ult i32 %sub, 0
br i1 %cmp, label %if.then, label %if.else

if.then:
call void @abort()
unreachable

if.else:
call void @exit(i32 0)
unreachable
}

0 comments on commit d21194f

Please sign in to comment.