Skip to content

Commit b6d9add

Browse files
committed
[InstCombine] Optimize select(freeze(icmp eq/ne x, y), x, y)
This patch adds an optimization that folds select(freeze(icmp eq/ne x, y), x, y) to y (for eq) or x (for ne). This is needed to resolve the slowdown seen after D84940 is applied. I tried to bake this logic into foldSelectInstWithICmp, but it wasn't clear, so this patch conservatively writes the pattern in a separate function, foldSelectWithFrozenICmp. The output does not need a freeze; see https://alive2.llvm.org/ce/z/X49hNE (from @nikic). Reviewed By: nikic. Differential Revision: https://reviews.llvm.org/D85533
1 parent 5d59385 commit b6d9add

File tree

2 files changed

+48
-8
lines changed

2 files changed

+48
-8
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2526,6 +2526,32 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
25262526
return nullptr;
25272527
}
25282528

2529+
/// Fold a select whose condition is a frozen equality comparison of its own
/// two arms:
///   select (freeze (icmp eq x, y)), x, y --> y
///   select (freeze (icmp ne x, y)), x, y --> x
///
/// \param Sel     The select instruction to inspect.
/// \param Builder Not used by this function.
/// \returns The existing select operand that can replace \p Sel (no new
///          instruction and no freeze is created), or nullptr when the
///          pattern does not match.
static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
2530+
FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
2531+
if (!FI)
2532+
return nullptr;
2533+
2534+
// Look through the freeze at the value being frozen.
Value *Cond = FI->getOperand(0);
2535+
Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
2536+
2537+
// select (freeze(x == y)), x, y --> y
2538+
// select (freeze(x != y)), x, y --> x
2539+
// The freeze must be used only by this select. Otherwise, the remaining uses
2540+
// of the freeze could observe a value that contradicts the folded result.
2541+
// c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 1
2542+
// a = select c, x, y ;
2543+
// f(a, c) ; f(poison, 1) cannot happen, but if a is folded
2544+
// ; to y, this can happen.
2545+
CmpInst::Predicate Pred;
2546+
// m_c_ICmp is the commutative matcher, so the compare operands may appear as
// (TrueVal, FalseVal) or (FalseVal, TrueVal). Only eq/ne are handled.
if (FI->hasOneUse() &&
2547+
match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) &&
2548+
(Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) {
2549+
// eq: if the arms are equal, picking TrueVal is the same as picking FalseVal;
// if they differ, FalseVal is picked. Either way the result is FalseVal.
// ne: by the symmetric argument the result is always TrueVal.
return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal;
2550+
}
2551+
2552+
return nullptr;
2553+
}
2554+
25292555
Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
25302556
Value *CondVal = SI.getCondition();
25312557
Value *TrueVal = SI.getTrueValue();
@@ -2977,5 +3003,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
29773003
if (Instruction *PN = foldSelectToPhi(SI, DT, Builder))
29783004
return replaceInstUsesWith(SI, PN);
29793005

3006+
if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
3007+
return replaceInstUsesWith(SI, Fr);
3008+
29803009
return nullptr;
29813010
}

llvm/test/Transforms/InstCombine/select.ll

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2540,10 +2540,7 @@ define void @cond_freeze_multipleuses(i8 %x, i8 %y) {
25402540

25412541
define i32 @select_freeze_icmp_eq(i32 %x, i32 %y) {
25422542
; CHECK-LABEL: @select_freeze_icmp_eq(
2543-
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
2544-
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]]
2545-
; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]]
2546-
; CHECK-NEXT: ret i32 [[V]]
2543+
; CHECK-NEXT: ret i32 [[Y:%.*]]
25472544
;
25482545
%c = icmp eq i32 %x, %y
25492546
%c.fr = freeze i1 %c
@@ -2553,10 +2550,7 @@ define i32 @select_freeze_icmp_eq(i32 %x, i32 %y) {
25532550

25542551
define i32 @select_freeze_icmp_ne(i32 %x, i32 %y) {
25552552
; CHECK-LABEL: @select_freeze_icmp_ne(
2556-
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
2557-
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]]
2558-
; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]]
2559-
; CHECK-NEXT: ret i32 [[V]]
2553+
; CHECK-NEXT: ret i32 [[X:%.*]]
25602554
;
25612555
%c = icmp ne i32 %x, %y
25622556
%c.fr = freeze i1 %c
@@ -2576,3 +2570,20 @@ define i32 @select_freeze_icmp_else(i32 %x, i32 %y) {
25762570
%v = select i1 %c.fr, i32 %x, i32 %y
25772571
ret i32 %v
25782572
}
2573+
2574+
declare void @use_i1_i32(i1, i32)
2575+
2576+
; Negative test: the frozen compare %c.fr is used by both the select and the
; call, so the CHECK lines expect the icmp/freeze/select sequence to remain.
define void @select_freeze_icmp_multuses(i32 %x, i32 %y) {
2577+
; CHECK-LABEL: @select_freeze_icmp_multuses(
2578+
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
2579+
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]]
2580+
; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]]
2581+
; CHECK-NEXT: call void @use_i1_i32(i1 [[C_FR]], i32 [[V]])
2582+
; CHECK-NEXT: ret void
2583+
;
2584+
%c = icmp ne i32 %x, %y
2585+
%c.fr = freeze i1 %c
2586+
%v = select i1 %c.fr, i32 %x, i32 %y
2587+
call void @use_i1_i32(i1 %c.fr, i32 %v)
2588+
ret void
2589+
}

0 commit comments

Comments
 (0)