From b6d9add71b1a7bc77ce504ed09a43288ca67c0cd Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Fri, 7 Aug 2020 21:12:52 +0900 Subject: [PATCH] [InstCombine] Optimize select(freeze(icmp eq/ne x, y), x, y) This patch adds an optimization that folds select(freeze(icmp eq/ne x, y), x, y) to x or y. This was needed to resolve slowdown after D84940 is applied. I tried to bake this logic into foldSelectInstWithICmp, but it wasn't clear. This patch conservatively writes the pattern in a separate function, foldSelectWithFrozenICmp. The output does not need freeze; https://alive2.llvm.org/ce/z/X49hNE (from @nikic) Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D85533 --- .../InstCombine/InstCombineSelect.cpp | 29 +++++++++++++++++++ llvm/test/Transforms/InstCombine/select.ll | 27 ++++++++++++----- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index c14a330542c9e..33da3c69ad91c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2526,6 +2526,32 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT, return nullptr; } +static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { + FreezeInst *FI = dyn_cast(Sel.getCondition()); + if (!FI) + return nullptr; + + Value *Cond = FI->getOperand(0); + Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); + + // select (freeze(x == y)), x, y --> y + // select (freeze(x != y)), x, y --> x + // The freeze should be only used by this select. Otherwise, remaining uses of + // the freeze can observe a contradictory value. + // c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 1 + // a = select c, x, y ; + // f(a, c) ; f(poison, 1) cannot happen, but if a is folded + // ; to y, this can happen. + CmpInst::Predicate Pred; + if (FI->hasOneUse() && + match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) && + (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) { + return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal; + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -2977,5 +3003,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *PN = foldSelectToPhi(SI, DT, Builder)) return replaceInstUsesWith(SI, PN); + if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder)) + return replaceInstUsesWith(SI, Fr); + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 5d1fd76b2709a..4c651b7bf1453 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2540,10 +2540,7 @@ define void @cond_freeze_multipleuses(i8 %x, i8 %y) { define i32 @select_freeze_icmp_eq(i32 %x, i32 %y) { ; CHECK-LABEL: @select_freeze_icmp_eq( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] -; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[V]] +; CHECK-NEXT: ret i32 [[Y:%.*]] ; %c = icmp eq i32 %x, %y %c.fr = freeze i1 %c @@ -2553,10 +2550,7 @@ define i32 @select_freeze_icmp_eq(i32 %x, i32 %y) { define i32 @select_freeze_icmp_ne(i32 %x, i32 %y) { ; CHECK-LABEL: @select_freeze_icmp_ne( -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] -; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]] -; CHECK-NEXT: ret i32 [[V]] +; CHECK-NEXT: ret i32 [[X:%.*]] ; %c = icmp ne i32 %x, %y %c.fr = freeze i1 %c @@ -2576,3 +2570,20 @@ define i32 @select_freeze_icmp_else(i32 %x, i32 %y) { %v = select i1 %c.fr, i32 %x, i32 %y ret i32 %v } + +declare void @use_i1_i32(i1, i32) + +define void @select_freeze_icmp_multuses(i32 %x, i32 %y) { +; CHECK-LABEL: @select_freeze_icmp_multuses( +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]] +; CHECK-NEXT: call void @use_i1_i32(i1 [[C_FR]], i32 [[V]]) +; CHECK-NEXT: ret void +; + %c = icmp ne i32 %x, %y + %c.fr = freeze i1 %c + %v = select i1 %c.fr, i32 %x, i32 %y + call void @use_i1_i32(i1 %c.fr, i32 %v) + ret void +}