[GuardWidening] Freeze the introduced use.

Guard widening optimization is able to move the condition from one guard to the previous one. As a result if the condition is poison and orginal second guard is never executed but the first one does, we introduce undefined behavior which was not observed in original program. To resolve the issue we must freeze the condition we are moving. However optimization itself does not know how to work with freeze. Additionally optimization is written in incremental way. For example we have three guards G1(base + 8 < L) G2(base + 16 < L) G3(base + 24 < L) On the first step GW will combine G1 and G2 as G1(base + 8 < L && freeze(base + 16 < L)) G2(true) G3(base + 24 < L) while combining G1 and G3 base appears to be different. To keep optimization enabled after freezing the moving condition, the freeze instruction is pushed as much as possible and later all uses of freezed values are replaced with frozen version. This is similar what instruction combining does but more aggressevely. Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D146699
llvm · Mar 29, 2023 · f4b2360 · f4b2360
1 parent fba2a7c
commit f4b2360
Show file tree

Hide file tree

Showing 13 changed files with 334 additions and 125 deletions.
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -69,6 +69,7 @@ using namespace llvm;
 
 STATISTIC(GuardsEliminated, "Number of eliminated guards");
 STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
+STATISTIC(FreezeAdded, "Number of freeze instruction introduced");
 
 static cl::opt<bool>
     WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
@@ -209,6 +210,10 @@ class GuardWideningImpl {
   bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
                        Value *&Result, bool InvertCondition);
 
+  /// Adds freeze to Orig and push it as far as possible very aggressively.
+  /// Also replaces all uses of frozen instruction with frozen version.
+  Value *freezeAndPush(Value *Orig, Instruction *InsertPt);
+
   /// Represents a range check of the form \c Base + \c Offset u< \c Length,
   /// with the constraint that \c Length is not negative.  \c CheckInst is the
   /// pre-existing instruction in the IR that computes the result of this range
@@ -558,8 +563,80 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
     makeAvailableAt(Op, Loc);
 
   Inst->moveBefore(Loc);
-  // If we moved instruction before guard we must clean poison generating flags.
-  Inst->dropPoisonGeneratingFlags();
+}
+
+// Return Instruction before which we can insert freeze for the value V as close
+// to def as possible. If there is no place to add freeze, return nullptr.
+static Instruction *getFreezeInsertPt(Value *V, const DominatorTree &DT) {
+  auto *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return &*DT.getRoot()->getFirstNonPHIOrDbgOrAlloca();
+
+  auto *Res = I->getInsertionPointAfterDef();
+  // If there is no place to add freeze - return nullptr.
+  if (!Res || !DT.dominates(I, Res))
+    return nullptr;
+
+  // If there is a User dominated by original I, then it should be dominated
+  // by Freeze instruction as well.
+  if (any_of(I->users(), [&](User *U) {
+        Instruction *User = cast<Instruction>(U);
+        return Res != User && DT.dominates(I, User) && !DT.dominates(Res, User);
+      }))
+    return nullptr;
+  return Res;
+}
+
+Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
+  if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
+    return Orig;
+  Instruction *InsertPtAtDef = getFreezeInsertPt(Orig, DT);
+  if (!InsertPtAtDef)
+    return new FreezeInst(Orig, "gw.freeze", InsertPt);
+  SmallSet<Value *, 16> Visited;
+  SmallVector<Value *, 16> Worklist;
+  SmallSet<Instruction *, 16> DropPoisonFlags;
+  SmallSet<Value *, 16> NeedFreeze;
+  Worklist.push_back(Orig);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT))
+      continue;
+
+    Instruction *I = dyn_cast<Instruction>(V);
+    if (!I || canCreateUndefOrPoison(cast<Operator>(I),
+                                     /*ConsiderFlagsAndMetadata*/ false)) {
+      NeedFreeze.insert(V);
+      continue;
+    }
+    // Check all operands. If for any of them we cannot insert Freeze,
+    // stop here. Otherwise, iterate.
+    if (any_of(I->operands(), [&](Value *Op) {
+          return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT);
+        })) {
+      NeedFreeze.insert(I);
+      continue;
+    }
+    DropPoisonFlags.insert(I);
+    append_range(Worklist, I->operands());
+  }
+  for (Instruction *I : DropPoisonFlags)
+    I->dropPoisonGeneratingFlagsAndMetadata();
+
+  Value *Result = Orig;
+  for (Value *V : NeedFreeze) {
+    auto *FreezeInsertPt = getFreezeInsertPt(V, DT);
+    FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr", FreezeInsertPt);
+    ++FreezeAdded;
+    if (V == Orig)
+      Result = FI;
+    V->replaceUsesWithIf(FI, [&](Use & U)->bool { return U.getUser() != FI; });
+  }
+
+  return Result;
 }
 
 bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
@@ -621,6 +698,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
         }
         assert(Result && "Failed to find result value");
         Result->setName("wide.chk");
+        Result = freezeAndPush(Result, InsertPt);
       }
       return true;
     }
@@ -633,6 +711,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
     makeAvailableAt(Cond1, InsertPt);
     if (InvertCondition)
       Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
+    Cond1 = freezeAndPush(Cond1, InsertPt);
     Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
   }
 

diff --git a/llvm/test/Transforms/GuardWidening/basic-loop.ll b/llvm/test/Transforms/GuardWidening/basic-loop.ll
@@ -10,11 +10,13 @@ declare void @llvm.experimental.guard(i1,...)
 define void @widen_within_loop(i1 %cond_0, i1 %cond_1, i1 %cond_2) {
 ; CHECK-LABEL: @widen_within_loop(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND_2_GW_FR:%.*]] = freeze i1 [[COND_2:%.*]]
+; CHECK-NEXT:    [[COND_1_GW_FR:%.*]] = freeze i1 [[COND_1:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    store i32 0, ptr @G, align 4
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
-; CHECK-NEXT:    [[WIDE_CHK1:%.*]] = and i1 [[WIDE_CHK]], [[COND_2:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1_GW_FR]]
+; CHECK-NEXT:    [[WIDE_CHK1:%.*]] = and i1 [[WIDE_CHK]], [[COND_2_GW_FR]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK1]]) [ "deopt"(i32 0) ]
 ; CHECK-NEXT:    store i32 1, ptr @G, align 4
 ; CHECK-NEXT:    store i32 2, ptr @G, align 4
@@ -38,9 +40,11 @@ loop:
 define void @widen_into_preheader(i1 %cond_0, i1 %cond_1, i1 %cond_2) {
 ; CHECK-LABEL: @widen_into_preheader(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND_2_GW_FR:%.*]] = freeze i1 [[COND_2:%.*]]
+; CHECK-NEXT:    [[COND_1_GW_FR:%.*]] = freeze i1 [[COND_1:%.*]]
 ; CHECK-NEXT:    store i32 0, ptr @G, align 4
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
-; CHECK-NEXT:    [[WIDE_CHK1:%.*]] = and i1 [[WIDE_CHK]], [[COND_2:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1_GW_FR]]
+; CHECK-NEXT:    [[WIDE_CHK1:%.*]] = and i1 [[WIDE_CHK]], [[COND_2_GW_FR]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK1]]) [ "deopt"(i32 0) ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
@@ -102,8 +106,9 @@ exit:
 define void @widen_over_common_exit_to_ph(i1 %cond_0, i1 %cond_1, i1 %cond_2) {
 ; CHECK-LABEL: @widen_over_common_exit_to_ph(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND_2_GW_FR:%.*]] = freeze i1 [[COND_2:%.*]]
 ; CHECK-NEXT:    store i32 0, ptr @G, align 4
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_2:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_2_GW_FR]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"(i32 0) ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:

diff --git a/llvm/test/Transforms/GuardWidening/basic.ll b/llvm/test/Transforms/GuardWidening/basic.ll
@@ -9,7 +9,8 @@ declare void @llvm.experimental.guard(i1,...)
 define void @f_0(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @f_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[COND_1_GW_FR:%.*]] = freeze i1 [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1_GW_FR]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
 ; CHECK-NEXT:    ret void
 ;
@@ -24,7 +25,8 @@ entry:
 define void @f_1(i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @f_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[COND_1_GW_FR:%.*]] = freeze i1 [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1_GW_FR]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
 ; CHECK-NEXT:    br i1 undef, label [[LEFT:%.*]], label [[RIGHT:%.*]]
 ; CHECK:       left:
@@ -55,8 +57,9 @@ merge:
 define void @f_2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @f_2(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B_GW_FR:%.*]] = freeze i32 [[B:%.*]]
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10
-; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10
+; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B_GW_FR]], 10
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
 ; CHECK-NEXT:    br i1 undef, label [[LEFT:%.*]], label [[RIGHT:%.*]]
@@ -122,8 +125,9 @@ right:
 define void @f_4(i32 %a, i32 %b) {
 ; CHECK-LABEL: @f_4(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B_GW_FR:%.*]] = freeze i32 [[B:%.*]]
 ; CHECK-NEXT:    [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10
-; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10
+; CHECK-NEXT:    [[COND_1:%.*]] = icmp ult i32 [[B_GW_FR]], 10
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
 ; CHECK-NEXT:    br i1 undef, label [[LOOP:%.*]], label [[LEAVE:%.*]]
@@ -204,8 +208,9 @@ entry:
 define void @f_7(i32 %a, ptr %cond_buf) {
 ; CHECK-LABEL: @f_7(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_GW_FR:%.*]] = freeze i32 [[A:%.*]]
 ; CHECK-NEXT:    [[COND_1:%.*]] = load volatile i1, ptr [[COND_BUF:%.*]], align 1
-; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7
+; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A_GW_FR]], 7
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_3]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[COND_2:%.*]] = load volatile i1, ptr [[COND_BUF]], align 1
@@ -239,12 +244,13 @@ right:
 define void @f_8(i32 %a, i1 %cond_1, i1 %cond_2) {
 ; CHECK-LABEL: @f_8(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_GW_FR:%.*]] = freeze i32 [[A:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[COND_1:%.*]]) [ "deopt"() ]
 ; CHECK-NEXT:    br i1 undef, label [[LOOP]], label [[LEAVE:%.*]]
 ; CHECK:       leave:
-; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7
+; CHECK-NEXT:    [[COND_3:%.*]] = icmp ult i32 [[A_GW_FR]], 7
 ; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_2:%.*]], [[COND_3]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
 ; CHECK-NEXT:    br i1 undef, label [[LOOP2:%.*]], label [[LEAVE2:%.*]]
@@ -332,9 +338,10 @@ no_loop:
 define void @f_11(i32 %a, i1 %cond_0, i1 %cond_1) {
 ; CHECK-LABEL: @f_11(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND_1_GW_FR:%.*]] = freeze i1 [[COND_1:%.*]]
 ; CHECK-NEXT:    br label [[INNER:%.*]]
 ; CHECK:       inner:
-; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]]
+; CHECK-NEXT:    [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1_GW_FR]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
 ; CHECK-NEXT:    br i1 undef, label [[INNER]], label [[OUTER:%.*]]
 ; CHECK:       outer:
@@ -358,7 +365,8 @@ outer:
 define void @f_12(i32 %a0) {
 ; CHECK-LABEL: @f_12(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A1:%.*]] = mul i32 [[A0:%.*]], [[A0]]
+; CHECK-NEXT:    [[A0_GW_FR:%.*]] = freeze i32 [[A0:%.*]]
+; CHECK-NEXT:    [[A1:%.*]] = mul i32 [[A0_GW_FR]], [[A0_GW_FR]]
 ; CHECK-NEXT:    [[A2:%.*]] = mul i32 [[A1]], [[A1]]
 ; CHECK-NEXT:    [[A3:%.*]] = mul i32 [[A2]], [[A2]]
 ; CHECK-NEXT:    [[A4:%.*]] = mul i32 [[A3]], [[A3]]