- 
                Notifications
    
You must be signed in to change notification settings  - Fork 15.1k
 
[SCEV] Add special handling for phi when BranchInst's edge doesn't dominate use in phi #163146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…minate use in phi
Enhance createNodeFromSelectLikePHI to handle following pattern, where
left edge from `entry` to `cmp_true` doesn't dominate use `32` in the phi.
  entry:
    %b = and i32 %a, 31
    %c = icmp eq i32 %b, 0
    br i1 %c, label %cmp_true, label %cmp_false
  cmp_false:
    br i1 %p, label %cmp_true, label %merge
  cmp_true:
    br label %merge
  merge:
    %d = phi i32 [32, %cmp_true], [%b, %cmp_false]
This is a common pattern for computing OpenCL sub-group size on our
downstream target.
This PR refines the phi's range from [0, 33) to [1, 33).
MaxBECount for the loop in the lit is tightened from -1 to 32, which
enables loop unrolling and improves performance.
    | 
          
 @llvm/pr-subscribers-llvm-analysis Author: Wenju He (wenju-he) ChangesEnhance createNodeFromSelectLikePHI to handle following pattern, where left edge from  Full diff: https://github.com/llvm/llvm-project/pull/163146.diff 3 Files Affected: 
 diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 8876e4ed6ae4f..641a2e53b825a 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1857,6 +1857,11 @@ class ScalarEvolution {
   const SCEV *createNodeForSelectOrPHI(Value *V, Value *Cond, Value *TrueVal,
                                        Value *FalseVal);
 
+  /// Provide special handling for phi when BranchInst's one edge does not
+  /// dominate compare operand's use in phi.
+  std::optional<const SCEV *>
+  createNodeForPHIWithEdgeNotDominatesUse(BranchInst *BI, PHINode *PN);
+
   /// Provide the special handling we need to analyze GEP SCEVs.
   const SCEV *createNodeForGEP(GEPOperator *GEP);
 
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 30bcff7c14923..1b5fbf5bb70f7 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6046,11 +6046,15 @@ const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
     auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
     Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
 
-    if (BI && BI->isConditional() &&
-        BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
-        properlyDominates(getSCEV(LHS), PN->getParent()) &&
-        properlyDominates(getSCEV(RHS), PN->getParent()))
-      return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
+    if (BI && BI->isConditional()) {
+      if (BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
+          properlyDominates(getSCEV(LHS), PN->getParent()) &&
+          properlyDominates(getSCEV(RHS), PN->getParent()))
+        return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
+      if (std::optional<const SCEV *> S =
+              createNodeForPHIWithEdgeNotDominatesUse(BI, PN))
+        return *S;
+    }
   }
 
   return nullptr;
@@ -6339,6 +6343,66 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Value *V, Value *Cond,
   return createNodeForSelectOrPHIViaUMinSeq(V, Cond, TrueVal, FalseVal);
 }
 
+// Recognize PHI of the form: (x == 0 ? C : (Pred ? C : x)) where C is a
+// constant equal to umax(x) + 1. Canonicalize it to: umax(1, x + 1).
+// e.g.
+//   %x = and i32 %a, 31                              ; %x is in [0,31]
+//   %c = icmp eq i32 %x, 0
+//   br i1 %c, label %cmp_true, label %cmp_false
+// cmp_false:
+//   br i1 %p, label %cmp_true, label %merge
+// cmp_true:
+//   br label %merge
+// merge:
+//   %d = phi i32 [32, %cmp_true], [%x, %cmp_false]   ; 32 == umax(%x) + 1
+std::optional<const SCEV *>
+ScalarEvolution::createNodeForPHIWithEdgeNotDominatesUse(BranchInst *BI,
+                                                         PHINode *PN) {
+  auto *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return std::nullopt;
+
+  Value *LHS = Cond->getOperand(0);
+  Value *RHS = Cond->getOperand(1);
+
+  // Match an ICmpInst condition of the form "x == 0".
+  auto *ZeroC = dyn_cast<ConstantInt>(RHS);
+  if (Cond->getPredicate() != ICmpInst::ICMP_EQ || !ZeroC || !ZeroC->isZero())
+    return std::nullopt;
+
+  BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
+  BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
+
+  if (!LeftEdge.isSingleEdge())
+    return std::nullopt;
+
+  assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
+
+  Use &LeftUse = PN->getOperandUse(0);
+  Use &RightUse = PN->getOperandUse(1);
+
+  ConstantInt *ConstantVal = nullptr;
+  Value *FalseVal = nullptr;
+  if (!DT.dominates(LeftEdge, LeftUse) && isa<ConstantInt>(LeftUse) &&
+      DT.dominates(RightEdge, RightUse)) {
+    ConstantVal = cast<ConstantInt>(LeftUse);
+    FalseVal = RightUse;
+  }
+
+  if (!ConstantVal || FalseVal != LHS)
+    return std::nullopt;
+
+  Type *Ty = PN->getType();
+  const SCEV *X = getNoopOrZeroExtend(getSCEV(LHS), Ty);
+  APInt MaxRange = getUnsignedRangeMax(X);
+  if (!MaxRange.isMaxValue() && ConstantVal->getValue() == (MaxRange + 1)) {
+    auto *One = getOne(Ty);
+    return getUMaxExpr(One, getAddExpr(X, One));
+  }
+
+  return std::nullopt;
+}
+
 /// Expand GEP instructions into add and multiply operations. This allows them
 /// to be analyzed by regular SCEV code.
 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
diff --git a/llvm/test/Analysis/ScalarEvolution/max-be-count-phi-not-dominate-use.ll b/llvm/test/Analysis/ScalarEvolution/max-be-count-phi-not-dominate-use.ll
new file mode 100644
index 0000000000000..9d7c8e4aa54ed
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/max-be-count-phi-not-dominate-use.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes='print<scalar-evolution>' -disable-output %s 2>&1 | FileCheck %s
+
+define void @max-be-count-eq-distance-add-constant-var-phi(i32 %a, i1 %p) {
+; CHECK-LABEL: 'max-be-count-eq-distance-add-constant-var-phi'
+; CHECK-NEXT:  Classifying expressions for: @max-be-count-eq-distance-add-constant-var-phi
+; CHECK-NEXT:    %b = and i32 %a, 31
+; CHECK-NEXT:    --> (zext i5 (trunc i32 %a to i5) to i32) U: [0,32) S: [0,32)
+; CHECK-NEXT:    %d = phi i32 [ 32, %cmp_true ], [ %b, %cmp_false ]
+; CHECK-NEXT:    --> (1 + (zext i5 (trunc i32 %a to i5) to i32))<nuw><nsw> U: [1,33) S: [1,33)
+; CHECK-NEXT:    %i = phi i32 [ 0, %merge ], [ %inc, %for.body ]
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%for.body> U: [0,32) S: [0,32) Exits: (zext i5 (trunc i32 %a to i5) to i32) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %inc = add i32 %i, 1
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%for.body> U: [1,33) S: [1,33) Exits: (1 + (zext i5 (trunc i32 %a to i5) to i32))<nuw><nsw> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @max-be-count-eq-distance-add-constant-var-phi
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (zext i5 (trunc i32 %a to i5) to i32)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 31
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (zext i5 (trunc i32 %a to i5) to i32)
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+;
+entry:
+  %b = and i32 %a, 31
+  %c = icmp eq i32 %b, 0
+  br i1 %c, label %cmp_true, label %cmp_false
+
+cmp_false:
+  br i1 %p, label %cmp_true, label %merge
+
+cmp_true:
+  br label %merge
+
+merge:
+  %d = phi i32 [32, %cmp_true], [%b, %cmp_false]
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %merge ], [ %inc, %for.body ]
+  %inc = add i32 %i, 1
+  %exitcond.not = icmp eq i32 %inc, %d
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
 | 
    
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to add an InstCombine or something like that for this pattern?
I also have a pending patch that touches this code (#152823), but I guess we'll see which is ready first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, I'm not understanding the pattern. Can you write it in Alive2? I got as far as the following (https://alive2.llvm.org/ce/z/7WAcTS):
define i32 @src(i32 %a, i1 %p) {
entry:
  %b = and i32 %a, 31
  %c = icmp eq i32 %b, 0
  br i1 %c, label %cmp_true, label %cmp_false
cmp_false:
  br i1 %p, label %cmp_true, label %merge
cmp_true:
  br label %merge
merge:
  %d = phi i32 [32, %cmp_true], [%b, %cmp_false]
  ret i32 %d
}
define i32 @tgt(i32 %a, i1 %p) {
  %b = and i32 %a, 31
  %add = add i32 %a, 1
  %max = call i32 @llvm.umax.i32(i32 %add, i32 1)
  ret i32 %max
}
          
 It would be https://alive2.llvm.org/ce/z/vLckwS and  
 Thanks for the suggestion. CFG in above   | 
    
| ; CHECK-NEXT: %b = and i32 %a, 31 | ||
| ; CHECK-NEXT: --> (zext i5 (trunc i32 %a to i5) to i32) U: [0,32) S: [0,32) | ||
| ; CHECK-NEXT: %d = phi i32 [ 32, %cmp_true ], [ %b, %cmp_false ] | ||
| ; CHECK-NEXT: --> (1 + (zext i5 (trunc i32 %a to i5) to i32))<nuw><nsw> U: [1,33) S: [1,33) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't match the alive2 pattern. It doesn't reference "p" anywhere, and I'm not sure why you're adding 1.
Enhance createNodeFromSelectLikePHI to handle following pattern, where left edge from
entrytocmp_truedoesn't dominate use32in the phi.entry:
%b = and i32 %a, 31
%c = icmp eq i32 %b, 0
br i1 %c, label %cmp_true, label %cmp_false
cmp_false:
br i1 %p, label %cmp_true, label %merge
cmp_true:
br label %merge
merge:
%d = phi i32 [32, %cmp_true], [%b, %cmp_false]
This is a common pattern for computing OpenCL sub-group size on our downstream target.
This PR refines the phi's range from [0, 33) to [1, 33). MaxBECount for the loop in the lit test is tightened from -1 to 32, which enables complete loop unroll.