Skip to content

Conversation

@dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Oct 22, 2025

Since new select/phi instructions may construct loops, the expression tree to be simplified may still be incomplete (i.e., it may contain select with dummy values or phi without incoming values). This patch removes the call to simplifyInstruction for now, as it doesn't break existing tests.

Original PR: https://reviews.llvm.org/D36073
Fix the crash reported in #163453 (comment).

@dtcxzyw dtcxzyw requested a review from nikic October 22, 2025 15:38
@dtcxzyw dtcxzyw marked this pull request as ready for review October 22, 2025 15:39
@llvmbot
Copy link
Member

llvmbot commented Oct 22, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

Since new select/phi instructions may construct loops, the expression tree to be simplified may still be incomplete (i.e., it may contain select with dummy values or phi without incoming values). This patch removes the call to simplifyInstruction for now, as it doesn't break existing tests.

Original PR: https://reviews.llvm.org/D36073
Fix the crash reported in #163453 (comment).


Full diff: https://github.com/llvm/llvm-project/pull/164628.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+6-36)
  • (modified) llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll (+38)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9e78ec96a2f27..8ea132626a5af 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4030,7 +4030,6 @@ bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
 /// if it is simplified.
 class SimplificationTracker {
   DenseMap<Value *, Value *> Storage;
-  const SimplifyQuery &SQ;
   // Tracks newly created Phi nodes. The elements are iterated by insertion
   // order.
   PhiNodeSet AllPhiNodes;
@@ -4038,8 +4037,6 @@ class SimplificationTracker {
   SmallPtrSet<SelectInst *, 32> AllSelectNodes;
 
 public:
-  SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
-
   Value *Get(Value *V) {
     do {
       auto SV = Storage.find(V);
@@ -4049,30 +4046,6 @@ class SimplificationTracker {
     } while (true);
   }
 
-  Value *Simplify(Value *Val) {
-    SmallVector<Value *, 32> WorkList;
-    SmallPtrSet<Value *, 32> Visited;
-    WorkList.push_back(Val);
-    while (!WorkList.empty()) {
-      auto *P = WorkList.pop_back_val();
-      if (!Visited.insert(P).second)
-        continue;
-      if (auto *PI = dyn_cast<Instruction>(P))
-        if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
-          for (auto *U : PI->users())
-            WorkList.push_back(cast<Value>(U));
-          Put(PI, V);
-          PI->replaceAllUsesWith(V);
-          if (auto *PHI = dyn_cast<PHINode>(PI))
-            AllPhiNodes.erase(PHI);
-          if (auto *Select = dyn_cast<SelectInst>(PI))
-            AllSelectNodes.erase(Select);
-          PI->eraseFromParent();
-        }
-    }
-    return Get(Val);
-  }
-
   void Put(Value *From, Value *To) { Storage.insert({From, To}); }
 
   void ReplacePhi(PHINode *From, PHINode *To) {
@@ -4133,8 +4106,7 @@ class AddressingModeCombiner {
   /// Common Type for all different fields in addressing modes.
   Type *CommonType = nullptr;
 
-  /// SimplifyQuery for simplifyInstruction utility.
-  const SimplifyQuery &SQ;
+  const DataLayout &DL;
 
   /// Original Address.
   Value *Original;
@@ -4143,8 +4115,8 @@ class AddressingModeCombiner {
   Value *CommonValue = nullptr;
 
 public:
-  AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
-      : SQ(_SQ), Original(OriginalValue) {}
+  AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
+      : DL(DL), Original(OriginalValue) {}
 
   ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
 
@@ -4256,7 +4228,7 @@ class AddressingModeCombiner {
     // Keep track of keys where the value is null. We will need to replace it
     // with constant null when we know the common type.
     SmallVector<Value *, 2> NullValue;
-    Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
+    Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
     for (auto &AM : AddrModes) {
       Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
       if (DV) {
@@ -4306,7 +4278,7 @@ class AddressingModeCombiner {
     // simplification is possible only if original phi/selects were not
     // simplified yet.
     // Using this mapping we can find the current value in AddrToBase.
-    SimplificationTracker ST(SQ);
+    SimplificationTracker ST;
 
     // First step, DFS to create PHI nodes for all intermediate blocks.
     // Also fill traverse order for the second step.
@@ -4465,7 +4437,6 @@ class AddressingModeCombiner {
           PHI->addIncoming(ST.Get(Map[PV]), B);
         }
       }
-      Map[Current] = ST.Simplify(V);
     }
   }
 
@@ -5856,8 +5827,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   // the graph are compatible.
   bool PhiOrSelectSeen = false;
   SmallVector<Instruction *, 16> AddrModeInsts;
-  const SimplifyQuery SQ(*DL, TLInfo);
-  AddressingModeCombiner AddrModes(SQ, Addr);
+  AddressingModeCombiner AddrModes(*DL, Addr);
   TypePromotionTransaction TPT(RemovedInsts);
   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
       TPT.getRestorationPoint();
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
index 63fd1845594c6..2570b3b2efc31 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
@@ -951,3 +951,41 @@ fallthrough:
   %v = add i32 %v1, %v2
   ret i32 %v
 }
+
+; Make sure we don't simplify an incomplete expression tree.
+define i8 @pr163453(ptr %p, i1 %cond) {
+; CHECK-LABEL: define i8 @pr163453(
+; CHECK-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P_ADDR_0:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR11:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], ptr [[P_ADDR_0]], ptr [[INCDEC_PTR11]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[SPEC_SELECT]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  br label %for.cond
+
+for.cond:
+  %p.pn = phi ptr [ %p, %entry ], [ %p.addr.0, %for.inc ]
+  %p.addr.0 = getelementptr i8, ptr %p.pn, i64 1
+  br i1 false, label %exit, label %for.body
+
+for.body:
+  %1 = load i8, ptr %p.pn, align 1
+  br i1 false, label %for.inc, label %if.else
+
+if.else:
+  %incdec.ptr11 = getelementptr i8, ptr %p.pn, i64 2
+  %spec.select = select i1 %cond, ptr %p.addr.0, ptr %incdec.ptr11
+  br label %exit
+
+for.inc:
+  br label %for.cond
+
+exit:
+  %p.addr.3 = phi ptr [ %spec.select, %if.else ], [ %p.addr.0, %for.cond ]
+  %load = load i8, ptr %p.addr.3, align 1
+  ret i8 %load
+}

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does SimplificationTracker still do something useful if the Simplify() part is dropped?

Original PR: https://reviews.llvm.org/D36073

There is some talk about tests in the review discussion, but the actual 400 loc commit does not have even a single test :(

@nikic
Copy link
Contributor

nikic commented Oct 22, 2025

Okay, looks like the tests landed in a separate commit for some reason: cde03f3

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Oct 24, 2025

Does SimplificationTracker still do something useful if the Simplify() part is dropped?

Original PR: https://reviews.llvm.org/D36073

There is some talk about tests in the review discussion, but the actual 400 loc commit does not have even a single test :(

Yeah. It still maintains the set of newly inserted selects/PHI nodes. It also tracks the phi replacement if AddrCombiner finds a match. TBH I believe it needs some refactoring. But I don't like to touch this code :(

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@dtcxzyw dtcxzyw merged commit 59e601a into llvm:main Oct 25, 2025
15 checks passed
@dtcxzyw dtcxzyw deleted the perf/cgp-no-simplify branch October 25, 2025 08:47
@llvm-ci
Copy link
Collaborator

llvm-ci commented Oct 25, 2025

LLVM Buildbot has detected a new failure on builder bolt-aarch64-ubuntu-clang running on bolt-worker-aarch64 while building llvm at step 5 "build-clang-bolt".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/128/builds/8022

Here is the relevant piece of the build log for the reference
Step 5 (build-clang-bolt) failure: build (failure)
...
29.556 [8/2/3351] Linking CXX static library lib/libclangIndex.a
29.607 [6/3/3352] Linking CXX static library lib/libclangCrossTU.a
29.770 [5/3/3353] Linking CXX static library lib/libclangExtractAPI.a
30.065 [5/2/3354] Linking CXX static library lib/libclangStaticAnalyzerCore.a
30.420 [4/2/3355] Linking CXX static library lib/libclangCodeGen.a
30.742 [4/1/3356] Linking CXX static library lib/libclangStaticAnalyzerCheckers.a
30.786 [3/1/3357] Linking CXX static library lib/libclangStaticAnalyzerFrontend.a
30.821 [2/1/3358] Linking CXX static library lib/libclangFrontendTool.a
31.628 [1/1/3359] Linking CXX executable bin/clang-22
167.190 [0/1/3360] Creating executable symlink bin/clang
FAILED: bin/clang 
/usr/bin/cmake -E cmake_symlink_executable bin/clang-22 bin/clang && cd /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver && /usr/bin/cmake -E create_symlink clang-22 /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/./bin/clang++ && cd /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver && /usr/bin/cmake -E create_symlink clang-22 /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/./bin/clang-cl && cd /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver && /usr/bin/cmake -E create_symlink clang-22 /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/./bin/clang-cpp && cd /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver && /usr/bin/python3 /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/llvm-project/clang/tools/driver/../../utils/perf-training/perf-helper.py bolt-optimize --method INSTRUMENT --input /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/bin/clang-22 --instrumented-output /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/./bin/clang-bolt.inst --fdata /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver/../../utils/perf-training/prof.fdata --perf-training-binary-dir /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver/../../utils/perf-training --readelf /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/bin/llvm-readobj --bolt /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/bin/llvm-bolt --lit /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/./bin/llvm-lit --merge-fdata /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/bin/merge-fdata
Running: /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/bin/llvm-bolt /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/bin/clang-22 -o /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/./bin/clang-bolt.inst -instrument --instrumentation-file-append-pid --instrumentation-file=/home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver/../../utils/perf-training/prof.fdata
BOLT-INFO: shared object or position-independent executable detected
BOLT-INFO: Target architecture: aarch64
BOLT-INFO: BOLT version: <unknown>
BOLT-INFO: first alloc address is 0x0
BOLT-INFO: creating new program header table at address 0x8a00000, offset 0x8a00000
BOLT-INFO: enabling relocation mode
BOLT-INFO: forcing -jump-tables=move for instrumentation
BOLT-WARNING: 1 collisions detected while hashing binary objects. Use -v=1 to see the list.
BOLT-INFO: MarkRAStates ran on 2 functions. Ignored 0 functions (0.00%) because of CFI inconsistencies
BOLT-INFO: number of removed linker-inserted veneers: 0
BOLT-INFO: 0 out of 154877 functions in the binary (0.0%) have non-empty execution profile
BOLT-INSTRUMENTER: Number of indirect call site descriptors: 60598
BOLT-INSTRUMENTER: Number of indirect call target descriptors: 152673
BOLT-INSTRUMENTER: Number of function descriptors: 152662
BOLT-INSTRUMENTER: Number of branch counters: 2005370
BOLT-INSTRUMENTER: Number of ST leaf node counters: 1022427
BOLT-INSTRUMENTER: Number of direct call counters: 0
BOLT-INSTRUMENTER: Total number of counters: 3027797
BOLT-INSTRUMENTER: Total size of counters: 24222376 bytes (static alloc memory)
BOLT-INSTRUMENTER: Total size of string table emitted: 15982999 bytes in file
BOLT-INSTRUMENTER: Total size of descriptors: 223741644 bytes in file
BOLT-INSTRUMENTER: Profile will be saved to file /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver/../../utils/perf-training/prof.fdata
BOLT-INFO: removed 22156 empty blocks
BOLT-INFO: merged 6 duplicate CFG edges
BOLT-INFO: Starting stub-insertion pass
BOLT-INFO: Inserted 1473 stubs in the hot area and 0 stubs in the cold area. Shared 171897 times, iterated 3 times.
BOLT-INFO: rewritten pac-ret DWARF info in 2 out of 155179 functions (0.00%).
BOLT-INFO: padding code to 0x13200000 to accommodate hot text
BOLT-INFO: output linked against instrumentation runtime library, lib entry point is 0x15e288c0
BOLT-INFO: clear procedure is 0x15e27540
BOLT-INFO: patched build-id (flipped last bit)
BOLT-INFO: setting __bolt_runtime_start to 0x15e288c0
BOLT-INFO: setting __bolt_runtime_fini to 0x15e28954
BOLT-INFO: setting __hot_start to 0x8c00000
BOLT-INFO: setting __hot_end to 0x130249e8
Running: /usr/bin/python3 /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/./bin/llvm-lit -v /home/buildbot/workspace/bolt-aarch64-ubuntu-clang/build/tools/clang/tools/driver/../../utils/perf-training/bolt-fdata

CongzheUalberta added a commit that referenced this pull request Oct 26, 2025
…#164694)

This reverts commit f1c1063.

PR #163453 was merged and reverted since it exposed a crash. 
After investigation the crash was unrelated and is then fixed in #164628.

This is an attempt to reland #163453.
varun-r-mallya pushed a commit to varun-r-mallya/llvm-project that referenced this pull request Oct 27, 2025
…)" (llvm#164694)

This reverts commit f1c1063.

PR llvm#163453 was merged and reverted since it exposed a crash. 
After investigation the crash was unrelated and is then fixed in llvm#164628.

This is an attempt to reland llvm#163453.
dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025
…eCombine (llvm#164628)

Since new select/phi instructions may construct loops, the expression
tree to be simplified may still be incomplete (i.e., it may contain
select with dummy values or phi without incoming values). This patch
removes the call to simplifyInstruction for now, as it doesn't break
existing tests.

Original PR: https://reviews.llvm.org/D36073
Fix the crash reported in
llvm#163453 (comment).
dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025
…)" (llvm#164694)

This reverts commit f1c1063.

PR llvm#163453 was merged and reverted since it exposed a crash. 
After investigation the crash was unrelated and is then fixed in llvm#164628.

This is an attempt to reland llvm#163453.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants