From 4ff4d682a3703ef404cddf79613c4b73375b986d Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 21 Nov 2025 21:13:14 -0500 Subject: [PATCH] [ReplaceConstant] Don't create instructions for the same constant multiple times in the same basic block Fixes #167500. --- llvm/lib/IR/ReplaceConstant.cpp | 10 +++- ...s-variable-multiple-use-in-one-phi-node.ll | 51 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp index b3586b45a23f2..f3d1914a8dc82 100644 --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -91,6 +91,11 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts, // Replace those expandable operands with instructions bool Changed = false; + // We need to cache the instructions we've already expanded to avoid expanding + // the same constant multiple times in the same basic block, which is + // problematic when the same constant is used in a phi node multiple times. 
+ DenseMap<std::pair<Constant *, BasicBlock *>, SmallVector<Instruction *>> + ConstantToInstructionMap; while (!InstructionWorklist.empty()) { Instruction *I = InstructionWorklist.pop_back_val(); DebugLoc Loc = I->getDebugLoc(); @@ -105,7 +110,10 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts, if (auto *C = dyn_cast<Constant>(U.get())) { if (ExpandableUsers.contains(C)) { Changed = true; - auto NewInsts = expandUser(BI, C); + SmallVector<Instruction *> &NewInsts = + ConstantToInstructionMap[std::make_pair(C, BI->getParent())]; + if (NewInsts.empty()) + NewInsts = expandUser(BI, C); for (auto *NI : NewInsts) NI->setDebugLoc(Loc); InstructionWorklist.insert_range(NewInsts); diff --git a/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll b/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll new file mode 100644 index 0000000000000..35a9bee03411f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-module-lds %s -o - | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-module-lds %s -o - | FileCheck %s + +@lds = internal unnamed_addr addrspace(3) global [6144 x half] poison, align 2 + +define amdgpu_kernel void @test(ptr addrspace(1) %out) { +; CHECK-LABEL: define amdgpu_kernel void @test( +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: switch i32 0, label %[[BB_3:.*]] [ +; CHECK-NEXT: i32 18, label %[[BB_2:.*]] +; CHECK-NEXT: i32 1, label %[[BB_2]] +; CHECK-NEXT: i32 0, label %[[BB_3]] +; CHECK-NEXT: ] +; CHECK: [[BB_1:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.test.lds to ptr +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 +; CHECK-NEXT: switch i32 0, label %[[BB_3]] [ +; CHECK-NEXT: i32 18, label %[[BB_2]] +; CHECK-NEXT: i32 1, 
label %[[BB_2]] +; CHECK-NEXT: i32 0, label %[[BB_3]] +; CHECK-NEXT: ] +; CHECK: [[BB_2]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP1]], %[[BB_1]] ], [ [[TMP1]], %[[BB_1]] ], [ 10, %[[ENTRY]] ], [ 10, %[[ENTRY]] ] +; CHECK-NEXT: store i64 [[PHI]], ptr addrspace(1) [[OUT]], align 8 +; CHECK-NEXT: br label %[[BB_3]] +; CHECK: [[BB_3]]: +; CHECK-NEXT: ret void +; +entry: + switch i32 0, label %bb.3 [ + i32 18, label %bb.2 + i32 1, label %bb.2 + i32 0, label %bb.3 + ] +bb.1: + switch i32 0, label %bb.3 [ + i32 18, label %bb.2 + i32 1, label %bb.2 + i32 0, label %bb.3 + ] + +bb.2: + %phi = phi i64 [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [10, %entry], [10, %entry] + store i64 %phi, ptr addrspace(1) %out, align 8 + br label %bb.3 + +bb.3: + ret void +}