From 863f6c3604986292d083d82b8e634ada964286a0 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Tue, 2 Sep 2025 14:20:35 -0400 Subject: [PATCH 1/2] [GVN] Turn off ScalarPRE for TokenLike Types fixes #154407 In HLSL the GVNPass was adding a phi node on a target extension type. https://hlsl.godbolt.org/z/sc14YenEe This is something we cleaned up in a past PR (https://github.com/llvm/llvm-project/pull/154620) by introducing `isTokenLikeTy`. In the case of the GVN pass the target extension type was still making its way through. This change makes it so if we see this type we don't do PRE. --- llvm/lib/Transforms/Scalar/GVN.cpp | 3 +- .../Transforms/GVN/PRE/no-phi-translate.ll | 57 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/GVN/PRE/no-phi-translate.ll diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 4baa3b3eb8242..26e17cc849bff 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2982,7 +2982,8 @@ bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, bool GVNPass::performScalarPRE(Instruction *CurInst) { if (isa(CurInst) || CurInst->isTerminator() || isa(CurInst) || CurInst->getType()->isVoidTy() || - CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects()) + CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || + CurInst->getType()->isTokenLikeTy()) return false; // Don't do PRE on compares. 
The PHI would prevent CodeGenPrepare from diff --git a/llvm/test/Transforms/GVN/PRE/no-phi-translate.ll b/llvm/test/Transforms/GVN/PRE/no-phi-translate.ll new file mode 100644 index 0000000000000..cf49295936a0c --- /dev/null +++ b/llvm/test/Transforms/GVN/PRE/no-phi-translate.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=gvn %s | FileCheck %s + +; NOTE: when we use a Token like type we should not introduce a phi + +@Out.str = private unnamed_addr constant [4 x i8] c"Out\00", align 1 + +define void @CSMain() local_unnamed_addr { +; CHECK-LABEL: define void @CSMain() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.dx.flattened.thread.id.in.group() +; CHECK-NEXT: [[CMP_I1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP_I1_NOT]], label %[[CSMAIN_EXIT:.*]], label %[[FOR_BODY_I_LR_PH:.*]] +; CHECK: [[FOR_BODY_I_LR_PH]]: +; CHECK-NEXT: [[TMP1:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 [[TMP0]], ptr nonnull @Out.str) +; CHECK-NEXT: br label %[[FOR_BODY_I:.*]] +; CHECK: [[FOR_BODY_I]]: +; CHECK-NEXT: [[I_0_I2:%.*]] = phi i32 [ 0, %[[FOR_BODY_I_LR_PH]] ], [ [[INC_I:%.*]], %[[FOR_BODY_I]] ] +; CHECK-NEXT: [[TMP2:%.*]] = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP1]], i8 1) +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I2]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC_I]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY_I]], label %[[CSMAIN_EXIT_LOOPEXIT:.*]] +; CHECK: [[CSMAIN_EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[CSMAIN_EXIT]] +; CHECK: [[CSMAIN_EXIT]]: +; CHECK-NEXT: [[TMP3:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 [[TMP0]], ptr 
nonnull @Out.str) +; CHECK-NEXT: [[TMP4:%.*]] = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP3]], i8 1) +; CHECK-NEXT: [[TMP5:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP3]], i32 0) +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = tail call i32 @llvm.dx.flattened.thread.id.in.group() + %cmp.i1.not = icmp eq i32 %0, 0 + br i1 %cmp.i1.not, label %CSMain.exit, label %for.body.i.lr.ph + +for.body.i.lr.ph: ; preds = %entry + %1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 %0, ptr nonnull @Out.str) + br label %for.body.i + +for.body.i: ; preds = %for.body.i.lr.ph, %for.body.i + %i.0.i2 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.body.i ] + %2 = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i8 1) + %inc.i = add nuw nsw i32 %i.0.i2, 1 + %exitcond = icmp ne i32 %inc.i, %0 + br i1 %exitcond, label %for.body.i, label %CSMain.exit.loopexit + +CSMain.exit.loopexit: ; preds = %for.body.i + br label %CSMain.exit + +CSMain.exit: ; preds = %CSMain.exit.loopexit, %entry + %3 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 %0, ptr nonnull @Out.str) + %4 = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %3, i8 1) + %5 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %3, i32 0) + store i32 %4, ptr %5, align 4 + ret void +} From e887729816f3152e196ca1b4023f16d261424024 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Tue, 2 Sep 2025 16:34:59 -0400 Subject: 
[PATCH 2/2] update test case to address pr comments. --- .../Transforms/GVN/PRE/no-phi-translate.ll | 68 +++++++++---------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/llvm/test/Transforms/GVN/PRE/no-phi-translate.ll b/llvm/test/Transforms/GVN/PRE/no-phi-translate.ll index cf49295936a0c..d330ec46f9905 100644 --- a/llvm/test/Transforms/GVN/PRE/no-phi-translate.ll +++ b/llvm/test/Transforms/GVN/PRE/no-phi-translate.ll @@ -1,57 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -passes=gvn %s | FileCheck %s -; NOTE: when we use a Token like type we should not introduce a phi +; NOTE: A test to confirm GVN doesn't introduce phis for token like types. +; NOTE: This implies the CHECKS should exactly match the IR. +%"$Globals" = type { i32 } +@CBV = external constant %"$Globals" @Out.str = private unnamed_addr constant [4 x i8] c"Out\00", align 1 -define void @CSMain() local_unnamed_addr { -; CHECK-LABEL: define void @CSMain() local_unnamed_addr { +define i32 @CSMain() local_unnamed_addr { +; CHECK-LABEL: define i32 @CSMain() local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.dx.flattened.thread.id.in.group() -; CHECK-NEXT: [[CMP_I1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: [[LOADGLOBAL:%.*]] = load i32, ptr @CBV, align 4 +; CHECK-NEXT: [[CMP_I1_NOT:%.*]] = icmp eq i32 [[LOADGLOBAL]], 0 ; CHECK-NEXT: br i1 [[CMP_I1_NOT]], label %[[CSMAIN_EXIT:.*]], label %[[FOR_BODY_I_LR_PH:.*]] ; CHECK: [[FOR_BODY_I_LR_PH]]: -; CHECK-NEXT: [[TMP1:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 [[TMP0]], ptr nonnull @Out.str) +; CHECK-NEXT: [[BUF:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 [[LOADGLOBAL]], ptr nonnull @Out.str) ; CHECK-NEXT: br label %[[FOR_BODY_I:.*]] ; 
CHECK: [[FOR_BODY_I]]: -; CHECK-NEXT: [[I_0_I2:%.*]] = phi i32 [ 0, %[[FOR_BODY_I_LR_PH]] ], [ [[INC_I:%.*]], %[[FOR_BODY_I]] ] -; CHECK-NEXT: [[TMP2:%.*]] = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP1]], i8 1) -; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I2]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC_I]], [[TMP0]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY_I]], label %[[CSMAIN_EXIT_LOOPEXIT:.*]] -; CHECK: [[CSMAIN_EXIT_LOOPEXIT]]: -; CHECK-NEXT: br label %[[CSMAIN_EXIT]] +; CHECK-NEXT: [[LOOPPHI:%.*]] = phi i32 [ 0, %[[FOR_BODY_I_LR_PH]] ], [ [[INC_I:%.*]], %[[FOR_BODY_I]] ] +; CHECK-NEXT: [[UPDATECNT:%.*]] = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[BUF]], i8 1) +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[LOOPPHI]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC_I]], [[LOADGLOBAL]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY_I]], label %[[CSMAIN_EXIT]] ; CHECK: [[CSMAIN_EXIT]]: -; CHECK-NEXT: [[TMP3:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 [[TMP0]], ptr nonnull @Out.str) -; CHECK-NEXT: [[TMP4:%.*]] = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP3]], i8 1) -; CHECK-NEXT: [[TMP5:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP3]], i32 0) -; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 -; CHECK-NEXT: ret void +; CHECK-NEXT: [[BUFEXIT:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 [[LOADGLOBAL]], ptr nonnull @Out.str) +; CHECK-NEXT: [[UPDATECNTEXIT:%.*]] = tail call noundef i32 
@llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[BUFEXIT]], i8 1) +; CHECK-NEXT: ret i32 [[UPDATECNTEXIT]] ; entry: - %0 = tail call i32 @llvm.dx.flattened.thread.id.in.group() - %cmp.i1.not = icmp eq i32 %0, 0 + %loadGlobal = load i32, ptr @CBV, align 4 + %cmp.i1.not = icmp eq i32 %loadGlobal, 0 br i1 %cmp.i1.not, label %CSMain.exit, label %for.body.i.lr.ph -for.body.i.lr.ph: ; preds = %entry - %1 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 %0, ptr nonnull @Out.str) +for.body.i.lr.ph: + %buf = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 4, i32 %loadGlobal, ptr nonnull @Out.str) br label %for.body.i -for.body.i: ; preds = %for.body.i.lr.ph, %for.body.i - %i.0.i2 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.body.i ] - %2 = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %1, i8 1) - %inc.i = add nuw nsw i32 %i.0.i2, 1 - %exitcond = icmp ne i32 %inc.i, %0 - br i1 %exitcond, label %for.body.i, label %CSMain.exit.loopexit +for.body.i: + %loopPhi = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.body.i ] + %updateCnt = tail call noundef i32 @llvm.dx.resource.updatecounter(target("dx.RawBuffer", i32, 1, 0) %buf, i8 1) + %inc.i = add nuw nsw i32 %loopPhi, 1 + %exitcond = icmp ne i32 %inc.i, %loadGlobal + br i1 %exitcond, label %for.body.i, label %CSMain.exit -CSMain.exit.loopexit: ; preds = %for.body.i - br label %CSMain.exit - -CSMain.exit: ; preds = %CSMain.exit.loopexit, %entry - %3 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 0, i32 4, i32 %0, ptr nonnull @Out.str) - %4 = tail call noundef i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %3, i8 1) - %5 = tail call noundef nonnull align 4 dereferenceable(4) 
ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %3, i32 0) - store i32 %4, ptr %5, align 4 - ret void +CSMain.exit: + %bufExit = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 4, i32 %loadGlobal, ptr nonnull @Out.str) + %updateCntExit = tail call noundef i32 @llvm.dx.resource.updatecounter(target("dx.RawBuffer", i32, 1, 0) %bufExit, i8 1) + ret i32 %updateCntExit }