From b86a3c55e464eb2f7f9b8f289b0880da577f4f78 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 9 Nov 2025 18:45:32 -0800 Subject: [PATCH] DAG: Add AssertNoFPClass from call return attributes This defends against regressions in future patches. This excludes the target intrinsic case for now; I'm worried introducing an intermediate AssertNoFPClass is likely to break combines. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 17 +++++++++++++++++ .../CodeGen/SelectionDAG/SelectionDAGBuilder.h | 4 ++++ llvm/test/CodeGen/AMDGPU/nofpclass-call.ll | 16 ++++------------ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 88b0809b767b5..6a9022dff41ad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4638,6 +4638,12 @@ static std::optional<ConstantRange> getRange(const Instruction &I) { return std::nullopt; } +static FPClassTest getNoFPClass(const Instruction &I) { + if (const auto *CB = dyn_cast<CallBase>(&I)) + return CB->getRetNoFPClass(); + return fcNone; +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -9132,6 +9138,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, if (Result.first.getNode()) { Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first); + Result.first = lowerNoFPClassToAssertNoFPClass(DAG, CB, Result.first); setValue(&CB, Result.first); } @@ -10718,6 +10725,16 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return DAG.getMergeValues(Ops, SL); } +SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass( + SelectionDAG &DAG, const Instruction &I, SDValue Op) { + FPClassTest Classes = getNoFPClass(I); + if (Classes == fcNone) + return Op; + + return DAG.getNode(ISD::AssertNoFPClass, SDLoc(Op), Op.getValueType(), Op, + DAG.getTargetConstant(Classes, 
SDLoc(), MVT::i32)); +} + /// Populate a CallLowerinInfo (into \p CLI) based on the properties of /// the call being lowered. /// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ed63bee58c957..13e2daa783147 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -429,6 +429,10 @@ class SelectionDAGBuilder { SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op); + // Lower nofpclass attributes to AssertNoFPClass + SDValue lowerNoFPClassToAssertNoFPClass(SelectionDAG &DAG, + const Instruction &I, SDValue Op); + void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, diff --git a/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll index 1861f02ec8b1c..5f303cc2a1eef 100644 --- a/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll +++ b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll @@ -35,9 +35,7 @@ define float @call_nofpclass_funcs_f32(ptr addrspace(1) %ptr) { ; CHECK-NEXT: v_mov_b32_e32 v3, v0 ; CHECK-NEXT: v_mov_b32_e32 v0, v2 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_max_f32_e32 v1, v3, v3 -; CHECK-NEXT: v_max_f32_e32 v0, v0, v0 -; CHECK-NEXT: v_min_f32_e32 v0, v1, v0 +; CHECK-NEXT: v_min_f32_e32 v0, v3, v0 ; CHECK-NEXT: v_readlane_b32 s31, v4, 1 ; CHECK-NEXT: v_readlane_b32 s30, v4, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 @@ -87,12 +85,8 @@ define <2 x float> @call_nofpclass_funcs_v2f32(ptr addrspace(1) %ptr) { ; CHECK-NEXT: v_mov_b32_e32 v0, v3 ; CHECK-NEXT: v_mov_b32_e32 v1, v2 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_max_f32_e32 v2, v4, v4 -; CHECK-NEXT: v_max_f32_e32 v0, v0, v0 -; CHECK-NEXT: v_min_f32_e32 v0, v2, v0 -; CHECK-NEXT: v_max_f32_e32 v2, v5, v5 -; CHECK-NEXT: v_max_f32_e32 v1, v1, v1 -; CHECK-NEXT: v_min_f32_e32 v1, v2, v1 +; 
CHECK-NEXT: v_min_f32_e32 v0, v4, v0 +; CHECK-NEXT: v_min_f32_e32 v1, v5, v1 ; CHECK-NEXT: v_readlane_b32 s31, v6, 1 ; CHECK-NEXT: v_readlane_b32 s30, v6, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 @@ -142,12 +136,10 @@ define double @call_nofpclass_funcs_f64(ptr addrspace(1) %ptr) { ; CHECK-NEXT: v_mov_b32_e32 v0, v5 ; CHECK-NEXT: v_mov_b32_e32 v1, v4 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; CHECK-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; CHECK-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] ; CHECK-NEXT: v_readlane_b32 s31, v6, 1 ; CHECK-NEXT: v_readlane_b32 s30, v6, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1] ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5]