Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4638,6 +4638,12 @@ static std::optional<ConstantRange> getRange(const Instruction &I) {
return std::nullopt;
}

/// Fetch the floating-point class mask that the value produced by \p I is
/// declared not to take. Only call-like instructions carry this information
/// (via their return nofpclass); every other instruction yields fcNone.
static FPClassTest getNoFPClass(const Instruction &I) {
  const auto *Call = dyn_cast<CallBase>(&I);
  return Call ? Call->getRetNoFPClass() : fcNone;
}

void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
Expand Down Expand Up @@ -9132,6 +9138,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,

if (Result.first.getNode()) {
Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
Result.first = lowerNoFPClassToAssertNoFPClass(DAG, CB, Result.first);
setValue(&CB, Result.first);
}

Expand Down Expand Up @@ -10718,6 +10725,16 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return DAG.getMergeValues(Ops, SL);
}

/// Attach the return nofpclass information of \p I to the lowered value
/// \p Op by wrapping it in ISD::AssertNoFPClass.
///
/// \param DAG the DAG in which the new nodes are created.
/// \param I   the instruction whose result \p Op represents; only calls can
///            contribute a non-empty class mask (see getNoFPClass).
/// \param Op  the lowered result value.
/// \returns \p Op unchanged when no classes are excluded; otherwise a value
///          with the same result types annotated with AssertNoFPClass.
SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass(
    SelectionDAG &DAG, const Instruction &I, SDValue Op) {
  FPClassTest Classes = getNoFPClass(I);
  if (Classes == fcNone)
    return Op;

  SDLoc SL(Op);
  SDValue TestConst = DAG.getTargetConstant(Classes, SDLoc(), MVT::i32);

  // Common case: a single result value.
  if (Op.getOpcode() != ISD::MERGE_VALUES)
    return DAG.getNode(ISD::AssertNoFPClass, SL, Op.getValueType(), Op,
                       TestConst);

  // An aggregate return is represented as MERGE_VALUES. Building one assert
  // node with Op.getValueType() would describe only result 0 and silently
  // drop the remaining results, so assert every component separately and
  // merge them back together.
  SmallVector<SDValue, 8> Ops(Op.getNumOperands());
  for (unsigned Idx = 0, E = Ops.size(); Idx != E; ++Idx) {
    SDValue Part = Op.getOperand(Idx);
    Ops[Idx] = DAG.getNode(ISD::AssertNoFPClass, SL, Part.getValueType(), Part,
                           TestConst);
  }

  return DAG.getMergeValues(Ops, SL);
}

/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,10 @@ class SelectionDAGBuilder {
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
SDValue Op);

// Lower nofpclass attributes to AssertNoFPClass.
//
// If \p I is a call whose return value excludes some floating-point classes,
// returns \p Op annotated with ISD::AssertNoFPClass carrying that class mask;
// otherwise returns \p Op unchanged.
SDValue lowerNoFPClassToAssertNoFPClass(SelectionDAG &DAG,
const Instruction &I, SDValue Op);

void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
const CallBase *Call, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
Expand Down
16 changes: 4 additions & 12 deletions llvm/test/CodeGen/AMDGPU/nofpclass-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ define float @call_nofpclass_funcs_f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: v_mov_b32_e32 v3, v0
; CHECK-NEXT: v_mov_b32_e32 v0, v2
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_max_f32_e32 v1, v3, v3
; CHECK-NEXT: v_max_f32_e32 v0, v0, v0
; CHECK-NEXT: v_min_f32_e32 v0, v1, v0
; CHECK-NEXT: v_min_f32_e32 v0, v3, v0
; CHECK-NEXT: v_readlane_b32 s31, v4, 1
; CHECK-NEXT: v_readlane_b32 s30, v4, 0
; CHECK-NEXT: s_mov_b32 s32, s33
Expand Down Expand Up @@ -87,12 +85,8 @@ define <2 x float> @call_nofpclass_funcs_v2f32(ptr addrspace(1) %ptr) {
; CHECK-NEXT: v_mov_b32_e32 v0, v3
; CHECK-NEXT: v_mov_b32_e32 v1, v2
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_max_f32_e32 v2, v4, v4
; CHECK-NEXT: v_max_f32_e32 v0, v0, v0
; CHECK-NEXT: v_min_f32_e32 v0, v2, v0
; CHECK-NEXT: v_max_f32_e32 v2, v5, v5
; CHECK-NEXT: v_max_f32_e32 v1, v1, v1
; CHECK-NEXT: v_min_f32_e32 v1, v2, v1
; CHECK-NEXT: v_min_f32_e32 v0, v4, v0
; CHECK-NEXT: v_min_f32_e32 v1, v5, v1
; CHECK-NEXT: v_readlane_b32 s31, v6, 1
; CHECK-NEXT: v_readlane_b32 s30, v6, 0
; CHECK-NEXT: s_mov_b32 s32, s33
Expand Down Expand Up @@ -142,12 +136,10 @@ define double @call_nofpclass_funcs_f64(ptr addrspace(1) %ptr) {
; CHECK-NEXT: v_mov_b32_e32 v0, v5
; CHECK-NEXT: v_mov_b32_e32 v1, v4
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; CHECK-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; CHECK-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; CHECK-NEXT: v_readlane_b32 s31, v6, 1
; CHECK-NEXT: v_readlane_b32 s30, v6, 0
; CHECK-NEXT: s_mov_b32 s32, s33
; CHECK-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
Expand Down
Loading