Skip to content

Commit

Permalink
DAG: Fix ABI lowering with FP promote in strictfp functions (#74405)
Browse files Browse the repository at this point in the history
Previously, ABI lowering emitted non-strict casts (for example, FP_ROUND rather
than STRICT_FP_ROUND) for illegal floating-point types, even in strictfp functions.
  • Loading branch information
arsenm committed Jan 18, 2024
1 parent fdcb76f commit 11bf02e
Show file tree
Hide file tree
Showing 7 changed files with 1,084 additions and 225 deletions.
59 changes: 37 additions & 22 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
SDValue InChain,
std::optional<CallingConv::ID> CC);

/// getCopyFromParts - Create a value that contains the specified legal parts
Expand All @@ -163,6 +164,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static SDValue
getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
SDValue InChain,
std::optional<CallingConv::ID> CC = std::nullopt,
std::optional<ISD::NodeType> AssertOp = std::nullopt) {
// Let the target assemble the parts if it wants to
Expand All @@ -173,7 +175,7 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,

if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
InChain, CC);

assert(NumParts > 0 && "No parts to assemble!");
SDValue Val = Parts[0];
Expand All @@ -194,10 +196,10 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT, V);
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT, V,
InChain);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2,
PartVT, HalfVT, V, InChain);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
Expand All @@ -213,7 +215,7 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
OddVT, V, CC);
OddVT, V, InChain, CC);

// Combine the round and odd parts.
Lo = Val;
Expand Down Expand Up @@ -243,7 +245,8 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V,
InChain, CC);
}
}

Expand Down Expand Up @@ -283,10 +286,20 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,

if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(
ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
if (ValueVT.bitsLT(Val.getValueType())) {

SDValue NoChange =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));

if (DAG.getMachineFunction().getFunction().getAttributes().hasFnAttr(
llvm::Attribute::StrictFP)) {
return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
DAG.getVTList(ValueVT, MVT::Other), InChain, Val,
NoChange);
}

return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, NoChange);
}

return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
Expand Down Expand Up @@ -324,6 +337,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
SDValue InChain,
std::optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
Expand Down Expand Up @@ -362,17 +376,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
PartVT, IntermediateVT, V, CallConv);
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, PartVT, IntermediateVT,
V, InChain, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT, V, CallConv);
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT,
IntermediateVT, V, InChain, CallConv);
}

// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
Expand Down Expand Up @@ -926,7 +940,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}

Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
RegisterVT, ValueVT, V, CallConv);
RegisterVT, ValueVT, V, Chain, CallConv);
Part += NumRegs;
Parts.clear();
}
Expand Down Expand Up @@ -10628,9 +10642,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);

ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
CLI.CallConv, AssertOp));
ReturnValues.push_back(getCopyFromParts(
CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr,
CLI.Chain, CLI.CallConv, AssertOp));
CurReg += NumRegs;
}

Expand Down Expand Up @@ -11109,8 +11123,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
std::optional<ISD::NodeType> AssertOp;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
SDValue ArgValue =
getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, NewRoot,
F.getCallingConv(), AssertOp);

MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
Expand Down Expand Up @@ -11182,7 +11197,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertZext;

ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr,
PartVT, VT, nullptr, NewRoot,
F.getCallingConv(), AssertOp));
}

Expand Down
51 changes: 5 additions & 46 deletions llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1094,52 +1094,11 @@ define <4 x i1> @isnan_v4bf16(<4 x bfloat> %x) nounwind {
ret <4 x i1> %1
}

define i1 @isnan_bf16_strictfp(bfloat %x) strictfp nounwind {
; GFX7CHECK-LABEL: isnan_bf16_strictfp:
; GFX7CHECK: ; %bb.0:
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: isnan_bf16_strictfp:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: isnan_bf16_strictfp:
; GFX9CHECK: ; %bb.0:
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: isnan_bf16_strictfp:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: isnan_bf16_strictfp:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
%1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan
ret i1 %1
}
; FIXME: Broken for gfx6/7
; define i1 @isnan_bf16_strictfp(bfloat %x) strictfp nounwind {
; %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan
; ret i1 %1
; }

define i1 @isinf_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: isinf_bf16:
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,9 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
Expand Down
110 changes: 0 additions & 110 deletions llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll

This file was deleted.

0 comments on commit 11bf02e

Please sign in to comment.