DAG: Remove hasBitPreservingFPLogic
This doesn't make sense as an option. fneg and fabs are
bit-preserving by definition. If a target has fneg or fabs
instructions that are not bit-preserving, it is incorrect to lower
fneg/fabs to use them.
arsenm committed Feb 14, 2023
1 parent d94399c commit 09dd4d8
Showing 16 changed files with 39 additions and 90 deletions.
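
For context, the DAG combine this hook used to gate (foldBitcastedFPLogic) recognizes integer-domain sign-bit manipulation done through bitcasts; the bitcast_fneg/bitcast_fabs tests updated below exercise exactly this shape. A minimal IR sketch of the two patterns follows — the function names are illustrative and f32 is chosen only as an example type:

; Sign-bit flip via integer ops: folded to an FP negation. After this change
; the fold is gated only by FNEG/FABS operation legality (when legalizing
; operations), not by a per-target opt-in hook.
define float @bitcast_fneg(float %x) {
  %b = bitcast float %x to i32
  %s = xor i32 %b, -2147483648      ; 0x80000000 flips the sign bit
  %r = bitcast i32 %s to float
  ret float %r                      ; equivalent to fneg float %x
}

; Sign-bit clear via integer ops: folded to an FP absolute value.
define float @bitcast_fabs(float %x) {
  %b = bitcast float %x to i32
  %a = and i32 %b, 2147483647       ; 0x7FFFFFFF clears the sign bit
  %r = bitcast i32 %a to float
  ret float %r                      ; equivalent to llvm.fabs.f32(%x)
}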
8 changes: 0 additions & 8 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -644,14 +644,6 @@ class TargetLoweringBase {
/// gen prepare.
virtual bool preferZeroCompareBranch() const { return false; }

/// Return true if it is safe to transform an integer-domain bitwise operation
/// into the equivalent floating-point operation. This should be set to true
/// if the target has IEEE-754-compliant fabs/fneg operations for the input
/// type.
virtual bool hasBitPreservingFPLogic(EVT VT) const {
return false;
}

/// Return true if it is cheaper to split the store of a merged int val
/// from a pair of smaller values into multiple stores.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
17 changes: 12 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -585,6 +585,9 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI);

SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
@@ -14399,18 +14402,19 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}

static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// If this is not a bitcast to an FP type or if the target doesn't have
// IEEE754-compliant FP logic, we're done.
EVT VT = N->getValueType(0);
if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
SDValue N0 = N->getOperand(0);
EVT SourceVT = N0.getValueType();

if (!VT.isFloatingPoint())
return SDValue();

// TODO: Handle cases where the integer constant is a different scalar
// bitwidth to the FP.
SDValue N0 = N->getOperand(0);
EVT SourceVT = N0.getValueType();
if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();

@@ -14433,6 +14437,9 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
return SDValue();

// This needs to be the inverse of logic in foldSignChangeInBitcast.
// FIXME: I don't think looking for bitcast intrinsically makes sense, but
// removing this would require more changes.
5 changes: 0 additions & 5 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -840,11 +840,6 @@ class AArch64TargetLowering : public TargetLowering {
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;

bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
}

bool supportSplitCSR(MachineFunction *MF) const override {
return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4482,10 +4482,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
}

bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return isTypeLegal(VT.getScalarType());
}

bool SITargetLowering::hasAtomicFaddRtnForTy(SDValue &Op) const {
switch (Op.getValue(0).getSimpleValueType().SimpleTy) {
case MVT::f32:
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -396,7 +396,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;

bool hasBitPreservingFPLogic(EVT VT) const override;
bool hasAtomicFaddRtnForTy(SDValue &Op) const;
bool enableAggressiveFMAFusion(EVT VT) const override;
bool enableAggressiveFMAFusion(LLT Ty) const override;
9 changes: 0 additions & 9 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17638,15 +17638,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}

bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
if (!Subtarget.hasVSX())
return false;
if (Subtarget.hasP9Vector() && VT == MVT::f128)
return true;
return VT == MVT::f32 || VT == MVT::f64 ||
VT == MVT::v4f32 || VT == MVT::v2f64;
}

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
1 change: 0 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1468,7 +1468,6 @@ namespace llvm {
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

/// getAddrModeForFlags - Based on the set of address flags, select the most
6 changes: 0 additions & 6 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1525,12 +1525,6 @@ bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
return Index == 0 || Index == ResElts;
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return (VT == MVT::f16 && Subtarget.hasStdExtZfhOrZfhmin()) ||
(VT == MVT::f32 && Subtarget.hasStdExtF()) ||
(VT == MVT::f64 && Subtarget.hasStdExtD());
}

MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -409,7 +409,6 @@ class RISCVTargetLowering : public TargetLowering {
/// should be stack expanded.
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
@@ -425,6 +424,7 @@
// out until we get testcase to prove it is a win.
return false;
}

bool
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
5 changes: 1 addition & 4 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -1,3 +1,4 @@

//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -423,10 +424,6 @@ class SystemZTargetLowering : public TargetLowering {
}
bool isCheapToSpeculateCtlz(Type *) const override { return true; }
bool preferZeroCompareBranch() const override { return true; }
bool hasBitPreservingFPLogic(EVT VT) const override {
EVT ScVT = VT.getScalarType();
return ScVT == MVT::f32 || ScVT == MVT::f64 || ScVT == MVT::f128;
}
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
return Mask && Mask->getValue().isIntN(16);
4 changes: 0 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6001,10 +6001,6 @@ bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return Subtarget.hasLZCNT();
}

bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
}

bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
// Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
// expensive than a straight movsd. On the other hand, it's important to
2 changes: 0 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1088,8 +1088,6 @@ namespace llvm {

bool isCtlzFast() const override;

bool hasBitPreservingFPLogic(EVT VT) const override;

bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
22 changes: 12 additions & 10 deletions llvm/test/CodeGen/AMDGPU/fneg.ll
@@ -188,8 +188,8 @@ define i16 @v_fneg_i16(i16 %in) {
}

; FUNC-LABEL: {{^}}s_fneg_i16_fp_use:
; SI: v_cvt_f32_f16_e64 [[CVT0:v[0-9]+]], -s{{[0-9]+}}
; SI: v_add_f32_e32 [[ADD:v[0-9]+]], 2.0, [[CVT0]]
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], s{{[0-9]+}}
; SI: v_sub_f32_e32 [[ADD:v[0-9]+]], 2.0, [[CVT0]]
; SI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[ADD]]

; VI: s_load_dword [[IN:s[0-9]+]]
@@ -204,8 +204,8 @@ define amdgpu_kernel void @s_fneg_i16_fp_use(ptr addrspace(1) %out, i16 %in) {

; FUNC-LABEL: {{^}}v_fneg_i16_fp_use:
; SI: s_waitcnt
; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
; SI-NEXT: v_add_f32_e32 v0, 2.0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_sub_f32_e32 v0, 2.0, v0
; SI-NEXT: s_setpc_b64

; VI: s_waitcnt
@@ -257,8 +257,10 @@ define <2 x i16> @v_fneg_v2i16(<2 x i16> %in) {

; FUNC-LABEL: {{^}}s_fneg_v2i16_fp_use:
; SI: s_lshr_b32 s3, s2, 16
; SI: v_cvt_f32_f16_e64 v0, -s3
; SI: v_cvt_f32_f16_e64 v1, -s2
; SI: v_cvt_f32_f16_e32 v0, s3
; SI: v_cvt_f32_f16_e32 v1, s2
; SI: v_sub_f32_e32 v0, 2.0, v0
; SI: v_sub_f32_e32 v1, 2.0, v1

; VI: s_lshr_b32 s5, s4, 16
; VI: s_xor_b32 s5, s5, 0x8000
@@ -278,10 +280,10 @@ define amdgpu_kernel void @s_fneg_v2i16_fp_use(ptr addrspace(1) %out, i32 %arg)

; FUNC-LABEL: {{^}}v_fneg_v2i16_fp_use:
; SI: v_lshrrev_b32_e32 v1, 16, v0
; SI: v_cvt_f32_f16_e64 v0, -v0
; SI: v_cvt_f32_f16_e64 v1, -v1
; SI: v_add_f32_e32 v0, 2.0, v0
; SI: v_add_f32_e32 v1, 2.0, v1
; SI: v_cvt_f32_f16_e32 v0, v0
; SI: v_cvt_f32_f16_e32 v1, v1
; SI: v_sub_f32_e32 v0, 2.0, v0
; SI: v_sub_f32_e32 v1, 2.0, v1

; VI: s_waitcnt
; VI: v_mov_b32_e32 v1, 0x4000
7 changes: 1 addition & 6 deletions llvm/test/CodeGen/PowerPC/fabs.ll
@@ -13,12 +13,7 @@ define double @fabs(double %f) {
define float @bitcast_fabs(float %x) {
; CHECK-LABEL: bitcast_fabs:
; CHECK: # %bb.0:
; CHECK: stfs f1, 8(r1)
; CHECK: lwz r3, 8(r1)
; CHECK-NEXT: clrlwi r3, r3, 1
; CHECK-NEXT: stw r3, 12(r1)
; CHECK-NEXT: lfs f1, 12(r1)
; CHECK-NEXT: addi r1, r1, 16
; CHECK-NEXT: fabs f1, f1
; CHECK-NEXT: blr
;
%bc1 = bitcast float %x to i32
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll
@@ -211,8 +211,8 @@ define half @movmsk(half %x) {
define half @bitcast_fabs(half %x) {
; CHECK-LABEL: bitcast_fabs:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%bc1 = bitcast half %x to i16
%and = and i16 %bc1, 32767
@@ -223,8 +223,8 @@ define half @bitcast_fabs(half %x) {
define half @bitcast_fneg(half %x) {
; CHECK-LABEL: bitcast_fneg:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%bc1 = bitcast half %x to i16
%xor = xor i16 %bc1, 32768
@@ -285,8 +285,8 @@ define half @fsub_bitcast_fneg(half %x, half %y) {
define half @nabs(half %a) {
; CHECK-LABEL: nabs:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%conv = bitcast half %a to i16
%and = or i16 %conv, -32768
24 changes: 6 additions & 18 deletions llvm/test/CodeGen/X86/fp128-i128.ll
@@ -130,42 +130,30 @@ entry:
define fp128 @TestI128_1(fp128 %x) #0 {
; SSE-LABEL: TestI128_1:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $40, %rsp
; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; SSE-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT: andq {{[0-9]+}}(%rsp), %rax
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, (%rsp)
; SSE-NEXT: movaps (%rsp), %xmm0
; SSE-NEXT: pushq %rax
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: callq __lttf2@PLT
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: sets %cl
; SSE-NEXT: shlq $4, %rcx
; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rcx), %xmm0
; SSE-NEXT: addq $40, %rsp
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; AVX-LABEL: TestI128_1:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
; AVX-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
; AVX-NEXT: andq {{[0-9]+}}(%rsp), %rax
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; AVX-NEXT: movq %rcx, (%rsp)
; AVX-NEXT: vmovaps (%rsp), %xmm0
; AVX-NEXT: pushq %rax
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; AVX-NEXT: callq __lttf2@PLT
; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: sets %cl
; AVX-NEXT: shlq $4, %rcx
; AVX-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rcx), %xmm0
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: popq %rax
; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128
