Skip to content

Commit

Permalink
CodeGen: Optimize lowering of is.fpclass fcZero|fcSubnormal
Browse files Browse the repository at this point in the history
Combine the two checks into a single check that all exponent bits are 0. The
inverted case isn't reachable until a future change, and GlobalISel
currently doesn't attempt the inversion optimization.

https://reviews.llvm.org/D143182
  • Loading branch information
arsenm committed Jul 6, 2023
1 parent 5e763d3 commit 61820f8
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 145 deletions.
15 changes: 15 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Expand Up @@ -7274,6 +7274,9 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
return Legalized;
}

// TODO: Try inverting the test with getInvertedFPClassTest like the DAG
// version

unsigned BitSize = SrcTy.getScalarSizeInBits();
const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());

Expand Down Expand Up @@ -7329,6 +7332,18 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
Mask &= ~fcNegFinite;
}

if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
// fcZero | fcSubnormal => test all exponent bits are 0
// TODO: Handle sign bit specific cases
// TODO: Handle inverted case
if (PartialCheck == (fcZero | fcSubnormal)) {
auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
ExpBits, ZeroC));
Mask &= ~PartialCheck;
}
}

// Check for individual classes.
if (FPClassTest PartialCheck = Mask & fcZero) {
if (PartialCheck == fcPosZero)
Expand Down
20 changes: 15 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Expand Up @@ -8108,12 +8108,8 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// exceptions are ignored.
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
// Even if the condition isn't legal, we're probably better off expanding it
// if it's the combined 0 || denormal compare.

if (isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction()) &&
(Test != fcZero ||
isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
(isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
OperandVT.getScalarType().getSimpleVT()))) {
// If denormals could be implicitly treated as 0, this is not equivalent
// to a compare with 0 since it will also be true for denormals.
Expand Down Expand Up @@ -8207,6 +8203,20 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
}
appendResult(PartialRes);

if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
// fcZero | fcSubnormal => test all exponent bits are 0
// TODO: Handle sign bit specific cases
if (PartialCheck == (fcZero | fcSubnormal)) {
assert(!IsInverted && "should handle inverted case");

SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
SDValue ExpIsZero =
DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
appendResult(ExpIsZero);
Test &= ~PartialCheck & fcAllFlags;
}
}

// Check for individual classes.

if (unsigned PartialCheck = Test & fcZero) {
Expand Down
68 changes: 25 additions & 43 deletions llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
Expand Up @@ -1504,27 +1504,18 @@ define i1 @issubnormal_or_zero_f16(half %x) {
; GFX7SELDAG: ; %bb.0: ; %entry
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x3ff
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7SELDAG-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0
; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s6, v0
; GFX7SELDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX7GLISEL-LABEL: issubnormal_or_zero_f16:
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: issubnormal_or_zero_f16:
Expand Down Expand Up @@ -1701,18 +1692,15 @@ define i1 @not_isnormal_f16(half %x) {
; GFX7GLISEL-LABEL: not_isnormal_f16:
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v0
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v1
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -1770,23 +1758,20 @@ define i1 @not_is_plus_normal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3
; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v2, v3
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
Expand Down Expand Up @@ -1846,23 +1831,20 @@ define i1 @not_is_neg_normal_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7c00, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v3, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v3
; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v2
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s8, v3
; GFX7GLISEL-NEXT: s_or_b64 s[6:7], s[6:7], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v2, v3
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
Expand Down

0 comments on commit 61820f8

Please sign in to comment.