Skip to content

Commit

Permalink
[PowerPC] Avoid unnecessary fadd for unsigned to ppcf128
Browse files Browse the repository at this point in the history
Unsigned 32-bit or shorter integer to ppcf128 conversions are currently
expanded as a signed-to-double conversion plus an extra fadd to 'complement'
the result. But PowerPC has had a native instruction to directly convert
unsigned to double since ISA v2.06. This patch exploits it.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D89786
  • Loading branch information
ecnelises committed Nov 1, 2020
1 parent ba447f3 commit 1f852ba
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 99 deletions.
11 changes: 5 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
Expand Up @@ -1636,16 +1636,14 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
// though.
if (SrcVT.bitsLE(MVT::i32)) {
// The integer can be represented exactly in an f64.
Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
MVT::i32, Src);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(NVT.getSizeInBits(), 0)), dl, NVT);
if (Strict) {
Hi = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
DAG.getVTList(NVT, MVT::Other), {Chain, Src}, Flags);
Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other),
{Chain, Src}, Flags);
Chain = Hi.getValue(1);
} else
Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Src);
} else {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (SrcVT.bitsLE(MVT::i64)) {
Expand All @@ -1667,7 +1665,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
GetPairElements(Tmp.first, Lo, Hi);
}

if (isSigned) {
// No need to complement for unsigned 32-bit integers
if (isSigned || SrcVT.bitsLE(MVT::i32)) {
if (Strict)
ReplaceValueWith(SDValue(N, 1), Chain);

Expand Down
100 changes: 11 additions & 89 deletions llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
Expand Up @@ -1683,107 +1683,29 @@ entry:
define ppc_fp128 @u32_to_ppcq(i32 zeroext %m) #0 {
; PC64LE-LABEL: u32_to_ppcq:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: std 30, -24(1) # 8-byte Folded Spill
; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
; PC64LE-NEXT: std 0, 16(1)
; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: mr 30, 3
; PC64LE-NEXT: addis 3, 2, .LCPI35_0@toc@ha
; PC64LE-NEXT: xxlxor 2, 2, 2
; PC64LE-NEXT: mtfprwa 0, 30
; PC64LE-NEXT: lfs 3, .LCPI35_0@toc@l(3)
; PC64LE-NEXT: xxlxor 4, 4, 4
; PC64LE-NEXT: xscvsxddp 31, 0
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl __gcc_qadd
; PC64LE-NEXT: nop
; PC64LE-NEXT: cmpwi 30, 0
; PC64LE-NEXT: blt 0, .LBB35_2
; PC64LE-NEXT: # %bb.1: # %entry
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: .LBB35_2: # %entry
; PC64LE-NEXT: blt 0, .LBB35_4
; PC64LE-NEXT: # %bb.3: # %entry
; PC64LE-NEXT: mtfprwz 0, 3
; PC64LE-NEXT: xxlxor 2, 2, 2
; PC64LE-NEXT: .LBB35_4: # %entry
; PC64LE-NEXT: addi 1, 1, 64
; PC64LE-NEXT: ld 0, 16(1)
; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
; PC64LE-NEXT: ld 30, -24(1) # 8-byte Folded Reload
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: xscvuxddp 1, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: u32_to_ppcq:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: std 30, -24(1) # 8-byte Folded Spill
; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
; PC64LE9-NEXT: std 0, 16(1)
; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: mr 30, 3
; PC64LE9-NEXT: addis 3, 2, .LCPI35_0@toc@ha
; PC64LE9-NEXT: mtfprwz 0, 3
; PC64LE9-NEXT: xxlxor 2, 2, 2
; PC64LE9-NEXT: mtfprwa 0, 30
; PC64LE9-NEXT: lfs 3, .LCPI35_0@toc@l(3)
; PC64LE9-NEXT: xscvsxddp 31, 0
; PC64LE9-NEXT: xxlxor 4, 4, 4
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl __gcc_qadd
; PC64LE9-NEXT: nop
; PC64LE9-NEXT: cmpwi 30, 0
; PC64LE9-NEXT: blt 0, .LBB35_2
; PC64LE9-NEXT: # %bb.1: # %entry
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: .LBB35_2: # %entry
; PC64LE9-NEXT: blt 0, .LBB35_4
; PC64LE9-NEXT: # %bb.3: # %entry
; PC64LE9-NEXT: xxlxor 2, 2, 2
; PC64LE9-NEXT: .LBB35_4: # %entry
; PC64LE9-NEXT: addi 1, 1, 64
; PC64LE9-NEXT: ld 0, 16(1)
; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
; PC64LE9-NEXT: ld 30, -24(1) # 8-byte Folded Reload
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: xscvuxddp 1, 0
; PC64LE9-NEXT: blr
;
; PC64-LABEL: u32_to_ppcq:
; PC64: # %bb.0: # %entry
; PC64-NEXT: mflr 0
; PC64-NEXT: std 0, 16(1)
; PC64-NEXT: stdu 1, -160(1)
; PC64-NEXT: std 30, 128(1) # 8-byte Folded Spill
; PC64-NEXT: mr 30, 3
; PC64-NEXT: extsw 3, 3
; PC64-NEXT: std 3, 120(1)
; PC64-NEXT: lis 4, 17200
; PC64-NEXT: stw 3, -4(1)
; PC64-NEXT: addis 3, 2, .LCPI35_0@toc@ha
; PC64-NEXT: stfd 31, 152(1) # 8-byte Folded Spill
; PC64-NEXT: lfd 0, 120(1)
; PC64-NEXT: lfs 3, .LCPI35_0@toc@l(3)
; PC64-NEXT: stw 4, -8(1)
; PC64-NEXT: lfs 0, .LCPI35_0@toc@l(3)
; PC64-NEXT: addis 3, 2, .LCPI35_1@toc@ha
; PC64-NEXT: lfs 31, .LCPI35_1@toc@l(3)
; PC64-NEXT: stfd 30, 144(1) # 8-byte Folded Spill
; PC64-NEXT: fcfid 30, 0
; PC64-NEXT: fmr 1, 30
; PC64-NEXT: fmr 2, 31
; PC64-NEXT: fmr 4, 31
; PC64-NEXT: bl __gcc_qadd
; PC64-NEXT: nop
; PC64-NEXT: cmpwi 30, 0
; PC64-NEXT: blt 0, .LBB35_2
; PC64-NEXT: # %bb.1: # %entry
; PC64-NEXT: fmr 1, 30
; PC64-NEXT: .LBB35_2: # %entry
; PC64-NEXT: blt 0, .LBB35_4
; PC64-NEXT: # %bb.3: # %entry
; PC64-NEXT: fmr 2, 31
; PC64-NEXT: .LBB35_4: # %entry
; PC64-NEXT: lfd 31, 152(1) # 8-byte Folded Reload
; PC64-NEXT: ld 30, 128(1) # 8-byte Folded Reload
; PC64-NEXT: lfd 30, 144(1) # 8-byte Folded Reload
; PC64-NEXT: addi 1, 1, 160
; PC64-NEXT: ld 0, 16(1)
; PC64-NEXT: mtlr 0
; PC64-NEXT: lfd 1, -8(1)
; PC64-NEXT: lfs 2, .LCPI35_1@toc@l(3)
; PC64-NEXT: fsub 1, 1, 0
; PC64-NEXT: blr
entry:
%conv = tail call ppc_fp128 @llvm.experimental.constrained.uitofp.ppcf128.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #1
Expand Down
14 changes: 10 additions & 4 deletions llvm/test/CodeGen/PowerPC/uint-to-ppcfp128-crash.ll
@@ -1,15 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s

; Ensure we don't crash by trying to convert directly from a subword load
; to a ppc_fp128 as we do for conversions to f32/f64.
define ppc_fp128 @test(i16* nocapture readonly %Ptr) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lhz 3, 0(3)
; CHECK-NEXT: xxlxor 2, 2, 2
; CHECK-NEXT: stw 3, -4(1)
; CHECK-NEXT: addi 3, 1, -4
; CHECK-NEXT: lfiwzx 0, 0, 3
; CHECK-NEXT: xscvuxddp 1, 0
; CHECK-NEXT: blr
entry:
%0 = load i16, i16* %Ptr, align 2
%conv = uitofp i16 %0 to ppc_fp128
ret ppc_fp128 %conv
; CHECK: lhz [[LD:[0-9]+]], 0(3)
; CHECK: mtfprwa [[MV:[0-9]+]], [[LD]]
; CHECK: xscvsxddp [[CONV:[0-9]+]], [[MV]]
; CHECK: bl __gcc_qadd
}

0 comments on commit 1f852ba

Please sign in to comment.