Skip to content

Commit

Permalink
[Sparc] efficient pattern for UINT_TO_FP conversion
Browse files Browse the repository at this point in the history
Summary:
        while investigating performance degradation of imagick benchmark
        there were found inefficient pattern for UINT_TO_FP conversion.
        That pattern causes RAW hazard in assembly code. Specifically,
        uitofp IR operator results in poor assembler :

        st          %i0, [%fp - 952]
        ldd         [%fp - 952], %f0

        it stores 32-bit integer register into memory location and then
        loads 64-bit floating point data from that location.
        That is exactly RAW hazard case. To optimize that case it is
        possible to use SPISD::ITOF and SPISD::XTOF for conversion from
        integer to floating point data type and to use ISD::BITCAST to
        copy from integer register into floating point register.
        The fix is to write custom UINT_TO_FP pattern using SPISD::ITOF,
        SPISD::XTOF, ISD::BITCAST.

Patch by Alexey Lapshin

Reviewers: fedor.sergeev, jyknight, dcederman, lero_chris

Reviewed By: jyknight

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D36875

llvm-svn: 318704
  • Loading branch information
Theodor committed Nov 20, 2017
1 parent 2bc260a commit a476117
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 30 deletions.
92 changes: 73 additions & 19 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
Expand Up @@ -1559,9 +1559,6 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);

// Sparc has no select or setcc: expand to SELECT_CC.
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
Expand Down Expand Up @@ -1590,13 +1587,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::f32, Custom);

if (Subtarget->is64Bit()) {
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
Expand All @@ -1610,6 +1608,9 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}

// ATOMICs.
Expand Down Expand Up @@ -2425,23 +2426,76 @@ static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG,
1);
}

static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDValue SparcTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT SrcVT = Op.getOperand(0).getValueType();

EVT DstVT = Op.getValueType();

if (Subtarget->isVIS3()) {
if (DstVT == MVT::f32 && SrcVT == MVT::i32) {
return Op; // Legal
} else if (DstVT == MVT::f64 && SrcVT == MVT::i64) {
return (Subtarget->is64Bit())
? Op
: SDValue(); // Legal on 64 bit, otherwise Expand
} else if (DstVT == MVT::i64 && SrcVT == MVT::f64) {
return (Subtarget->is64Bit())
? Op
: SDValue(); // Legal on 64 bit, otherwise Expand
}
}

// Expand
return SDValue();
}

SDValue SparcTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
assert(OpVT == MVT::i32 || OpVT == MVT::i64);

// Expand if it does not involve f128 or the target has support for
// quad floating point instructions and the operand type is legal.
if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))
return SDValue();
// Expand f128 operations to fp128 ABI calls.
if (Op.getValueType() == MVT::f128 &&
(!Subtarget->hasHardQuad() || !isTypeLegal(OpVT))) {
return LowerF128Op(Op, DAG,
getLibcallName(OpVT == MVT::i32
? RTLIB::UINTTOFP_I32_F128
: RTLIB::UINTTOFP_I64_F128),
1);
}

// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(Op.getOperand(0))) {

EVT floatVT = MVT::f32;
unsigned IntToFloatOpcode = SPISD::ITOF;

if (OpVT == MVT::i64) {
floatVT = MVT::f64;
IntToFloatOpcode = SPISD::XTOF;
}

return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(OpVT == MVT::i32
? RTLIB::UINTTOFP_I32_F128
: RTLIB::UINTTOFP_I64_F128),
1);
// Convert the int value to FP in an FP register.
SDValue FloatTmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));

return DAG.getNode(IntToFloatOpcode, dl, Op.getValueType(), FloatTmp);
}

if (OpVT == MVT::i32 && Subtarget->is64Bit()) {

SDValue Int64Tmp =
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Op.getOperand(0));

SDValue Float64Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Int64Tmp);

return DAG.getNode(SPISD::XTOF, dl, Op.getValueType(), Float64Tmp);
}

return SDValue();
}

static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
Expand Down Expand Up @@ -3059,8 +3113,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
hasHardQuad);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this,
hasHardQuad);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,
hasHardQuad);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this,
hasHardQuad);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
Expand Down Expand Up @@ -3097,6 +3150,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
}
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/Sparc/SparcISelLowering.h
Expand Up @@ -192,6 +192,10 @@ namespace llvm {

SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
Expand Down
25 changes: 15 additions & 10 deletions llvm/lib/Target/Sparc/SparcInstrVIS.td
Expand Up @@ -243,16 +243,21 @@ def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
(ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>;

let rs1 = 0 in {
def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
(ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>;
def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
(ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>;
def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd),
(ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>;
def MOVWTOS : VISInstFormat<0b100011001, (outs DFPRegs:$rd),
(ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd),
(ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd), (ins FPRegs:$rs2),
"movstosw $rs2, $rd",
[(set I64Regs:$rd, (sext (i32 (bitconvert FPRegs:$rs2))))]>;
def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd), (ins FPRegs:$rs2),
"movstouw $rs2, $rd",
[(set I64Regs:$rd, (zext (i32 (bitconvert FPRegs:$rs2))))]>;
def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd), (ins DFPRegs:$rs2),
"movdtox $rs2, $rd",
[(set I64Regs:$rd, (bitconvert DFPRegs:$rs2))]>;
def MOVWTOS : VISInstFormat<0b100011001, (outs FPRegs:$rd), (ins IntRegs:$rs2),
"movwtos $rs2, $rd",
[(set FPRegs:$rd, (bitconvert i32:$rs2))]>;
def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd), (ins I64Regs:$rs2),
"movxtod $rs2, $rd",
[(set DFPRegs:$rd, (bitconvert I64Regs:$rs2))]>;
}

def PDISTN : VISInst<0b000111111, "pdistn">;
Expand Down
53 changes: 52 additions & 1 deletion llvm/test/CodeGen/SPARC/float.ll
Expand Up @@ -3,6 +3,8 @@
; RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=V8-UNOPT
; RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9
; RUN: llc -mtriple=sparc64-unknown-linux < %s | FileCheck %s -check-prefix=SPARC64
; RUN: llc -march=sparc -mcpu=niagara4 < %s | FileCheck %s -check-prefix=VIS3
; RUN: llc -march=sparcv9 -mcpu=niagara4 < %s | FileCheck %s -check-prefix=VIS3-64

; V8-LABEL: test_neg:
; V8: call get_double
Expand Down Expand Up @@ -194,7 +196,7 @@ entry:
; V9: fstoi

; SPARC64-LABEL: test_utos_stou
; SPARC64: fdtos
; SPARC64: fxtos
; SPARC64: fstoi

define void @test_utos_stou(i32 %a, i32* %ptr0, float* %ptr1) {
Expand Down Expand Up @@ -240,6 +242,9 @@ entry:
; SPARC64-NOT: fitod
; SPARC64: fdtoi

; VIS3-64-LABEL: test_utod_dtou
; VIS3-64: movxtod

define void @test_utod_dtou(i32 %a, double %b, i32* %ptr0, double* %ptr1) {
entry:
%0 = uitofp i32 %a to double
Expand All @@ -248,3 +253,49 @@ entry:
store i32 %1, i32* %ptr0, align 8
ret void
}

; V8-LABEL: test_ustod
; V8: fitod

; VIS3-LABEL: test_ustod
; VIS3: movwtos

define double @test_ustod(i16 zeroext) {
%2 = uitofp i16 %0 to double
ret double %2
}

; V8-LABEL: test_ustos
; V8: fitos

; VIS3-LABEL: test_ustos
; VIS3: movwtos

define float @test_ustos(i16 zeroext) {
%2 = uitofp i16 %0 to float
ret float %2
}

; check for movwtos used for bitcast
;
; VIS3-LABEL: test_bitcast_utos
; VIS3:movwtos

define float @test_bitcast_utos(i32 ) {
%2 = bitcast i32 %0 to float
ret float %2
}


; check for movxtod used for bitcast
;
; VIS3-64-LABEL: test_bitcast_uxtod
; VIS3-64:movxtod

define double @test_bitcast_uxtod(i64 ) {
%2 = bitcast i64 %0 to double
ret double %2
}



0 comments on commit a476117

Please sign in to comment.