From a855e17f096d29e766362aa6e96ffe6d0c886ca2 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 5 Jul 2018 06:21:37 +0000 Subject: [PATCH] [Power9] Ensure float128 in non-homogenous aggregates are passed via VSX reg Non-homogenous aggregates are passed in consecutive GPRs, in GPRs and in memory, or in memory. This patch ensures that float128 members of non-homogenous aggregates are passed via VSX registers. This is done via custom lowering a bitcast of a build_pair(i64,i64) to float128 to a new PPCISD node, BUILD_FP128. Differential Revision: https://reviews.llvm.org/D48308 llvm-svn: 336310 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 21 +++++++++++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 ++++ llvm/lib/Target/PowerPC/PPCInstrInfo.td | 7 +++++++ llvm/lib/Target/PowerPC/PPCInstrVSX.td | 11 +++++++++++ 4 files changed, 43 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 705e1e0771086..18d94ec8a50db 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -814,6 +814,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); setTruncStoreAction(MVT::f128, MVT::f64, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i128, Custom); } } @@ -1268,6 +1269,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; case PPCISD::QBFLT: return "PPCISD::QBFLT"; case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; + case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; } return nullptr; } @@ -7661,6 +7663,23 @@ static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, return !(IsSplat && IsLoad); } +// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128. 
+SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { + + SDLoc dl(Op); + SDValue Op0 = Op->getOperand(0); + + if (!EnableQuadPrecision || + (Op.getValueType() != MVT::f128 ) || + (Op0.getOpcode() != ISD::BUILD_PAIR) || + (Op0.getOperand(0).getValueType() != MVT::i64) || + (Op0.getOperand(1).getValueType() != MVT::i64)) + return SDValue(); + + return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0), + Op0.getOperand(1)); +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen @@ -9455,6 +9474,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); + case ISD::BITCAST: return LowerBITCAST(Op, DAG); + // Frame & Return address. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 0c837730f0042..8bd864acec7de 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -189,6 +189,9 @@ namespace llvm { /// Direct move from a GPR to a VSX register (zero) MTVSRZ, + /// Direct move of 2 consecutive GPRs to a VSX register. + BUILD_FP128, + /// Extract a subvector from signed integer vector and convert to FP. /// It is primarily used to convert a (widened) illegal integer vector /// type to a legal floating point vector type. 
@@ -1065,6 +1068,7 @@ namespace llvm { SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f80c10905caea..8df9a1d6e211c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -218,6 +218,13 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; +// Combine 2 i64 values into a single f128 value held in a VSX register +def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", + SDTypeProfile<1, 2, + [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, + SDTCisSameAs<1,2>]>, + []>; + // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 1aea324995e5b..06e06404a18ee 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3387,6 +3387,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { } // end HasP9Vector, AddedComplexity +let AddedComplexity = 400 in { + let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in { + def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)), + (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; + } + let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in { + def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)), + (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; + } +} + let Predicates = [HasP9Vector] in { let isPseudo = 1 in { let mayStore = 1 in {