diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 14e15bad93302..c73905d3357a5 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -637,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::STRICT_FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); + setTargetDAGCombine(ISD::STRICT_FP_EXTEND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::SDIV); setTargetDAGCombine(ISD::UDIV); @@ -5386,6 +5388,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VEXTEND); OPCODE(STRICT_VEXTEND); OPCODE(VROUND); + OPCODE(STRICT_VROUND); OPCODE(VTM); OPCODE(VFAE_CC); OPCODE(VFAEZ_CC); @@ -5908,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS( return SDValue(); } +static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { + SDValue Chain1 = N1->getOperand(0); + SDValue Chain2 = N2->getOperand(0); + + // Trivial case: both nodes take the same chain. + if (Chain1 == Chain2) + return Chain1; + + // FIXME - we could handle more complex cases via TokenFactor, + // assuming we can verify that this would not create a cycle. + return SDValue(); +} + SDValue SystemZTargetLowering::combineFP_ROUND( SDNode *N, DAGCombinerInfo &DCI) const { @@ -5920,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND( // (extract_vector_elt (VROUND X) 2) // // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -5937,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND( U->getOperand(1).getOpcode() == ISD::Constant && cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { SDValue OtherRound = SDValue(*U->use_begin(), 0); - if (OtherRound.getOpcode() == ISD::FP_ROUND && - OtherRound.getOperand(0) == SDValue(U, 0) && + if (OtherRound.getOpcode() == N->getOpcode() && + OtherRound.getOperand(OpNo) == SDValue(U, 0) && OtherRound.getValueType() == MVT::f32) { - SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), - MVT::v4f32, Vec); + SDValue VRound, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherRound.getNode()); + if (!Chain) + continue; + VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), + {MVT::v4f32, MVT::Other}, {Chain, Vec}); + Chain = VRound.getValue(1); + } else + VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), + MVT::v4f32, Vec); DCI.AddToWorklist(VRound.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); return Extract0; } } @@ -5971,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( // (extract_vector_elt (VEXTEND X) 1) // // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -5988,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( U->getOperand(1).getOpcode() == ISD::Constant && cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) { SDValue OtherExtend = SDValue(*U->use_begin(), 0); - if (OtherExtend.getOpcode() == ISD::FP_EXTEND && - OtherExtend.getOperand(0) == SDValue(U, 0) && + if (OtherExtend.getOpcode() == N->getOpcode() && + OtherExtend.getOperand(OpNo) == SDValue(U, 0) && OtherExtend.getValueType() == MVT::f64) { - SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), - MVT::v2f64, Vec); + SDValue VExtend, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherExtend.getNode()); + if (!Chain) + continue; + VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), + {MVT::v2f64, MVT::Other}, {Chain, Vec}); + Chain = VExtend.getValue(1); + } else + VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), + MVT::v2f64, Vec); DCI.AddToWorklist(VExtend.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); return Extract0; } } @@ -6341,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); + case 
ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index e49c47e379ef6..0ac07a12ab711 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -267,8 +267,8 @@ enum NodeType : unsigned { VEXTEND, STRICT_VEXTEND, // Round the f64 elements of vector operand 0 to f32s and store them in the - // even elements of the result. - VROUND, + // even elements of the result. Regular and strict versions. + VROUND, STRICT_VROUND, // AND the two vector operands together and set CC based on the result. VTM, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index de6e473dd56bc..c945122ee577a 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in { def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; } - def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; + def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; def : FPConversion; let Predicates = [FeatureVectorEnhancements1] in { let Uses = [FPC], mayRaiseFPException = 1 in { diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 0beefc4682a04..a6a72903e5736 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -353,6 +353,8 @@ def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; def z_strict_vextend : 
SDNode<"SystemZISD::STRICT_VEXTEND", SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; +def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>; @@ -741,6 +743,9 @@ def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs), def z_any_vextend : PatFrags<(ops node:$src), [(z_strict_vextend node:$src), (z_vextend node:$src)]>; +def z_any_vround : PatFrags<(ops node:$src), + [(z_strict_vround node:$src), + (z_vround node:$src)]>; // Create a unary operator that loads from memory and then performs // the given operation on it. diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll new file mode 100644 index 0000000000000..d4590a57d3edf --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll @@ -0,0 +1,61 @@ +; Test conversions between different-sized float elements. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) + +declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) + +; Test cases where both elements of a v2f64 are converted to f32s. 
+define void @f1(<2 x double> %val, <2 x float> *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vledb {{%v[0-9]+}}, %v24, 0, 0 +; CHECK: br %r14 + %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( + <2 x double> %val, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store <2 x float> %res, <2 x float> *%ptr + ret void +} + +; Test conversion of an f64 in a vector register to an f32. +define float @f2(<2 x double> %vec) #0 { +; CHECK-LABEL: f2: +; CHECK: wledb %f0, %v24, 0, 0 +; CHECK: br %r14 + %scalar = extractelement <2 x double> %vec, i32 0 + %ret = call float @llvm.experimental.constrained.fptrunc.f32.f64( + double %scalar, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %ret +} + +; Test cases where even elements of a v4f32 are converted to f64s. +define <2 x double> @f3(<4 x float> %vec) { +; CHECK-LABEL: f3: +; CHECK: vldeb %v24, {{%v[0-9]+}} +; CHECK: br %r14 + %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 0, i32 2> + %res = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32( + <2 x float> %shuffle, + metadata !"fpexcept.strict") #0 + ret <2 x double> %res +} + +; Test conversion of an f32 in a vector register to an f64. 
+define double @f4(<4 x float> %vec) { +; CHECK-LABEL: f4: +; CHECK: wldeb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %vec, i32 0 + %ret = call double @llvm.experimental.constrained.fpext.f64.f32( + float %scalar, + metadata !"fpexcept.strict") #0 + ret double %ret +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index e7c4e3a4466ee..348be4a9f14f8 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -5417,13 +5417,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* % ; SZ13-LABEL: constrained_vector_fptrunc_v3f64: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vl %v1, 0(%r2), 4 -; SZ13-NEXT: ledbra %f2, 0, %f1, 0 -; SZ13-NEXT: vrepg %v1, %v1, 1 +; SZ13-NEXT: vledb %v1, %v1, 0, 0 +; SZ13-NEXT: larl %r1, .LCPI97_0 ; SZ13-NEXT: ld %f0, 16(%r2) -; SZ13-NEXT: ledbra %f1, 0, %f1, 0 +; SZ13-NEXT: vl %v2, 0(%r1), 3 +; SZ13-NEXT: vperm %v1, %v1, %v0, %v2 ; SZ13-NEXT: ledbra %f0, 0, %f0, 0 -; SZ13-NEXT: vmrhf %v1, %v2, %v1 -; SZ13-NEXT: vmrhg %v1, %v1, %v1 ; SZ13-NEXT: ste %f0, 8(%r3) ; SZ13-NEXT: vsteg %v1, 0(%r3), 0 ; SZ13-NEXT: br %r14 @@ -5544,13 +5543,11 @@ define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %de ; SZ13-LABEL: constrained_vector_fpext_v3f64: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vl %v0, 0(%r2), 4 -; SZ13-NEXT: vrepf %v2, %v0, 1 -; SZ13-NEXT: ldebr %f1, %f0 -; SZ13-NEXT: ldebr %f2, %f2 -; SZ13-NEXT: vrepf %v0, %v0, 2 -; SZ13-NEXT: ldebr %f0, %f0 -; SZ13-NEXT: vmrhg %v1, %v1, %v2 -; SZ13-NEXT: std %f0, 16(%r3) +; SZ13-NEXT: vrepf %v1, %v0, 1 +; SZ13-NEXT: vldeb %v0, %v0 +; SZ13-NEXT: ldebr %f1, %f1 +; SZ13-NEXT: vmrhg %v1, %v0, %v1 +; SZ13-NEXT: vsteg %v0, 16(%r3), 1 ; SZ13-NEXT: vst %v1, 0(%r3), 4 ; SZ13-NEXT: br %r14 entry: