diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9885feda1e9b0c..025f5f69c15237 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -640,6 +640,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::FDIV, VT, Custom);
         setOperationAction(ISD::FNEG, VT, Custom);
         setOperationAction(ISD::FABS, VT, Custom);
+        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
         setOperationAction(ISD::FSQRT, VT, Custom);
         setOperationAction(ISD::FMA, VT, Custom);
 
@@ -1618,6 +1619,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerABS(Op, DAG);
   case ISD::VSELECT:
     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
+  case ISD::FCOPYSIGN:
+    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
   }
 }
 
@@ -3166,8 +3169,7 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
 
   assert(VT.isFixedLengthVector() && "Unexpected type");
 
-  MVT ContainerVT =
-      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  MVT ContainerVT = getContainerForFixedLengthVector(VT);
   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
 
   SDValue Mask, VL;
@@ -3184,6 +3186,28 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
   return convertFromScalableVector(VT, Max, DAG, Subtarget);
 }
 
+SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
+    SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  SDValue Mag = Op.getOperand(0);
+  SDValue Sign = Op.getOperand(1);
+  assert(Mag.getValueType() == Sign.getValueType() &&
+         "Can only handle COPYSIGN with matching types.");
+
+  MVT ContainerVT = getContainerForFixedLengthVector(VT);
+  Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
+  Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  SDValue CopySign =
+      DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
+
+  return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
+}
+
 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
     SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
@@ -6108,6 +6132,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(FABS_VL)
   NODE_NAME_CASE(FSQRT_VL)
   NODE_NAME_CASE(FMA_VL)
+  NODE_NAME_CASE(FCOPYSIGN_VL)
   NODE_NAME_CASE(SMIN_VL)
   NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 87a4df55ae2ff5..0d02acad46dc82 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -174,6 +174,7 @@
   FABS_VL,
   FSQRT_VL,
   FMA_VL,
+  FCOPYSIGN_VL,
   SMIN_VL,
   SMAX_VL,
   UMIN_VL,
@@ -451,6 +452,8 @@
   SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
+                                               SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 350c6e6a6fe4be..510bd14e9d472b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -89,6 +89,7 @@ def riscv_fdiv_vl : SDNode<"RISCVISD::FDIV_VL", SDT_RISCVFPBinOp_VL>;
 def riscv_fneg_vl : SDNode<"RISCVISD::FNEG_VL", SDT_RISCVFPUnOp_VL>;
 def riscv_fabs_vl : SDNode<"RISCVISD::FABS_VL", SDT_RISCVFPUnOp_VL>;
 def riscv_fsqrt_vl : SDNode<"RISCVISD::FSQRT_VL", SDT_RISCVFPUnOp_VL>;
+def riscv_fcopysign_vl : SDNode<"RISCVISD::FCOPYSIGN_VL", SDT_RISCVFPBinOp_VL>;
 
 def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
@@ -854,6 +855,27 @@
                             (XLenVT (VLOp GPR:$vl))),
             (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
                  vti.RegClass:$rs, vti.RegClass:$rs, GPR:$vl, vti.SEW)>;
+  def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                (vti.Vector vti.RegClass:$rs2),
+                                (vti.Mask true_mask),
+                                (XLenVT (VLOp GPR:$vl))),
+            (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
+                 vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.SEW)>;
+  def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                (riscv_fneg_vl vti.RegClass:$rs2,
+                                               (vti.Mask true_mask),
+                                               (XLenVT (VLOp GPR:$vl))),
+                                (vti.Mask true_mask),
+                                (XLenVT (VLOp GPR:$vl))),
+            (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+                 vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.SEW)>;
+
+  def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                (SplatFPOp vti.ScalarRegClass:$rs2),
+                                (vti.Mask true_mask),
+                                (XLenVT (VLOp GPR:$vl))),
+            (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX)
+                 vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.SEW)>;
 }
 
 foreach fvti = AllFloatVectors in {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 99c33f87927834..14cdeccb29006f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -283,6 +283,201 @@ define void @fabs_v2f64(<2 x double>* %x) {
 }
 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
 
+define void @copysign_v8f16(<8 x half>* %x, <8 x half>* %y) {
+; CHECK-LABEL: copysign_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 8, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vle16.v v26, (a1)
+; CHECK-NEXT:    vfsgnj.vv v25, v25, v26
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x half>, <8 x half>* %x
+  %b = load <8 x half>, <8 x half>* %y
+  %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
+  store <8 x half> %c, <8 x half>* %x
+  ret void
+}
+declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
+
+define void @copysign_v4f32(<4 x float>* %x, <4 x float>* %y) {
+; CHECK-LABEL: copysign_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vle32.v v26, (a1)
+; CHECK-NEXT:    vfsgnj.vv v25, v25, v26
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x float>, <4 x float>* %x
+  %b = load <4 x float>, <4 x float>* %y
+  %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
+  store <4 x float> %c, <4 x float>* %x
+  ret void
+}
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
+
+define void @copysign_v2f64(<2 x double>* %x, <2 x double>* %y) {
+; CHECK-LABEL: copysign_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vle64.v v26, (a1)
+; CHECK-NEXT:    vfsgnj.vv v25, v25, v26
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x double>, <2 x double>* %x
+  %b = load <2 x double>, <2 x double>* %y
+  %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
+  store <2 x double> %c, <2 x double>* %x
+  ret void
+}
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
+
+define void @copysign_vf_v8f16(<8 x half>* %x, half %y) {
+; CHECK-LABEL: copysign_vf_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vfsgnj.vf v25, v25, fa0
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x half>, <8 x half>* %x
+  %b = insertelement <8 x half> undef, half %y, i32 0
+  %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
+  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
+  store <8 x half> %d, <8 x half>* %x
+  ret void
+}
+
+define void @copysign_vf_v4f32(<4 x float>* %x, float %y) {
+; CHECK-LABEL: copysign_vf_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vfsgnj.vf v25, v25, fa0
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x float>, <4 x float>* %x
+  %b = insertelement <4 x float> undef, float %y, i32 0
+  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
+  store <4 x float> %d, <4 x float>* %x
+  ret void
+}
+
+define void @copysign_vf_v2f64(<2 x double>* %x, double %y) {
+; CHECK-LABEL: copysign_vf_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vfsgnj.vf v25, v25, fa0
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x double>, <2 x double>* %x
+  %b = insertelement <2 x double> undef, double %y, i32 0
+  %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
+  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
+  store <2 x double> %d, <2 x double>* %x
+  ret void
+}
+
+define void @copysign_neg_v8f16(<8 x half>* %x, <8 x half>* %y) {
+; CHECK-LABEL: copysign_neg_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 8, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vle16.v v26, (a1)
+; CHECK-NEXT:    vfsgnjn.vv v25, v25, v26
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x half>, <8 x half>* %x
+  %b = load <8 x half>, <8 x half>* %y
+  %c = fneg <8 x half> %b
+  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
+  store <8 x half> %d, <8 x half>* %x
+  ret void
+}
+
+define void @copysign_neg_v4f32(<4 x float>* %x, <4 x float>* %y) {
+; CHECK-LABEL: copysign_neg_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vle32.v v26, (a1)
+; CHECK-NEXT:    vfsgnjn.vv v25, v25, v26
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x float>, <4 x float>* %x
+  %b = load <4 x float>, <4 x float>* %y
+  %c = fneg <4 x float> %b
+  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
+  store <4 x float> %d, <4 x float>* %x
+  ret void
+}
+
+define void @copysign_neg_v2f64(<2 x double>* %x, <2 x double>* %y) {
+; CHECK-LABEL: copysign_neg_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vle64.v v26, (a1)
+; CHECK-NEXT:    vfsgnjn.vv v25, v25, v26
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x double>, <2 x double>* %x
+  %b = load <2 x double>, <2 x double>* %y
+  %c = fneg <2 x double> %b
+  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
+  store <2 x double> %d, <2 x double>* %x
+  ret void
+}
+
+define void @copysign_neg_trunc_v4f16_v4f32(<4 x half>* %x, <4 x float>* %y) {
+; CHECK-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v26, (a1)
+; CHECK-NEXT:    vsetivli a1, 4, e16,mf2,ta,mu
+; CHECK-NEXT:    vfncvt.f.f.w v27, v26
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vfsgnjn.vv v25, v25, v27
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x half>, <4 x half>* %x
+  %b = load <4 x float>, <4 x float>* %y
+  %c = fneg <4 x float> %b
+  %d = fptrunc <4 x float> %c to <4 x half>
+  %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d)
+  store <4 x half> %e, <4 x half>* %x
+  ret void
+}
+declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
+
+define void @copysign_neg_ext_v2f64_v2f32(<2 x double>* %x, <2 x float>* %y) {
+; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v26, (a1)
+; CHECK-NEXT:    vsetivli a1, 2, e32,mf2,ta,mu
+; CHECK-NEXT:    vfwcvt.f.f.v v27, v26
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vfsgnjn.vv v25, v25, v27
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x double>, <2 x double>* %x
+  %b = load <2 x float>, <2 x float>* %y
+  %c = fneg <2 x float> %b
+  %d = fpext <2 x float> %c to <2 x double>
+  %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d)
+  store <2 x double> %e, <2 x double>* %x
+  ret void
+}
+
 define void @sqrt_v8f16(<8 x half>* %x) {
 ; CHECK-LABEL: sqrt_v8f16:
 ; CHECK:       # %bb.0: