Skip to content

Commit

Permalink
[RISCV] Use vmv.s.x to build one element splat vector.
Browse files Browse the repository at this point in the history
When we want to create a splat vector in which only the first element is initialized (i.e. the splat has a VL of 1), we can use vmv.s.x or vfmv.s.f to build it.

Differential Revision: https://reviews.llvm.org/D116277
  • Loading branch information
jacquesguan committed Jan 11, 2022
1 parent f62f47f commit b607cd3
Show file tree
Hide file tree
Showing 11 changed files with 630 additions and 916 deletions.
16 changes: 14 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Expand Up @@ -1525,10 +1525,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::SPLAT_VECTOR:
case RISCVISD::VMV_S_X_VL:
case RISCVISD::VFMV_S_F_VL:
case RISCVISD::VMV_V_X_VL:
case RISCVISD::VFMV_V_F_VL: {
// Try to match splat of a scalar load to a strided load with stride of x0.
SDValue Src = Node->getOperand(0);
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
if (IsScalarMove && !Node->getOperand(0).isUndef())
break;
SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0);
auto *Ld = dyn_cast<LoadSDNode>(Src);
if (!Ld)
break;
Expand All @@ -1543,7 +1549,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue VL;
if (Node->getOpcode() == ISD::SPLAT_VECTOR)
VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
else
else if (IsScalarMove) {
// We could deal with more VL if we update the VSETVLI insert pass to
// avoid introducing more VSETVLI.
if (!isOneConstant(Node->getOperand(2)))
break;
selectVLOp(Node->getOperand(2), VL);
} else
selectVLOp(Node->getOperand(1), VL);

unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
Expand Down
18 changes: 17 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -2235,8 +2235,13 @@ static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (VT.isFloatingPoint())
if (VT.isFloatingPoint()) {
// If VL is 1, we could use vfmv.s.f.
if (isOneConstant(VL))
return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
Scalar, VL);
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
}

MVT XLenVT = Subtarget.getXLenVT();

Expand All @@ -2249,12 +2254,23 @@ static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
// If VL is 1 and the scalar value won't benefit from immediate, we could
// use vmv.s.x.
if (isOneConstant(VL) &&
(!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
VL);
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
}

assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
"Unexpected scalar for splat lowering!");

if (isOneConstant(VL) && isNullConstant(Scalar))
return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
DAG.getConstant(0, DL, XLenVT), VL);

// Otherwise use the more complicated splatting algorithm.
return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
}
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
Expand Up @@ -10,7 +10,7 @@ define half @vpreduce_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 zeroex
; CHECK-LABEL: vpreduce_fadd_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -23,7 +23,7 @@ define half @vpreduce_ord_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 ze
; CHECK-LABEL: vpreduce_ord_fadd_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -38,7 +38,7 @@ define half @vpreduce_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 zeroex
; CHECK-LABEL: vpreduce_fadd_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -51,7 +51,7 @@ define half @vpreduce_ord_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 ze
; CHECK-LABEL: vpreduce_ord_fadd_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -66,7 +66,7 @@ define float @vpreduce_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 zer
; CHECK-LABEL: vpreduce_fadd_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -79,7 +79,7 @@ define float @vpreduce_ord_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32
; CHECK-LABEL: vpreduce_ord_fadd_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -94,7 +94,7 @@ define float @vpreduce_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 zer
; CHECK-LABEL: vpreduce_fadd_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -107,7 +107,7 @@ define float @vpreduce_ord_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32
; CHECK-LABEL: vpreduce_ord_fadd_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -122,7 +122,7 @@ define double @vpreduce_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32
; CHECK-LABEL: vpreduce_fadd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -135,7 +135,7 @@ define double @vpreduce_ord_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m,
; CHECK-LABEL: vpreduce_ord_fadd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
Expand All @@ -150,7 +150,7 @@ define double @vpreduce_fadd_v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32
; CHECK-LABEL: vpreduce_fadd_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v10, fa0
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
Expand All @@ -163,7 +163,7 @@ define double @vpreduce_ord_fadd_v4f64(double %s, <4 x double> %v, <4 x i1> %m,
; CHECK-LABEL: vpreduce_ord_fadd_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; CHECK-NEXT: vfmv.v.f v10, fa0
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
Expand Down

0 comments on commit b607cd3

Please sign in to comment.