[RISCV] Add support for selecting vrgather.vx/vi for fixed vector splat shuffles.

The test cases extract a fixed element from a vector and splat it
into a vector. This gets DAG combined into a splat shuffle.

I've used some very wide vectors in the tests to make sure we have
at least a couple of tests where the element index doesn't fit into the
uimm5 immediate of vrgather.vi, so we fall back to vrgather.vx.
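
For reference, a minimal sketch of the IR pattern involved (mirroring the
gather_const_v8f16 test added below):

  %b = extractelement <8 x half> %a, i32 5
  %c = insertelement <8 x half> undef, half %b, i32 0
  %d = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer

DAG combine folds the insert+shuffle into a single splat shuffle of %a with
splat index 5, which the new lowering selects as "vrgather.vi v26, v25, 5"
in that test's CHECK lines. An index above 31, like the 47 used in
gather_const_v64f16, doesn't fit uimm5 and selects "vrgather.vx" with the
index in a scalar register instead.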

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D96186
topperc committed Feb 10, 2021
1 parent 2193e8b commit 0c254b4
Showing 5 changed files with 460 additions and 0 deletions.
35 changes: 35 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -524,6 +524,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
@@ -554,6 +555,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
@@ -853,6 +855,36 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
  return SDValue();
}

static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue V1 = Op.getOperand(0);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  if (SVN->isSplat()) {
    int Lane = SVN->getSplatIndex();
    if (Lane >= 0) {
      MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

      V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
      assert(Lane < (int)VT.getVectorNumElements() && "Unexpected lane!");

      MVT XLenVT = Subtarget.getXLenVT();
      SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
      MVT MaskVT =
          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
      SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
      SDValue Gather =
          DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
                      DAG.getConstant(Lane, DL, XLenVT), Mask, VL);
      return convertFromScalableVector(VT, Gather, DAG, Subtarget);
    }
  }

  return SDValue();
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
@@ -1102,6 +1134,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
  case ISD::LOAD:
    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
  case ISD::STORE:
@@ -4638,6 +4672,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  NODE_NAME_CASE(FMA_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -167,6 +167,9 @@ enum NodeType : unsigned {
  VMCLR_VL,
  VMSET_VL,

  // Matches the semantics of vrgather.vx with an extra operand for VL.
  VRGATHER_VX_VL,

  // Memory opcodes start here.
  VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
  VSE_VL,
43 changes: 43 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -79,6 +79,14 @@ def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
                                              SDTCisVT<5, XLenVT>]>;
def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL>;

def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
                                  SDTypeProfile<1, 4, [SDTCisVec<0>,
                                                       SDTCisSameAs<0, 1>,
                                                       SDTCisVT<2, XLenVT>,
                                                       SDTCVecEltisVT<3, i1>,
                                                       SDTCisSameNumEltsAs<0, 3>,
                                                       SDTCisVT<4, XLenVT>]>>;

def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCisVec<0>,
                                                SDTCVecEltisVT<0, i1>,
                                                SDTCisVT<1, XLenVT>]>;
@@ -211,6 +219,41 @@ foreach vti = AllFloatVectors in {

} // Predicates = [HasStdExtV, HasStdExtF]

// 17.4. Vector Register Gather Instruction
let Predicates = [HasStdExtV] in {

foreach vti = AllIntegerVectors in {
  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
                                              (vti.Mask true_mask),
                                              (XLenVT (VLOp GPR:$vl)))),
            (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
                 vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.SEW)>;
  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
                                              (vti.Mask true_mask),
                                              (XLenVT (VLOp GPR:$vl)))),
            (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
                 vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>;
}

} // Predicates = [HasStdExtV]

let Predicates = [HasStdExtV, HasStdExtF] in {

foreach vti = AllFloatVectors in {
  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
                                              (vti.Mask true_mask),
                                              (XLenVT (VLOp GPR:$vl)))),
            (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
                 vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.SEW)>;
  def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
                                              (vti.Mask true_mask),
                                              (XLenVT (VLOp GPR:$vl)))),
            (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
                 vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.SEW)>;
}

} // Predicates = [HasStdExtV, HasStdExtF]

//===----------------------------------------------------------------------===//
// Miscellaneous RISCVISD SDNodes
//===----------------------------------------------------------------------===//
177 changes: 177 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
@@ -0,0 +1,177 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1

define void @gather_const_v8f16(<8 x half>* %x) {
; CHECK-LABEL: gather_const_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 8
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 5
; CHECK-NEXT: vse16.v v26, (a0)
; CHECK-NEXT: ret
  %a = load <8 x half>, <8 x half>* %x
  %b = extractelement <8 x half> %a, i32 5
  %c = insertelement <8 x half> undef, half %b, i32 0
  %d = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  store <8 x half> %d, <8 x half>* %x
  ret void
}

define void @gather_const_v4f32(<4 x float>* %x) {
; CHECK-LABEL: gather_const_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 4
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 2
; CHECK-NEXT: vse32.v v26, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, <4 x float>* %x
  %b = extractelement <4 x float> %a, i32 2
  %c = insertelement <4 x float> undef, float %b, i32 0
  %d = shufflevector <4 x float> %c, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %d, <4 x float>* %x
  ret void
}

define void @gather_const_v2f64(<2 x double>* %x) {
; CHECK-LABEL: gather_const_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, zero, 2
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: vrgather.vi v26, v25, 0
; CHECK-NEXT: vse64.v v26, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, <2 x double>* %x
  %b = extractelement <2 x double> %a, i32 0
  %c = insertelement <2 x double> undef, double %b, i32 0
  %d = shufflevector <2 x double> %c, <2 x double> undef, <2 x i32> zeroinitializer
  store <2 x double> %d, <2 x double>* %x
  ret void
}

define void @gather_const_v64f16(<64 x half>* %x) {
; LMULMAX8-LABEL: gather_const_v64f16:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: addi a1, zero, 64
; LMULMAX8-NEXT: vsetvli a1, a1, e16,m8,ta,mu
; LMULMAX8-NEXT: vle16.v v8, (a0)
; LMULMAX8-NEXT: addi a1, zero, 47
; LMULMAX8-NEXT: vrgather.vx v16, v8, a1
; LMULMAX8-NEXT: vse16.v v16, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v64f16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 80
; LMULMAX1-NEXT: addi a2, zero, 8
; LMULMAX1-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a1)
; LMULMAX1-NEXT: addi a6, a0, 16
; LMULMAX1-NEXT: addi a7, a0, 48
; LMULMAX1-NEXT: addi a4, a0, 32
; LMULMAX1-NEXT: addi a5, a0, 64
; LMULMAX1-NEXT: addi a2, a0, 112
; LMULMAX1-NEXT: addi a3, a0, 96
; LMULMAX1-NEXT: vrgather.vi v26, v25, 7
; LMULMAX1-NEXT: vse16.v v26, (a3)
; LMULMAX1-NEXT: vse16.v v26, (a2)
; LMULMAX1-NEXT: vse16.v v26, (a5)
; LMULMAX1-NEXT: vse16.v v26, (a1)
; LMULMAX1-NEXT: vse16.v v26, (a4)
; LMULMAX1-NEXT: vse16.v v26, (a7)
; LMULMAX1-NEXT: vse16.v v26, (a0)
; LMULMAX1-NEXT: vse16.v v26, (a6)
; LMULMAX1-NEXT: ret
  %a = load <64 x half>, <64 x half>* %x
  %b = extractelement <64 x half> %a, i32 47
  %c = insertelement <64 x half> undef, half %b, i32 0
  %d = shufflevector <64 x half> %c, <64 x half> undef, <64 x i32> zeroinitializer
  store <64 x half> %d, <64 x half>* %x
  ret void
}

define void @gather_const_v32f32(<32 x float>* %x) {
; LMULMAX8-LABEL: gather_const_v32f32:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: addi a1, zero, 32
; LMULMAX8-NEXT: vsetvli a1, a1, e32,m8,ta,mu
; LMULMAX8-NEXT: vle32.v v8, (a0)
; LMULMAX8-NEXT: vrgather.vi v16, v8, 17
; LMULMAX8-NEXT: vse32.v v16, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v32f32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 64
; LMULMAX1-NEXT: addi a2, zero, 4
; LMULMAX1-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: addi a6, a0, 16
; LMULMAX1-NEXT: addi a7, a0, 48
; LMULMAX1-NEXT: addi a4, a0, 32
; LMULMAX1-NEXT: addi a5, a0, 80
; LMULMAX1-NEXT: addi a2, a0, 112
; LMULMAX1-NEXT: addi a3, a0, 96
; LMULMAX1-NEXT: vrgather.vi v26, v25, 1
; LMULMAX1-NEXT: vse32.v v26, (a3)
; LMULMAX1-NEXT: vse32.v v26, (a2)
; LMULMAX1-NEXT: vse32.v v26, (a1)
; LMULMAX1-NEXT: vse32.v v26, (a5)
; LMULMAX1-NEXT: vse32.v v26, (a4)
; LMULMAX1-NEXT: vse32.v v26, (a7)
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: vse32.v v26, (a6)
; LMULMAX1-NEXT: ret
  %a = load <32 x float>, <32 x float>* %x
  %b = extractelement <32 x float> %a, i32 17
  %c = insertelement <32 x float> undef, float %b, i32 0
  %d = shufflevector <32 x float> %c, <32 x float> undef, <32 x i32> zeroinitializer
  store <32 x float> %d, <32 x float>* %x
  ret void
}

define void @gather_const_v16f64(<16 x double>* %x) {
; LMULMAX8-LABEL: gather_const_v16f64:
; LMULMAX8: # %bb.0:
; LMULMAX8-NEXT: addi a1, zero, 16
; LMULMAX8-NEXT: vsetvli a1, a1, e64,m8,ta,mu
; LMULMAX8-NEXT: vle64.v v8, (a0)
; LMULMAX8-NEXT: vrgather.vi v16, v8, 10
; LMULMAX8-NEXT: vse64.v v16, (a0)
; LMULMAX8-NEXT: ret
;
; LMULMAX1-LABEL: gather_const_v16f64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, a0, 80
; LMULMAX1-NEXT: addi a2, zero, 2
; LMULMAX1-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; LMULMAX1-NEXT: vle64.v v25, (a1)
; LMULMAX1-NEXT: addi a6, a0, 16
; LMULMAX1-NEXT: addi a7, a0, 48
; LMULMAX1-NEXT: addi a4, a0, 32
; LMULMAX1-NEXT: addi a5, a0, 64
; LMULMAX1-NEXT: addi a2, a0, 112
; LMULMAX1-NEXT: addi a3, a0, 96
; LMULMAX1-NEXT: vrgather.vi v26, v25, 0
; LMULMAX1-NEXT: vse64.v v26, (a3)
; LMULMAX1-NEXT: vse64.v v26, (a2)
; LMULMAX1-NEXT: vse64.v v26, (a5)
; LMULMAX1-NEXT: vse64.v v26, (a1)
; LMULMAX1-NEXT: vse64.v v26, (a4)
; LMULMAX1-NEXT: vse64.v v26, (a7)
; LMULMAX1-NEXT: vse64.v v26, (a0)
; LMULMAX1-NEXT: vse64.v v26, (a6)
; LMULMAX1-NEXT: ret
  %a = load <16 x double>, <16 x double>* %x
  %b = extractelement <16 x double> %a, i32 10
  %c = insertelement <16 x double> undef, double %b, i32 0
  %d = shufflevector <16 x double> %c, <16 x double> undef, <16 x i32> zeroinitializer
  store <16 x double> %d, <16 x double>* %x
  ret void
}
