[RISCV] Optimize fixed vector ABS. Fix crash on scalable vector ABS for SEW=64 with RV32.

The default fixed vector expansion uses sra+xor+add since it can't see
that smax is legal due to our custom handling, so we manually select
smax(X, sub(0, X)) instead.
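
For reference, here is a scalar C++ sketch (not part of the patch) contrasting
the two expansions; the helper names are illustrative, and the arithmetic is
done in uint64_t so that INT64_MIN wraps the same way the two's-complement
hardware result does:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Default expansion shape: sra + add + xor (branchless sign-mask abs).
int64_t absViaSraAddXor(int64_t X) {
  uint64_t UX = static_cast<uint64_t>(X);
  uint64_t SignMask = static_cast<uint64_t>(X >> 63); // all ones if X < 0
  return static_cast<int64_t>((UX + SignMask) ^ SignMask);
}

// Expansion this patch selects: smax(X, sub(0, X)), which maps to a single
// vrsub.vi + vmax.vv per vector on RVV.
int64_t absViaSmax(int64_t X) {
  uint64_t NegX = uint64_t{0} - static_cast<uint64_t>(X); // wrapping 0 - X
  return std::max(X, static_cast<int64_t>(NegX));
}

int main() {
  const int64_t Tests[] = {5, -7, 0, INT64_MIN};
  for (int64_t X : Tests)
    assert(absViaSraAddXor(X) == absViaSmax(X));
}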

Scalable vectors are able to use the smax expansion automatically in
most cases. It crashes in one case because getConstant can't build a
SPLAT_VECTOR for nxvXi64 when i64 scalars aren't legal, so we manually
emit a SPLAT_VECTOR_I64 for that case.
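
For context, the generic expansion (when smax is legal) materializes the zero
splat with getConstant, roughly as in the sketch below. This is an
approximation for illustration, not code from this patch nor a verbatim copy
of the target-independent lowering; the point is that getConstant with an
nxvXi64 result type has to splat an i64 immediate, and i64 is not a legal
scalar type on RV32:

// Rough shape of the generic smax-based ABS expansion (illustrative only).
SDValue expandABSViaSMAXSketch(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue X = Op.getOperand(0);
  // For nxvXi64 on RV32 this needs a SPLAT_VECTOR of an i64 scalar,
  // which is where the crash occurred.
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
  return DAG.getNode(ISD::SMAX, DL, VT, X, NegX);
}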

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D97991
topperc committed Mar 9, 2021
1 parent 478317f commit 72ecf2f
Showing 4 changed files with 496 additions and 0 deletions.
43 changes: 43 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -431,6 +431,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);

if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64)
setOperationAction(ISD::ABS, VT, Custom);

setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);

@@ -575,6 +578,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);

setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
@@ -1591,6 +1595,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
case ISD::UMAX:
return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
case ISD::ABS:
return lowerABS(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
}
@@ -2901,6 +2907,43 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
}

// Lower vector ABS to smax(X, sub(0, X)).
SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0);

  // For scalable vectors we just need to deal with i64 on RV32 since the
  // default expansion crashes in getConstant.
  if (VT.isScalableVector()) {
    assert(!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64 &&
           "Unexpected custom lowering!");
    SDValue SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT,
                                    DAG.getConstant(0, DL, MVT::i32));
    SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, SplatZero, X);
    return DAG.getNode(ISD::SMAX, DL, VT, X, NegX);
  }

  assert(VT.isFixedLengthVector() && "Unexpected type");

  MVT ContainerVT =
      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
  X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue SplatZero =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
  SDValue NegX =
      DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
  SDValue Max =
      DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);

  return convertFromScalableVector(VT, Max, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -445,6 +445,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
198 changes: 198 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll
@@ -0,0 +1,198 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 1 x i16> @llvm.abs.nxv1i16(<vscale x 1 x i16>, i1)

define <vscale x 1 x i16> @vabs_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vabs_nxv1i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu
; CHECK-NEXT: vrsub.vi v25, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v25
; CHECK-NEXT: ret
%r = call <vscale x 1 x i16> @llvm.abs.nxv1i16(<vscale x 1 x i16> %v, i1 false)
ret <vscale x 1 x i16> %r
}

declare <vscale x 2 x i16> @llvm.abs.nxv2i16(<vscale x 2 x i16>, i1)

define <vscale x 2 x i16> @vabs_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vabs_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu
; CHECK-NEXT: vrsub.vi v25, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v25
; CHECK-NEXT: ret
%r = call <vscale x 2 x i16> @llvm.abs.nxv2i16(<vscale x 2 x i16> %v, i1 false)
ret <vscale x 2 x i16> %r
}

declare <vscale x 4 x i16> @llvm.abs.nxv4i16(<vscale x 4 x i16>, i1)

define <vscale x 4 x i16> @vabs_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vabs_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu
; CHECK-NEXT: vrsub.vi v25, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v25
; CHECK-NEXT: ret
%r = call <vscale x 4 x i16> @llvm.abs.nxv4i16(<vscale x 4 x i16> %v, i1 false)
ret <vscale x 4 x i16> %r
}

declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)

define <vscale x 8 x i16> @vabs_nxv8i16(<vscale x 8 x i16> %v) {
; CHECK-LABEL: vabs_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu
; CHECK-NEXT: vrsub.vi v26, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v26
; CHECK-NEXT: ret
%r = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %v, i1 false)
ret <vscale x 8 x i16> %r
}

declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)

define <vscale x 16 x i16> @vabs_nxv16i16(<vscale x 16 x i16> %v) {
; CHECK-LABEL: vabs_nxv16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu
; CHECK-NEXT: vrsub.vi v28, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v28
; CHECK-NEXT: ret
%r = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %v, i1 false)
ret <vscale x 16 x i16> %r
}

declare <vscale x 32 x i16> @llvm.abs.nxv32i16(<vscale x 32 x i16>, i1)

define <vscale x 32 x i16> @vabs_nxv32i16(<vscale x 32 x i16> %v) {
; CHECK-LABEL: vabs_nxv32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu
; CHECK-NEXT: vrsub.vi v16, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: ret
%r = call <vscale x 32 x i16> @llvm.abs.nxv32i16(<vscale x 32 x i16> %v, i1 false)
ret <vscale x 32 x i16> %r
}

declare <vscale x 1 x i32> @llvm.abs.nxv1i32(<vscale x 1 x i32>, i1)

define <vscale x 1 x i32> @vabs_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vabs_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu
; CHECK-NEXT: vrsub.vi v25, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v25
; CHECK-NEXT: ret
%r = call <vscale x 1 x i32> @llvm.abs.nxv1i32(<vscale x 1 x i32> %v, i1 false)
ret <vscale x 1 x i32> %r
}

declare <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32>, i1)

define <vscale x 2 x i32> @vabs_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vabs_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu
; CHECK-NEXT: vrsub.vi v25, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v25
; CHECK-NEXT: ret
%r = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> %v, i1 false)
ret <vscale x 2 x i32> %r
}

declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)

define <vscale x 4 x i32> @vabs_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vabs_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
; CHECK-NEXT: vrsub.vi v26, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v26
; CHECK-NEXT: ret
%r = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %v, i1 false)
ret <vscale x 4 x i32> %r
}

declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)

define <vscale x 8 x i32> @vabs_nxv8i32(<vscale x 8 x i32> %v) {
; CHECK-LABEL: vabs_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
; CHECK-NEXT: vrsub.vi v28, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v28
; CHECK-NEXT: ret
%r = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %v, i1 false)
ret <vscale x 8 x i32> %r
}

declare <vscale x 16 x i32> @llvm.abs.nxv16i32(<vscale x 16 x i32>, i1)

define <vscale x 16 x i32> @vabs_nxv16i32(<vscale x 16 x i32> %v) {
; CHECK-LABEL: vabs_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu
; CHECK-NEXT: vrsub.vi v16, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: ret
%r = call <vscale x 16 x i32> @llvm.abs.nxv16i32(<vscale x 16 x i32> %v, i1 false)
ret <vscale x 16 x i32> %r
}

declare <vscale x 1 x i64> @llvm.abs.nxv1i64(<vscale x 1 x i64>, i1)

define <vscale x 1 x i64> @vabs_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: vabs_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
; CHECK-NEXT: vrsub.vi v25, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v25
; CHECK-NEXT: ret
%r = call <vscale x 1 x i64> @llvm.abs.nxv1i64(<vscale x 1 x i64> %v, i1 false)
ret <vscale x 1 x i64> %r
}

declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)

define <vscale x 2 x i64> @vabs_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: vabs_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vrsub.vi v26, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v26
; CHECK-NEXT: ret
%r = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %v, i1 false)
ret <vscale x 2 x i64> %r
}

declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)

define <vscale x 4 x i64> @vabs_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: vabs_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vrsub.vi v28, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v28
; CHECK-NEXT: ret
%r = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %v, i1 false)
ret <vscale x 4 x i64> %r
}

declare <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64>, i1)

define <vscale x 8 x i64> @vabs_nxv8i64(<vscale x 8 x i64> %v) {
; CHECK-LABEL: vabs_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
; CHECK-NEXT: vrsub.vi v16, v8, 0
; CHECK-NEXT: vmax.vv v8, v8, v16
; CHECK-NEXT: ret
%r = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> %v, i1 false)
ret <vscale x 8 x i64> %r
}