Skip to content

Commit

Permalink
[RISCV] Support multiple levels of truncates in combineTruncToVnclip. (
Browse files Browse the repository at this point in the history
…#93752)

We can use multiple vnclips to saturate an i32 value into an i8 value.
  • Loading branch information
topperc authored May 31, 2024
1 parent 5c7f7cc commit edf4e02
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 211 deletions.
32 changes: 24 additions & 8 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16285,21 +16285,37 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
return SDValue();
};

SDValue Src = N->getOperand(0);

// Look through multiple layers of truncates.
while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
Src.hasOneUse())
Src = Src.getOperand(0);

SDValue Val;
unsigned ClipOpc;
if ((Val = DetectUSatPattern(N->getOperand(0))))
if ((Val = DetectUSatPattern(Src)))
ClipOpc = RISCVISD::VNCLIPU_VL;
else if ((Val = DetectSSatPattern(N->getOperand(0))))
else if ((Val = DetectSSatPattern(Src)))
ClipOpc = RISCVISD::VNCLIP_VL;
else
return SDValue();

// Rounding mode here is arbitrary since we aren't shifting out any bits.
return DAG.getNode(
ClipOpc, DL, VT,
{Val, DAG.getConstant(0, DL, VT), DAG.getUNDEF(VT), Mask,
DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),
VL});
MVT ValVT = Val.getSimpleValueType();

do {
MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
ValVT = ValVT.changeVectorElementType(ValEltVT);
// Rounding mode here is arbitrary since we aren't shifting out any bits.
Val = DAG.getNode(
ClipOpc, DL, ValVT,
{Val, DAG.getConstant(0, DL, ValVT), DAG.getUNDEF(VT), Mask,
DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),
VL});
} while (ValVT != VT);

return Val;
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
Expand Down
144 changes: 44 additions & 100 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
Original file line number Diff line number Diff line change
Expand Up @@ -392,16 +392,11 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: li a0, -128
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
Expand All @@ -415,16 +410,11 @@ define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: li a0, -128
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
Expand All @@ -438,14 +428,11 @@ define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
Expand All @@ -461,12 +448,10 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
Expand All @@ -482,13 +467,11 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i32>, ptr %x, align 16
Expand All @@ -502,18 +485,13 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a0, -128
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
Expand All @@ -527,18 +505,13 @@ define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a0, 127
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: li a0, -128
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclip.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
Expand All @@ -552,16 +525,13 @@ define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
Expand All @@ -577,14 +547,12 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
Expand All @@ -600,15 +568,13 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vmax.vx v8, v8, zero
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 0
; CHECK-NEXT: vnclipu.wi v8, v8, 0
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
Expand All @@ -622,17 +588,11 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: addiw a0, a0, -1
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
Expand All @@ -646,17 +606,11 @@ define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: addiw a0, a0, -1
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: lui a0, 1048568
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclip.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclip.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
Expand Down Expand Up @@ -691,15 +645,11 @@ define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addiw a0, a0, -1
; CHECK-NEXT: vminu.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 32
Expand All @@ -716,13 +666,10 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addiw a0, a0, -1
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
Expand All @@ -738,15 +685,12 @@ define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addiw a0, a0, -1
; CHECK-NEXT: vmin.vx v8, v8, a0
; CHECK-NEXT: li a0, 50
; CHECK-NEXT: vmax.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vnsrl.wi v10, v8, 0
; CHECK-NEXT: vnclipu.wi v10, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vnclipu.wi v8, v10, 0
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%1 = load <4 x i64>, ptr %x, align 16
Expand Down
Loading

0 comments on commit edf4e02

Please sign in to comment.