[RISCV][ISel] Remove redundant min/max in saturating truncation #75145

sun-jacobi · 2023-12-12T07:43:05Z

This patch closes #73424, a missed-optimization case similar to #68466 on X86.

Source Code

define void @trunc_sat_i8i16(ptr %x, ptr %y) {
  %1 = load <8 x i16>, ptr %x, align 16
  %2 = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %1, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>)
  %3 = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %2, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>)
  %4 = trunc <8 x i16> %3 to <8 x i8>
  store <8 x i8> %4, ptr %y, align 8
  ret void
}

Before this patch:

trunc_sat_i8i16:                  # @trunc_sat_i8i16
        vsetivli        zero, 8, e16, m1, ta, ma
        vle16.v v8, (a0)
        li      a0, -128
        vmax.vx v8, v8, a0
        li      a0, 127
        vmin.vx v8, v8, a0
        vsetvli zero, zero, e8, mf2, ta, ma
        vnsrl.wi        v8, v8, 0
        vse8.v  v8, (a1)
        ret

After this patch:

trunc_sat_i8i16:                  # @trunc_sat_i8i16
	vsetivli	zero, 8, e8, mf2, ta, ma
	vle16.v	v8, (a0)
	csrwi	vxrm, 0
	vnclip.wi	v8, v8, 0
	vse8.v	v8, (a1)
	ret

llvmbot · 2023-12-12T07:43:32Z

@llvm/pr-subscribers-backend-risc-v

Author: Chia (sun-jacobi)

Changes

This patch aims to fix a missed-optimization case similar to #68466 on X86.

Source Code

define void @trunc_maxmin_id_i8i16(ptr %x, ptr %y) {
  %1 = load <8 x i16>, ptr %x, align 16
  %2 = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %1, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>)
  %3 = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %2, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>)
  %4 = trunc <8 x i16> %3 to <8 x i8>
  store <8 x i8> %4, ptr %y, align 8
  ret void
}

Before this patch:

trunc_maxmin_id_i8i16:                  # @trunc_maxmin_id_i8i16
        vsetivli        zero, 8, e16, m1, ta, ma
        vle16.v v8, (a0)
        li      a0, -128
        vmax.vx v8, v8, a0
        li      a0, 127
        vmin.vx v8, v8, a0
        vsetvli zero, zero, e8, mf2, ta, ma
        vnsrl.wi        v8, v8, 0
        vse8.v  v8, (a1)
        ret

After this patch:

trunc_maxmin_id_i8i16:                  # @trunc_maxmin_id_i8i16
	vsetivli	zero, 8, e8, mf2, ta, ma
	vle16.v	v8, (a0)
	vnsrl.wi	v8, v8, 0
	vse8.v	v8, (a1)
	ret

This patch is also inspired by #73424, but it does not use vnclip.

Full diff: https://github.com/llvm/llvm-project/pull/75145.diff

1 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td (+54)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index dc6b57fad3210..91eb9d775682c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1618,6 +1618,60 @@ multiclass VPatBinaryFPWVL_VV_VF_WV_WF_RM<SDNode vop, SDNode vop_w, string instr
   }
 }
 
+
+multiclass VPatTruncSplatMaxMinIdentityBase<VTypeInfo vti, VTypeInfo wti,
+  SDPatternOperator vop1, int vid1, SDPatternOperator vop2, int vid2> {
+  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                               GetVTypePredicates<wti>.Predicates) in
+  def : Pat<(vti.Vector (riscv_trunc_vector_vl
+    (wti.Vector (vop1
+        (wti.Vector (vop2
+            (wti.Vector wti.RegClass:$rs1),
+            (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid2, (XLenVT srcvalue))),
+            (wti.Vector undef),(wti.Mask V0), VLOpFrag)),
+        (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid1, (XLenVT srcvalue))),
+        (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+    (vti.Mask V0), VLOpFrag)),
+    (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK")
+      (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+      (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+}
+
+multiclass VPatTruncSplatMinIdentityBase<VTypeInfo vti, VTypeInfo wti,
+  SDPatternOperator vop, int vid> {
+  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                               GetVTypePredicates<wti>.Predicates) in
+  def : Pat<(vti.Vector (riscv_trunc_vector_vl
+    (wti.Vector (vop
+      (wti.Vector wti.RegClass:$rs1),
+      (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), vid, (XLenVT srcvalue))),
+      (wti.Vector undef), (wti.Mask V0), VLOpFrag)),
+    (vti.Mask V0), VLOpFrag)),
+    (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK")
+      (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0,
+      (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+}
+
+
+multiclass VPatTruncSplatMaxMinIdentity<VTypeInfo vti, VTypeInfo wti> {
+  defvar sew = vti.SEW;
+  defvar umin_id = !sub(!shl(1, sew), 1);
+  defvar umax_id = 0;
+  defvar smin_id = !sub(!shl(1, !sub(sew, 1)), 1);
+  defvar smax_id = !sub(0, !shl(1, !sub(sew, 1)));
+
+  defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_umax_vl, umax_id, riscv_umin_vl, umin_id>;
+  defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_umin_vl, umin_id, riscv_umax_vl, umax_id>;
+  defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_smin_vl, smin_id, riscv_smax_vl, smax_id>;
+  defm : VPatTruncSplatMaxMinIdentityBase<vti, wti, riscv_smax_vl, smax_id, riscv_smin_vl, smin_id>;
+
+  defm : VPatTruncSplatMinIdentityBase<vti, wti, riscv_umin_vl, umin_id>;
+
+}
+
+foreach vtiToWti = AllWidenableIntVectors in
+  defm : VPatTruncSplatMaxMinIdentity<vtiToWti.Vti, vtiToWti.Wti>;
+
 multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruction_name> {
   foreach vtiToWti = AllWidenableIntVectors in {
     defvar vti = vtiToWti.Vti;

topperc · 2023-12-12T08:18:18Z

How can you use just vnsrl? vnsrl discards the upper bits without any max or min.

sun-jacobi · 2023-12-12T08:34:09Z

How can you use just vnsrl? vnsrl discards the upper bits without any max or min.

Yes, you are right.

But if the range created by a pair of max/min operation is precisely the range of the truncation destination,
then the pair could be removed.

In the above case, [-128, 127] is the value range of i8.
Since we truncate the i16 vector into an i8 vector, removing the max/min won't change the final result, AFAIU.

topperc · 2023-12-12T09:00:05Z

How can you use just vnsrl? vnsrl discards the upper bits without any max or min.

Yes, you are right.

But if the range created by a pair of max/min operation is precisely the range of the truncation destination,

then the pair could be removed.

In the above case, [-128, 127] is the value range of i8.

Since we truncate the i16 vector into an i8 vector, removing the max/min won't change the final result, AFAIU.

Let's imagine the input is -32767(0x8001) in i16. The smax with -128 should give -128(0xff80). Then it will be truncated to 0x80 which is -128 in i8.

A vnsrl by itself will drop the upper bits and give 0x01. This is incorrect.

topperc

I don't think this is correct. We need to use vnclip.

sun-jacobi · 2023-12-14T07:02:52Z

Thank you for correcting me. I will fix it soon.

topperc · 2023-12-16T06:21:58Z

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

+  defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
+  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+  defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_umax_vl,


Shouldn't umax/umin use VNCLIPU and smax/smin use VNCLIP?

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

topperc · 2023-12-20T17:53:37Z

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

+  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+  defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_umax_vl,
+                                                umaxval, riscv_umin_vl, uminval>;


this is indented too far

topperc · 2023-12-20T17:53:44Z

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

+  defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_umax_vl,
+                                                umaxval, riscv_umin_vl, uminval>;
+  defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl,
+                                                sminval, riscv_smax_vl, smaxval>;


topperc · 2023-12-20T17:55:54Z

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

+  defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1);
+  defvar smaxval = !sub(0, !shl(1, !sub(sew, 1)));
+
+  defm : VPatTruncSatClipMaxMin<"PseudoVNCLIPU", vti, wti, riscv_umax_vl,


Is this unsigned pattern needed? Won't a umax with 0 be deleted by the DAG combiner, leaving only the umin?

topperc

LGTM

sun-jacobi · 2023-12-28T04:40:02Z

@topperc

Could you help me request more reviews?
It seems that I cannot request a review (or merge) without write access.

wangpc-pp · 2023-12-28T06:12:42Z

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

+  SDPatternOperator op1, int op1_value, SDPatternOperator op2, int op2_value> {
+  let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                               GetVTypePredicates<wti>.Predicates) in
+  def : Pat<(vti.Vector (riscv_trunc_vector_vl


There is just one def in each of VPatTruncSatClipMaxMinBase and VPatTruncSatClipUMin; can we use class inheritance here?

Fixes llvm#73424. If the range created by a min and max is precisely the range of trunc target, the min/max could be removed.

…UMin

Similar to #75145, but for scalable vectors. Specifically, this patch works for the below optimization case:

## Source Code

```
define void @trunc_sat_i8i16_maxmin(ptr %x, ptr %y) {
  %1 = load <vscale x 4 x i16>, ptr %x, align 16
  %2 = tail call <vscale x 4 x i16> @llvm.smax.v4i16(<vscale x 4 x i16> %1, <vscale x 4 x i16> splat (i16 -128))
  %3 = tail call <vscale x 4 x i16> @llvm.smin.v4i16(<vscale x 4 x i16> %2, <vscale x 4 x i16> splat (i16 127))
  %4 = trunc <vscale x 4 x i16> %3 to <vscale x 4 x i8>
  store <vscale x 4 x i8> %4, ptr %y, align 8
  ret void
}
```

## Before this patch

[Compiler Explorer](https://godbolt.org/z/EKc9eGvo8)

```
trunc_sat_i8i16_maxmin:
        vl1re16.v       v8, (a0)
        li      a0, -128
        vsetvli a2, zero, e16, m1, ta, ma
        vmax.vx v8, v8, a0
        li      a0, 127
        vmin.vx v8, v8, a0
        vsetvli zero, zero, e8, mf2, ta, ma
        vnsrl.wi        v8, v8, 0
        vse8.v  v8, (a1)
        ret
```

## After this patch

```
trunc_sat_i8i16_maxmin:
        vsetivli        zero, 4, e8, mf4, ta, ma
        vle16.v v8, (a0)
        vnclip.wi       v8, v8, 0
        vse8.v  v8, (a1)
        ret
```

llvmbot added the backend:RISC-V label Dec 12, 2023

topperc requested changes Dec 13, 2023

View reviewed changes

topperc reviewed Dec 16, 2023

View reviewed changes

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td Outdated Show resolved Hide resolved

sun-jacobi requested a review from topperc December 18, 2023 07:51

topperc reviewed Dec 20, 2023

View reviewed changes

sun-jacobi requested a review from topperc December 21, 2023 05:48

topperc approved these changes Dec 21, 2023

View reviewed changes

topperc requested review from preames, wangpc-pp and lukel97 December 28, 2023 04:40

wangpc-pp reviewed Dec 28, 2023

View reviewed changes

sun-jacobi requested a review from wangpc-pp December 28, 2023 13:38

sun-jacobi added 10 commits December 29, 2023 13:54

[RISCV][ISel] remove redundant min/max followed by a trunc.

af443b0

Fixes llvm#73424. If the range created by a min and max is precisely the range of trunc target, the min/max could be removed.

[RISCV][Isel] use vnclip for saturating truncation.

d21e0f3

[RISCV][Isel] fix (s|u)(max|min) value usage for saturating truncation.

c3eeb1a

[RISCV] fix uminval in VPatTruncSatClipUMin.

a2deab0

[RISCV][ISel] update fpclamptosat_vec.ll

d6d3e25

[RISCV] add trunc-sat-clip.ll test

54a59e0

[RISCV] update fpclamptosat_vec.ll

0565df3

[RISCV] fix indent for VPatTruncSatClipMaxMin

10b262f

[RISCV] remove VPatTruncSatClipMaxMin for unsigned saturating truncation

d29ebba

[RISCV] use class for VPatTruncSatClipMaxMinBase and VPatTruncSatClip…

4d9b75d

…UMin

sun-jacobi force-pushed the combine-trunc-minmax-identity branch from f4a3ee0 to 4d9b75d Compare December 29, 2023 05:22

ChunyuLiao merged commit 87779fd into llvm:main Dec 29, 2023
4 checks passed

sun-jacobi deleted the combine-trunc-minmax-identity branch December 29, 2023 14:11

sun-jacobi mentioned this pull request Mar 20, 2024

[SelectionDAG] Add a new ISD Node for vector saturating truncation #85903

Open

sun-jacobi mentioned this pull request Apr 14, 2024

[RISCV] Use vnclip for scalable vector saturating truncation. #88648

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RISCV][ISel] Remove redundant min/max in saturating truncation #75145

[RISCV][ISel] Remove redundant min/max in saturating truncation #75145

sun-jacobi commented Dec 12, 2023 •

edited

llvmbot commented Dec 12, 2023

Source Code

Before this patch:

After this patch:

topperc commented Dec 12, 2023

sun-jacobi commented Dec 12, 2023 •

edited

topperc commented Dec 12, 2023

topperc left a comment

sun-jacobi commented Dec 14, 2023

topperc Dec 16, 2023

topperc Dec 20, 2023

topperc Dec 20, 2023

topperc Dec 20, 2023

topperc left a comment

sun-jacobi commented Dec 28, 2023

wangpc-pp Dec 28, 2023

[RISCV][ISel] Remove redundant min/max in saturating truncation #75145

[RISCV][ISel] Remove redundant min/max in saturating truncation #75145

Conversation

sun-jacobi commented Dec 12, 2023 • edited

Source Code

Before this patch:

After this patch:

llvmbot commented Dec 12, 2023

Source Code

Before this patch:

After this patch:

topperc commented Dec 12, 2023

sun-jacobi commented Dec 12, 2023 • edited

topperc commented Dec 12, 2023

topperc left a comment

Choose a reason for hiding this comment

sun-jacobi commented Dec 14, 2023

topperc Dec 16, 2023

Choose a reason for hiding this comment

topperc Dec 20, 2023

Choose a reason for hiding this comment

topperc Dec 20, 2023

Choose a reason for hiding this comment

topperc Dec 20, 2023

Choose a reason for hiding this comment

topperc left a comment

Choose a reason for hiding this comment

sun-jacobi commented Dec 28, 2023

wangpc-pp Dec 28, 2023

Choose a reason for hiding this comment

sun-jacobi commented Dec 12, 2023 •

edited

sun-jacobi commented Dec 12, 2023 •

edited