[LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a). #88294

paulwalker-arm · 2024-04-10T16:38:55Z

Add similar isel patterns for lt, gt and hi comparison types.

Add similar isel patterns for all comparison types.

llvmbot · 2024-04-10T16:39:30Z

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

Add similar isel patterns for all comparison types.

Patch is 33.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88294.diff

3 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+17-17)
(modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+22-2)
(added) llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll (+677)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a519d81362a73a..b37d926ab1816b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2037,15 +2037,15 @@ let Predicates = [HasSVEorSME] in {
   defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
   defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
 
-  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
-  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
-  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
-  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels>;
+  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
+  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
+  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
 
-  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
-  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele>;
-  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
-  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels>;
+  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
+  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
+  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
 
   def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
   def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
@@ -3792,15 +3792,15 @@ let Predicates = [HasSVE2orSME] in {
   defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx", 0b01, int_aarch64_sve_tbx>;
 
   // SVE2 integer compare scalar count and limit
-  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
-  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
-  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
-  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
-
-  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege>;
-  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
-  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
-  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
+  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
+  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
+
+  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
+  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
 
   // SVE2 pointer conflict compare
   defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index ee8292fdd8839a..e34f26956f3bca 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5359,7 +5359,8 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
   let isWhile = 1;
 }
 
-multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op,
+                             SDPatternOperator rev_op> {
   def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
   def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
   def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
@@ -5369,9 +5370,19 @@ multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv8i1,  op, i32, i32, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4i1,  op, i32, i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2i1,  op, i32, i32, !cast<Instruction>(NAME # _D)>;
+
+  def : Pat<(nxv16i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_B") $op1, $op2)>;
+  def : Pat<(nxv8i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_H") $op1, $op2)>;
+  def : Pat<(nxv4i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_S") $op1, $op2)>;
+  def : Pat<(nxv2i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_D") $op1, $op2)>;
 }
 
-multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op,
+                             SDPatternOperator rev_op> {
   def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
   def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
   def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
@@ -5381,6 +5392,15 @@ multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv8i1,  op, i64, i64, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4i1,  op, i64, i64, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2i1,  op, i64, i64, !cast<Instruction>(NAME # _D)>;
+
+  def : Pat<(nxv16i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_B") $op1, $op2)>;
+  def : Pat<(nxv8i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_H") $op1, $op2)>;
+  def : Pat<(nxv4i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_S") $op1, $op2)>;
+  def : Pat<(nxv2i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_D") $op1, $op2)>;
 }
 
 class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
new file mode 100644
index 00000000000000..b31922b8bc30ad
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
@@ -0,0 +1,677 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mattr=+sme < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; WHILEGE
+;
+
+define <vscale x 16 x i1> @whilege_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilege_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilege_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilege_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilege_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilege_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilege_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEHS
+;
+
+define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehs_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehs_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehs_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehs_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEGT
+;
+
+define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilegt_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilegt_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilegt_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilegt_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilegt_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilegt_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEHI
+;
+
+define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilehi_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehi_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehi_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehi_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehi_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, x0, x1
+; CHE...
[truncated]

huntergr-arm

Hadn't appreciated that whilegt and friends counted backwards from the last element, but it makes sense. LGTM.

paulwalker-arm · 2024-04-17T10:03:14Z

It turns out the while instructions don't have infinite precision, and thus the equality variants behave differently when a maximum/minimum integer is involved (generally resulting in an all-true predicate when wrapping occurs), which makes this transformation invalid. I'll upload a new version with those patterns removed.

david-arm

LGTM!

paulwalker-arm added 2 commits April 10, 2024 17:13

Add tests for rev(while##(a,b)).

ff5a750

[LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a).

4090b5b

Add similar isel patterns for all comparison types.

llvmbot added the backend:AArch64 label Apr 10, 2024

paulwalker-arm requested review from huntergr-arm and david-arm April 15, 2024 11:51

huntergr-arm approved these changes Apr 16, 2024

View reviewed changes

Prevent unsafe transformation for the equality variants.

8af2994

david-arm approved these changes Apr 19, 2024

View reviewed changes

paulwalker-arm merged commit a9689c6 into llvm:main Apr 23, 2024
4 checks passed

paulwalker-arm deleted the sve-reversed-while branch April 23, 2024 10:39

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a). #88294

[LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a). #88294

paulwalker-arm commented Apr 10, 2024 •

edited

Loading

llvmbot commented Apr 10, 2024

huntergr-arm left a comment

paulwalker-arm commented Apr 17, 2024

david-arm left a comment

[LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a). #88294

[LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a). #88294

Conversation

paulwalker-arm commented Apr 10, 2024 • edited Loading

llvmbot commented Apr 10, 2024

huntergr-arm left a comment

Choose a reason for hiding this comment

paulwalker-arm commented Apr 17, 2024

david-arm left a comment

Choose a reason for hiding this comment

paulwalker-arm commented Apr 10, 2024 •

edited

Loading