Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a). #88294

Merged
merged 3 commits into llvm:main from sve-reversed-while
Apr 23, 2024

Conversation

paulwalker-arm
Copy link
Collaborator

@paulwalker-arm paulwalker-arm commented Apr 10, 2024

Add similar isel patterns for lt, gt and hi comparison types.

@llvmbot
Copy link
Collaborator

llvmbot commented Apr 10, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

Add similar isel patterns for all comparison types.


Patch is 33.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88294.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+17-17)
  • (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+22-2)
  • (added) llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll (+677)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a519d81362a73a..b37d926ab1816b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2037,15 +2037,15 @@ let Predicates = [HasSVEorSME] in {
   defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
   defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
 
-  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
-  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
-  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
-  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels>;
+  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
+  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
+  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
 
-  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
-  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele>;
-  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
-  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels>;
+  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
+  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
+  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
 
   def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
   def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
@@ -3792,15 +3792,15 @@ let Predicates = [HasSVE2orSME] in {
   defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx", 0b01, int_aarch64_sve_tbx>;
 
   // SVE2 integer compare scalar count and limit
-  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
-  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
-  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
-  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
-
-  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege>;
-  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
-  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
-  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
+  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
+  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
+
+  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
+  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
 
   // SVE2 pointer conflict compare
   defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index ee8292fdd8839a..e34f26956f3bca 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5359,7 +5359,8 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
   let isWhile = 1;
 }
 
-multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op,
+                             SDPatternOperator rev_op> {
   def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
   def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
   def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
@@ -5369,9 +5370,19 @@ multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv8i1,  op, i32, i32, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4i1,  op, i32, i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2i1,  op, i32, i32, !cast<Instruction>(NAME # _D)>;
+
+  def : Pat<(nxv16i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_B") $op1, $op2)>;
+  def : Pat<(nxv8i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_H") $op1, $op2)>;
+  def : Pat<(nxv4i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_S") $op1, $op2)>;
+  def : Pat<(nxv2i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_D") $op1, $op2)>;
 }
 
-multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op,
+                             SDPatternOperator rev_op> {
   def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
   def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
   def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
@@ -5381,6 +5392,15 @@ multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv8i1,  op, i64, i64, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4i1,  op, i64, i64, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2i1,  op, i64, i64, !cast<Instruction>(NAME # _D)>;
+
+  def : Pat<(nxv16i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_B") $op1, $op2)>;
+  def : Pat<(nxv8i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_H") $op1, $op2)>;
+  def : Pat<(nxv4i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_S") $op1, $op2)>;
+  def : Pat<(nxv2i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_D") $op1, $op2)>;
 }
 
 class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
new file mode 100644
index 00000000000000..b31922b8bc30ad
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
@@ -0,0 +1,677 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mattr=+sme < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; WHILEGE
+;
+
+define <vscale x 16 x i1> @whilege_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilege_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilege_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilege_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilege_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilege_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilege_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEHS
+;
+
+define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehs_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehs_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehs_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehs_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEGT
+;
+
+define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilegt_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilegt_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilegt_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilegt_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilegt_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilegt_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEHI
+;
+
+define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilehi_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehi_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehi_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehi_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, x0, x1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehi_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, w0, w1
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, x0, x1
+; CHE...
[truncated]

Copy link
Collaborator

@huntergr-arm huntergr-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hadn't appreciated that whilegt and friends counted backwards from the last element, but it makes sense. LGTM.

@paulwalker-arm
Copy link
Collaborator Author

It turns out the while instructions don't have infinite precision, so the equality variants (le, ge, ls and hs) behave differently when an operand is the maximum/minimum integer (wrapping generally results in an all-true predicate), which makes this transformation invalid for them. I'll upload a new version with those patterns removed.

Copy link
Contributor

@david-arm david-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

@paulwalker-arm paulwalker-arm merged commit a9689c6 into llvm:main Apr 23, 2024
4 checks passed
@paulwalker-arm paulwalker-arm deleted the sve-reversed-while branch April 23, 2024 10:39
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants