
[AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. #88413

Merged
3 commits merged into llvm:main on Apr 19, 2024

Conversation

dtemirbulatov (Contributor):

Allow folding or/and-and into a BSL instruction for scalable vectors.

@llvmbot (Collaborator) commented Apr 11, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Dinar Temirbulatov (dtemirbulatov)

Changes

Allow folding or/and-and into a BSL instruction for scalable vectors.
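
For illustration, here is a minimal sketch (hypothetical function name, modelled on the tests added in this patch) of the scalable-vector IR shape the combine now recognises. Since `sub 0, %c` and `add %c, -1` are bitwise complements of each other, the or/and/and sequence is a bit select:

```llvm
; Sketch only: expected to lower to an SVE2 BSL (plus the negate) rather
; than separate AND/AND/ORR instructions.
define <vscale x 4 x i32> @fold_to_bsl(<vscale x 4 x i32> %c, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) "target-features"="+sve2" {
  %neg = sub <vscale x 4 x i32> zeroinitializer, %c    ; -c
  %dec = add <vscale x 4 x i32> %c, splat(i32 -1)      ; c - 1 == not(-c)
  %x_bits = and <vscale x 4 x i32> %neg, %x            ; x masked by -c
  %y_bits = and <vscale x 4 x i32> %dec, %y            ; y masked by not(-c)
  %sel = or <vscale x 4 x i32> %x_bits, %y_bits        ; bitwise select of x/y
  ret <vscale x 4 x i32> %sel
}
```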


Full diff: https://github.com/llvm/llvm-project/pull/88413.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+4-2)
  • (added) llvm/test/CodeGen/AArch64/sve2-bitselect.ll (+254)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 80181a77c9d238..d9aabb64125a4f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17942,11 +17942,13 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
       } else
         continue;
 
-      if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
+      if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()) &&
+          !ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
         continue;
 
       // Constant ones is always righthand operand of the Add.
-      if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
+      if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()) &&
+          !ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
         continue;
 
       if (Sub.getOperand(1) != Add.getOperand(0))
diff --git a/llvm/test/CodeGen/AArch64/sve2-bitselect.ll b/llvm/test/CodeGen/AArch64/sve2-bitselect.ll
new file mode 100644
index 00000000000000..9ceeffc2e5d2ab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-bitselect.ll
@@ -0,0 +1,254 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64"
+
+; Check that an expanded vbsl(vneg(pre_cond), left, right) lowers to a VBSL
+; during ISEL.
+;
+; Subtly different from a plain vector bit select: operand representing the
+; condition has been negated (-v, not to be confused with bitwise_not(v)).
+
+; Each vbsl_neg_cond_xxxx tests one of the 16 permutations of the operands.
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0000(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0000:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+  %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
+  ret <vscale x 4 x i32> %bsl0000
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0001(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0001:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+  %bsl0001 = or <vscale x 4 x i32> %right_bits_0, %left_bits_1
+  ret <vscale x 4 x i32> %bsl0001
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0010(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0010:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+  %bsl0010 = or <vscale x 4 x i32> %right_bits_1, %left_bits_0
+  ret <vscale x 4 x i32> %bsl0010
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0011(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0011:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+  %bsl0011 = or <vscale x 4 x i32> %right_bits_1, %left_bits_1
+  ret <vscale x 4 x i32> %bsl0011
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0100(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0100:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+  %bsl0100 = or <vscale x 4 x i32> %left_bits_0, %right_bits_0
+  ret <vscale x 4 x i32> %bsl0100
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0101(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0101:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+  %bsl0101 = or <vscale x 4 x i32> %left_bits_0, %right_bits_1
+  ret <vscale x 4 x i32> %bsl0101
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0110(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0110:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+  %bsl0110 = or <vscale x 4 x i32> %left_bits_1, %right_bits_0
+  ret <vscale x 4 x i32> %bsl0110
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_0111(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_0111:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %left_bits_1 = and <vscale x 4 x i32> %left, %neg_cond
+  %right_bits_1 = and <vscale x 4 x i32> %right, %min_cond
+  %bsl0111 = or <vscale x 4 x i32> %left_bits_1, %right_bits_1
+  ret <vscale x 4 x i32> %bsl0111
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1000(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1000:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+  %bsl1000 = or <vscale x 4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_0
+  ret <vscale x 4 x i32> %bsl1000
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1001(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1001:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+  %bsl1001 = or <vscale x 4 x i32> %flip_cond_right_bits_0, %flip_cond_left_bits_1
+  ret <vscale x 4 x i32> %bsl1001
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1010(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1010:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+  %bsl1010 = or <vscale x 4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_0
+  ret <vscale x 4 x i32> %bsl1010
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1011(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1011:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+  %bsl1011 = or <vscale x 4 x i32> %flip_cond_right_bits_1, %flip_cond_left_bits_1
+  ret <vscale x 4 x i32> %bsl1011
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1100(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1100:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+  %bsl1100 = or <vscale x 4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_0
+  ret <vscale x 4 x i32> %bsl1100
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1101(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1101:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_0 = and <vscale x 4 x i32> %min_cond, %left
+  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+  %bsl1101 = or <vscale x 4 x i32> %flip_cond_left_bits_0, %flip_cond_right_bits_1
+  ret <vscale x 4 x i32> %bsl1101
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1110(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1110:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+  %flip_cond_right_bits_0 = and <vscale x 4 x i32> %neg_cond, %right
+  %bsl1110 = or <vscale x 4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_0
+  ret <vscale x 4 x i32> %bsl1110
+}
+
+define <vscale x 4 x i32> @vbsl_neg_cond_1111(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: vbsl_neg_cond_1111:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+  %flip_cond_left_bits_1 = and <vscale x 4 x i32> %left, %min_cond
+  %flip_cond_right_bits_1 = and <vscale x 4 x i32> %right, %neg_cond
+  %bsl1111 = or <vscale x 4 x i32> %flip_cond_left_bits_1, %flip_cond_right_bits_1
+  ret <vscale x 4 x i32> %bsl1111
+}
+
+attributes #0 = { "target-features"="+sve2" }

Comment on lines 17945 to 17946
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()) &&
!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
Collaborator:
I believe isBuildVectorAllZeros is just a subset of isConstantSplatVectorAllZeros (it calls isConstantSplatVectorAllZeros with BuildVectorOnly=true), so hopefully only the second call is needed.

Contributor Author:
Done.


; Each vbsl_neg_cond_xxxx tests one of the 16 permutations of the operands.

define <vscale x 4 x i32> @vbsl_neg_cond_0000(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
Contributor:
Given that all the tests in this file are repeats of those in neon-bitselect.ll, I wonder if it's necessary to test all permutations. The NEON tests cover all cases and the code is the same for both NEON and SVE, so perhaps we only need @vbsl_neg_cond_0000 as a test?

Also, it's probably worth adding at least one test for the other DAG combine: (or (and a b) (and (not a) c)) => (bsl a b c). I think there are some NEON examples of this in neon-bitwise-instructions.ll.
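
For reference, a hypothetical test for that second combine could look roughly like this (the function name is made up, and `#0` assumes the same `"target-features"="+sve2"` attribute group as the file above; this is a sketch, not the test that was ultimately added):

```llvm
; (or (and a b) (and (not a) c)) => (bsl a b c)
define <vscale x 4 x i32> @vbsl_not_cond(<vscale x 4 x i32> %cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
  %not_cond = xor <vscale x 4 x i32> %cond, splat(i32 -1)
  %left_bits = and <vscale x 4 x i32> %cond, %left
  %right_bits = and <vscale x 4 x i32> %not_cond, %right
  %bsl = or <vscale x 4 x i32> %left_bits, %right_bits
  ret <vscale x 4 x i32> %bsl
}
```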

Contributor Author:
Done.

@david-arm (Contributor):

This is just a thought, but SVE2 also has NBSL which is essentially just NOT(BSL). Is there any value in adding new patterns for this too?

@dtemirbulatov (Contributor Author):

> This is just a thought, but SVE2 also has NBSL which is essentially just NOT(BSL). Is there any value in adding new patterns for this too?

Yes, I think I could do that in a follow-up patch.
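
For the record, a hedged sketch of IR that such a follow-up NBSL pattern might target; the final `xor` with all-ones inverts the select result, matching NOT(BSL). This is a hypothetical example, not part of this patch (`#0` again assumes the `+sve2` attribute group from the test file):

```llvm
define <vscale x 4 x i32> @nbsl_candidate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
  %not_a = xor <vscale x 4 x i32> %a, splat(i32 -1)
  %b_bits = and <vscale x 4 x i32> %a, %b
  %c_bits = and <vscale x 4 x i32> %not_a, %c
  %bsl = or <vscale x 4 x i32> %b_bits, %c_bits
  %nbsl = xor <vscale x 4 x i32> %bsl, splat(i32 -1)   ; NOT(bsl a, b, c)
  ret <vscale x 4 x i32> %nbsl
}
```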

@@ -17942,11 +17942,13 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
} else
continue;

if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()) &&
!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
Contributor:
Isn't the second call identical to the first? I think you only need one.

Contributor Author:
Done.

@davemgreen (Collaborator) left a comment:
Thanks. LGTM

@dtemirbulatov dtemirbulatov merged commit 6f26867 into llvm:main Apr 19, 2024
3 of 4 checks passed
@dtemirbulatov dtemirbulatov deleted the svl2-bsl-add-sub-fold branch April 19, 2024 10:00
aniplcc pushed a commit to aniplcc/llvm-project that referenced this pull request Apr 21, 2024
…lvm#88413)

Allow folding or/and-and into a BSL instruction for scalable vectors.