[AArch64] Fold BUILD_VECTORs splats into users by using SVE immediates #165559
Conversation
When lowering Neon BUILD_VECTOR nodes, attempt to fold constant splats into their users by promoting those users to SVE when the splat immediate is a legal SVE immediate operand.
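As a concrete sketch of the transform, here is the `add` case from the imm-splat-ops.ll test added by this patch; the before/after assembly is quoted from the test's CHECK lines:

define <4 x i32> @add(<4 x i32> noundef %a) {
entry:
  %add = add <4 x i32> %a, splat (i32 1)
  ret <4 x i32> %add
}

; Neon only: the splat constant is materialized in a register first.
;   movi v1.4s, #1
;   add  v0.4s, v0.4s, v1.4s
; With SVE available, the splat folds into an immediate operand.
;   add  z0.s, z0.s, #1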
}

static SDValue tryFoldSplatIntoUsersWithSVE(SDValue Op, SelectionDAG &DAG) {
  auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
I'm not sure if this is necessarily always preferable, or if there's some target property this should be dependent on?
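For reference, the fold only triggers when the splat has a legal SVE immediate form for the user's opcode; per isSVESplatImmForOp, for ADD/SUB that is an unsigned 8-bit value, optionally shifted left by 8 (a 16-bit multiple of 256, for element types wider than 8 bits). A minimal sketch of that boundary, with hypothetical function names (the patch's own tests cover the same cases as @sub and @add_not_multiple_of_256):

; 512 = 2 << 8 is a legal shifted add immediate, so this folds to:
;   add z0.s, z0.s, #512
define <4 x i32> @add_512(<4 x i32> %a) {
  %r = add <4 x i32> %a, splat (i32 512)
  ret <4 x i32> %r
}

; 513 is neither a uimm8 nor a multiple of 256, so the splat is still
; materialized and added with the Neon add:
;   mov w8, #513
;   dup v1.4s, w8
;   add v0.4s, v0.4s, v1.4s
define <4 x i32> @add_513(<4 x i32> %a) {
  %r = add <4 x i32> %a, splat (i32 513)
  ret <4 x i32> %r
}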
@llvm/pr-subscribers-backend-aarch64

Author: Benjamin Maxwell (MacDue)

Changes

When lowering Neon BUILD_VECTOR nodes, attempt to fold constant splats into their users by promoting those users to SVE when the splat immediate is a legal SVE immediate operand.

Patch is 27.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165559.diff

6 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60aa61e993b26..14debf6f8a40b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15501,6 +15501,72 @@ SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
return convertFromScalableVector(DAG, VT, Vec);
}
+static std::optional<int64_t> getSplatConstant(SDValue V,
+ ConstantSDNode *&Const) {
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V))
+ if ((Const = dyn_cast_if_present<ConstantSDNode>(BV->getSplatValue())))
+ return Const->getZExtValue();
+ return std::nullopt;
+}
+
+static bool isSVESplatImmForOp(unsigned Opcode, MVT VT, int64_t SplatImm) {
+ // TODO: Support more than integer binops.
+ switch (Opcode) {
+ case ISD::SUB:
+ case ISD::ADD:
+ return isUInt<8>(SplatImm) || (VT.getFixedSizeInBits() > 8 &&
+ isUInt<16>(SplatImm) && SplatImm % 256 == 0);
+ case ISD::XOR:
+ case ISD::OR:
+ case ISD::AND:
+ return AArch64_AM::isLogicalImmediate(SplatImm, 64);
+ case ISD::MUL:
+ return isInt<8>(SplatImm);
+ default:
+ return false;
+ }
+}
+
+static SDValue tryFoldSplatIntoUsersWithSVE(SDValue Op, SelectionDAG &DAG) {
+ auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ if (!Subtarget.isSVEorStreamingSVEAvailable())
+ return SDValue();
+
+ EVT VT = Op->getValueType(0);
+ if (!VT.is128BitVector())
+ return SDValue();
+
+ ConstantSDNode *Splat;
+ auto SplatImm = getSplatConstant(Op, Splat);
+ if (!SplatImm)
+ return SDValue();
+
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+
+ for (SDUse &U : Op->uses()) {
+ SDNode *User = U.getUser();
+ unsigned UserOpc = User->getOpcode();
+ if (U.getOperandNo() != 1 ||
+ !isSVESplatImmForOp(UserOpc, VT.getScalarType().getSimpleVT(),
+ *SplatImm))
+ continue;
+
+ SDLoc DL(U);
+ SDValue LHS =
+ convertToScalableVector(DAG, ContainerVT, User->getOperand(0));
+ SDValue SVESplat = DAG.getSplatVector(ContainerVT, DL, SDValue(Splat, 0));
+ SDValue Result = DAG.getNode(UserOpc, DL, ContainerVT, LHS, SVESplat);
+ Result = convertFromScalableVector(DAG, VT, Result);
+ DAG.ReplaceAllUsesWith(SDValue(User, 0), Result);
+ }
+
+ // FIXME: We always have to return SDValue() as LowerBUILD_VECTOR is called in
+ // many places, and there's no guarantee `Op->uses()` contains all the users.
+ // This means the BV will still be lowered (but then DCE'd if we replaced all
+ // users in this fold).
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -15535,6 +15601,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op;
}
+ if (SDValue V = tryFoldSplatIntoUsersWithSVE(Op, DAG))
+ return V;
+
if (SDValue V = ConstantBuildVector(Op, DAG, Subtarget))
return V;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 0cd885e599817..00c25f72ec579 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -385,11 +385,11 @@ define <4 x i32> @amull_v4i16_v4i32(ptr %A, ptr %B) nounwind {
;
; CHECK-SVE-LABEL: amull_v4i16_v4i32:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: ldr d1, [x0]
-; CHECK-SVE-NEXT: ldr d2, [x1]
-; CHECK-SVE-NEXT: movi v0.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: smull v1.4s, v1.4h, v2.4h
-; CHECK-SVE-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT: ldr d0, [x0]
+; CHECK-SVE-NEXT: ldr d1, [x1]
+; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_v4i16_v4i32:
@@ -421,11 +421,11 @@ define <2 x i64> @amull_v2i32_v2i64(ptr %A, ptr %B) nounwind {
;
; CHECK-SVE-LABEL: amull_v2i32_v2i64:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: ldr d1, [x0]
-; CHECK-SVE-NEXT: ldr d2, [x1]
-; CHECK-SVE-NEXT: movi v0.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: smull v1.2d, v1.2s, v2.2s
-; CHECK-SVE-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-SVE-NEXT: ldr d0, [x0]
+; CHECK-SVE-NEXT: ldr d1, [x1]
+; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_v2i32_v2i64:
@@ -609,8 +609,8 @@ define <4 x i32> @amlal_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlal v0.4s, v1.4h, v2.4h
-; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlal_v4i16_v4i32:
@@ -650,8 +650,8 @@ define <2 x i64> @amlal_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlal v0.2d, v1.2s, v2.2s
-; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlal_v2i32_v2i64:
@@ -838,8 +838,8 @@ define <4 x i32> @amlsl_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlsl v0.4s, v1.4h, v2.4h
-; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlsl_v4i16_v4i32:
@@ -879,8 +879,8 @@ define <2 x i64> @amlsl_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlsl v0.2d, v1.2s, v2.2s
-; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlsl_v2i32_v2i64:
@@ -1118,8 +1118,8 @@ define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-SVE-NEXT: mov w8, #1234 // =0x4d2
; CHECK-SVE-NEXT: dup v1.4h, w8
; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h
-; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_extvec_v4i16_v4i32:
@@ -1151,8 +1151,8 @@ define <2 x i64> @amull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-SVE-NEXT: mov w8, #1234 // =0x4d2
; CHECK-SVE-NEXT: dup v1.2s, w8
; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_extvec_v2i32_v2i64:
@@ -1464,11 +1464,12 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
;
; CHECK-SVE-LABEL: amull2_i16:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: movi v2.2d, #0x00ffff0000ffff
-; CHECK-SVE-NEXT: smull v3.4s, v0.4h, v1.4h
-; CHECK-SVE-NEXT: smull2 v0.4s, v0.8h, v1.8h
-; CHECK-SVE-NEXT: and v1.16b, v0.16b, v2.16b
-; CHECK-SVE-NEXT: and v0.16b, v3.16b, v2.16b
+; CHECK-SVE-NEXT: smull v2.4s, v0.4h, v1.4h
+; CHECK-SVE-NEXT: smull2 v1.4s, v0.8h, v1.8h
+; CHECK-SVE-NEXT: and z2.s, z2.s, #0xffff
+; CHECK-SVE-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-SVE-NEXT: mov v0.16b, v2.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull2_i16:
@@ -1498,11 +1499,12 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
;
; CHECK-SVE-LABEL: amull2_i32:
; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-SVE-NEXT: smull v3.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: smull2 v0.2d, v0.4s, v1.4s
-; CHECK-SVE-NEXT: and v1.16b, v0.16b, v2.16b
-; CHECK-SVE-NEXT: and v0.16b, v3.16b, v2.16b
+; CHECK-SVE-NEXT: smull v2.2d, v0.2s, v1.2s
+; CHECK-SVE-NEXT: smull2 v1.2d, v0.4s, v1.4s
+; CHECK-SVE-NEXT: and z2.d, z2.d, #0xffffffff
+; CHECK-SVE-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-SVE-NEXT: mov v0.16b, v2.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull2_i32:
@@ -1580,13 +1582,29 @@ entry:
}
define <8 x i16> @umull_and256_v8i16(<8 x i8> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: umull_and256_v8i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.8h, #1, lsl #8
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: umull_and256_v8i16:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v2.8h, #1, lsl #8
+; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: umull_and256_v8i16:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SVE-NEXT: and z1.h, z1.h, #0x100
+; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: umull_and256_v8i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.8h, #1, lsl #8
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
entry:
%in1 = zext <8 x i8> %src1 to <8 x i16>
%in2 = and <8 x i16> %src2, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
@@ -1654,13 +1672,29 @@ entry:
}
define <4 x i32> @umull_and_v4i32(<4 x i16> %src1, <4 x i32> %src2) {
-; CHECK-LABEL: umull_and_v4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: umull_and_v4i32:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v2.2d, #0x0000ff000000ff
+; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT: xtn v1.4h, v1.4s
+; CHECK-NEON-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: umull_and_v4i32:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z1.s, z1.s, #0xff
+; CHECK-SVE-NEXT: xtn v1.4h, v1.4s
+; CHECK-SVE-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: umull_and_v4i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0x0000ff000000ff
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: xtn v1.4h, v1.4s
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%in1 = zext <4 x i16> %src1 to <4 x i32>
%in2 = and <4 x i32> %src2, <i32 255, i32 255, i32 255, i32 255>
@@ -1681,9 +1715,10 @@ define <8 x i32> @umull_and_v8i32(<8 x i16> %src1, <8 x i32> %src2) {
;
; CHECK-SVE-LABEL: umull_and_v8i32:
; CHECK-SVE: // %bb.0: // %entry
-; CHECK-SVE-NEXT: movi v3.2d, #0x0000ff000000ff
-; CHECK-SVE-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-SVE-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-SVE-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z2.s, z2.s, #0xff
+; CHECK-SVE-NEXT: and z1.s, z1.s, #0xff
; CHECK-SVE-NEXT: uzp1 v2.8h, v1.8h, v2.8h
; CHECK-SVE-NEXT: umull2 v1.4s, v0.8h, v2.8h
; CHECK-SVE-NEXT: umull v0.4s, v0.4h, v2.4h
@@ -1743,13 +1778,29 @@ entry:
}
define <2 x i64> @umull_and_v2i64(<2 x i32> %src1, <2 x i64> %src2) {
-; CHECK-LABEL: umull_and_v2i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0x000000000000ff
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: xtn v1.2s, v1.2d
-; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: umull_and_v2i64:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEON-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEON-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: umull_and_v2i64:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z1.d, z1.d, #0xff
+; CHECK-SVE-NEXT: xtn v1.2s, v1.2d
+; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: umull_and_v2i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0x000000000000ff
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: xtn v1.2s, v1.2d
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%in1 = zext <2 x i32> %src1 to <2 x i64>
%in2 = and <2 x i64> %src2, <i64 255, i64 255>
@@ -1770,9 +1821,10 @@ define <4 x i64> @umull_and_v4i64(<4 x i32> %src1, <4 x i64> %src2) {
;
; CHECK-SVE-LABEL: umull_and_v4i64:
; CHECK-SVE: // %bb.0: // %entry
-; CHECK-SVE-NEXT: movi v3.2d, #0x000000000000ff
-; CHECK-SVE-NEXT: and v2.16b, v2.16b, v3.16b
-; CHECK-SVE-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-SVE-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-SVE-NEXT: and z2.d, z2.d, #0xff
+; CHECK-SVE-NEXT: and z1.d, z1.d, #0xff
; CHECK-SVE-NEXT: uzp1 v2.4s, v1.4s, v2.4s
; CHECK-SVE-NEXT: umull2 v1.2d, v0.4s, v2.4s
; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v2.2s
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index f5cf629b2a4a4..099594d5ca8aa 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -75,14 +75,13 @@ define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: vector_loop_with_icmp:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: mov z1.d, #2 // =0x2
; CHECK-NEXT: add x8, x0, #4
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: mov w10, #1 // =0x1
; CHECK-NEXT: b .LBB5_2
; CHECK-NEXT: .LBB5_1: // %pred.store.continue6
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: add z0.d, z0.d, #2 // =0x2
; CHECK-NEXT: subs x9, x9, #2
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: b.eq .LBB5_6
diff --git a/llvm/test/CodeGen/AArch64/imm-splat-ops.ll b/llvm/test/CodeGen/AArch64/imm-splat-ops.ll
new file mode 100644
index 0000000000000..6f12a24e6ea36
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/imm-splat-ops.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefix=CHECK-SVE
+
+define <4 x i32> @and(<4 x i32> noundef %a) {
+; CHECK-NEON-LABEL: and:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: fmov v1.4s, #1.00000000
+; CHECK-NEON-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: and:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: and z0.s, z0.s, #0x3f800000
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %and = and <4 x i32> %a, splat (i32 1065353216)
+ ret <4 x i32> %and
+}
+
+define <4 x i32> @or(<4 x i32> noundef %a) {
+; CHECK-NEON-LABEL: or:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: mvni v1.4s, #127
+; CHECK-NEON-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: or:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: orr z0.s, z0.s, #0xffffff80
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %or = or <4 x i32> %a, splat (i32 -128)
+ ret <4 x i32> %or
+}
+
+
+define <4 x i32> @add(<4 x i32> noundef %a) {
+; CHECK-NEON-LABEL: add:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v1.4s, #1
+; CHECK-NEON-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: add:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: add z0.s, z0.s, #1 // =0x1
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %add = add <4 x i32> %a, splat (i32 1)
+ ret <4 x i32> %add
+}
+
+define <4 x i32> @add_not_multiple_of_256(<4 x i32> noundef %a) {
+; CHECK-NEON-LABEL: add_not_multiple_of_256:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: mov w8, #513 // =0x201
+; CHECK-NEON-NEXT: dup v1.4s, w8
+; CHECK-NEON-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: add_not_multiple_of_256:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: mov w8, #513 // =0x201
+; CHECK-SVE-NEXT: dup v1.4s, w8
+; CHECK-SVE-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SVE-NEXT: ret
+entry:
+ %add = add <4 x i32> %a, splat (i32 513)
+ ret <4 x i32> %add
+}
+
+define <4 x i32> @sub(<4 x i32> noundef %a) {
+; CHECK-NEON-LABEL: sub:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v1.4s, #2, lsl #8
+; CHECK-NEON-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: sub:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: add z0.s, z0.s, #512 // =0x200
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %sub = add <4 x i32> %a, splat (i32 512)
+ ret <4 x i32> %sub
+}
+
+define <4 x i32> @mul(<4 x i32> noundef %a) {
+; CHECK-NEON-LABEL: mul:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: movi v1.4s, #33
+; CHECK-NEON-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: mul:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-SVE-NEXT: add z0.s, z0.s, #33 // =0x21
+; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-SVE-NEXT: ret
+entry:
+ %mul = add <4 x i32> %a, splat (i32 33)
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @mul_imm_too_big(<4 x i32> noundef %a) {
+; CHECK-NEON-LABEL: mul_imm_too_big:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: mov w8, #320 // =0x140
+; CHECK-NEON-NEXT: dup v1.4s, w8
+; CHECK-NEON-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: mul_imm_too_big:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: mov w8, #320 // =0x140
+; CHECK-SVE-NEXT: dup v1.4s, w8
+; CHECK-SVE-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SVE-NEXT: ret
+entry:
+ %mul = add <4 x i32> %a, splat (i32 320)
+ ret <4 x i32> %mul
+}
diff --git a/llvm/test/CodeGen/AArch64/reassocmls.ll b/llvm/test/CodeGen/AArch64/reassocmls.ll
index 0909fbffc34de..5f4cdd3f5d283 100644
--- a/llvm/test/CodeGen/AArch64/reassocmls.ll
+++ b/llvm/test/CodeGen/AArch64/reassocmls.ll
@@ -331,10 +33...
[truncated]