From 190776a5782871e9f15d89de64bbb888076c47f1 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 16 Sep 2025 13:49:29 +0000 Subject: [PATCH 1/2] [AArch64] Use SVE to materialise some 128-bit vector constants There is no easy way to materialise some fixed-width vector constants with 64-bit elements. This is because NEON's movi instruction is restricted to setting all bits in a byte to the same value, i.e. 0xFF can be encoded as an immediate but not 0x1F. However, if SVE is available we can use the dup instruction to cover more cases. Rather than lower the immediate directly using the dup instruction, I've instead used the generic SPLAT_VECTOR node in combination with an EXTRACT_SUBVECTOR. This is because we already have SVE splat_vector patterns that can match directly to dup. --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 35 +--- .../Target/AArch64/AArch64ISelLowering.cpp | 26 +++ .../MCTargetDesc/AArch64AddressingModes.h | 30 ++++ .../CodeGen/AArch64/extract-vector-cmp.ll | 5 +- llvm/test/CodeGen/AArch64/movi64_sve.ll | 165 ++++++++++++++++++ 5 files changed, 231 insertions(+), 30 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/movi64_sve.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 54bdb8750f709..563a42fcdeac2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4346,34 +4346,15 @@ bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, ->getAPIntValue() .trunc(VT.getFixedSizeInBits()) .getSExtValue(); + int32_t ImmVal, ShiftVal; + if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal, + ShiftVal)) + return false; - switch (VT.SimpleTy) { - case MVT::i8: - // All immediates are supported. - Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); - Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); - return true; - case MVT::i16: - case MVT::i32: - case MVT::i64: - // Support 8bit signed immediates. - if (Val >= -128 && Val <= 127) { - Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); - Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); - return true; - } - // Support 16bit signed immediates that are a multiple of 256. - if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { - Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); - Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); - return true; - } - break; - default: - break; - } - - return false; + // All immediates are supported. + Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); + return true; } bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index cd7f0e719ad0c..a04023561fbc8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15277,6 +15277,27 @@ static SDValue NormalizeBuildVector(SDValue Op, return DAG.getBuildVector(VT, DL, Ops); } +static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG, + const AArch64Subtarget *ST, APInt &DefBits) { + EVT VT = Op.getValueType(); + // TODO: We should be able to support 64-bit destinations too + if (!ST->hasSVE() || DefBits.getHiBits(64) != DefBits.getLoBits(64) || + VT.getFixedSizeInBits() != 128) + return SDValue(); + + // See if we can make use of the SVE dup instruction. + APInt Val64 = DefBits.sextOrTrunc(64); + int32_t ImmVal, ShiftVal; + if (!AArch64_AM::isSVECpyDupImm(64, Val64.getSExtValue(), ImmVal, ShiftVal)) + return SDValue(); + + SDLoc DL(Op); + SDValue SplatVal = DAG.getSplatVector(MVT::nxv2i64, DL, + DAG.getConstant(Val64, DL, MVT::i64)); + SDValue Res = convertFromScalableVector(DAG, MVT::v2i64, SplatVal); + return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Res); +} + static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST) { EVT VT = Op.getValueType(); @@ -15316,6 +15337,11 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, if (SDValue R = TryMOVIWithBits(UndefBits)) return R; + // NEON doesn't have a nice way of materialising 64-bit values, but if SVE + // is available we have more options. + if (SDValue R = trySVESplat64(Op, DAG, ST, DefBits)) + return R; + // See if a fneg of the constant can be materialized with a MOVI, etc auto TryWithFNeg = [&](APInt DefBits, MVT FVT) { // FNegate each sub-element of the constant diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h index f542592d22c5f..4ae5d040d5e8a 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h @@ -871,6 +871,36 @@ inline static bool isAnyMOVWMovAlias(uint64_t Value, int RegWidth) { return isAnyMOVZMovAlias(Value, RegWidth); } +static inline bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, + int32_t &Shift) { + switch (SizeInBits) { + case 8: + // All immediates are supported. + Shift = 0; + Imm = Val & 0xFF; + return true; + case 16: + case 32: + case 64: + // Support 8bit signed immediates. + if (Val >= -128 && Val <= 127) { + Shift = 0; + Imm = Val & 0xFF; + return true; + } + // Support 16bit signed immediates that are a multiple of 256. + if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { + Shift = 8; + Imm = (Val >> 8) & 0xFF; + return true; + } + break; + default: + break; + } + return false; +} + } // end namespace AArch64_AM } // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll index 832e34b664fbe..f5cf629b2a4a4 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll @@ -75,10 +75,9 @@ define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) { ; CHECK-LABEL: vector_loop_with_icmp: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: mov w8, #2 // =0x2 -; CHECK-NEXT: mov w9, #16 // =0x10 -; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov z1.d, #2 // =0x2 ; CHECK-NEXT: add x8, x0, #4 +; CHECK-NEXT: mov w9, #16 // =0x10 ; CHECK-NEXT: mov w10, #1 // =0x1 ; CHECK-NEXT: b .LBB5_2 ; CHECK-NEXT: .LBB5_1: // %pred.store.continue6 diff --git a/llvm/test/CodeGen/AArch64/movi64_sve.ll b/llvm/test/CodeGen/AArch64/movi64_sve.ll new file mode 100644 index 0000000000000..da1a21532ac79 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/movi64_sve.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s + +define <2 x i64> @movi_1_v2i64() { +; CHECK-LABEL: movi_1_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #1 // =0x1 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 1) +} + +define <2 x i64> @movi_127_v2i64() { +; CHECK-LABEL: movi_127_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #127 // =0x7f +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 127) +} + +define <2 x i64> @movi_m128_v2i64() { +; CHECK-LABEL: movi_m128_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #-128 // =0xffffffffffffff80 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 -128) +} + +define <2 x i64> @movi_256_v2i64() { +; CHECK-LABEL: movi_256_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #256 // =0x100 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 256) +} + +define <2 x i64> @movi_32512_v2i64() { +; CHECK-LABEL: movi_32512_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 32512) +} + +define <2 x i64> @movi_m32768_v2i64() { +; CHECK-LABEL: movi_m32768_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 -32768) +} + +; Special cases where the destination vector does not have 64-bit elements + +define <4 x i32> @movi_v4i32_1() { +; CHECK-LABEL: movi_v4i32_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #127 // =0x7f +; CHECK-NEXT: ret + ret <4 x i32> +} + +define <4 x i32> @movi_v4i32_2() { +; CHECK-LABEL: movi_v4i32_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 +; CHECK-NEXT: ret + ret <4 x i32> +} + +define <8 x i16> @movi_v8i16_1() { +; CHECK-LABEL: movi_v8i16_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #127 // =0x7f +; CHECK-NEXT: ret + ret <8 x i16> +} + +define <8 x i16> @movi_v8i16_2() { +; CHECK-LABEL: movi_v8i16_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 +; CHECK-NEXT: ret + ret <8 x i16> +} + +define <16 x i8> @movi_v16i8_1() { +; CHECK-LABEL: movi_v16i8_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #127 // =0x7f +; CHECK-NEXT: ret + ret <16 x i8> +} + +define <16 x i8> @movi_v16i8_2() { +; CHECK-LABEL: movi_v16i8_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 +; CHECK-NEXT: ret + ret <16 x i8> +} + +; Negative cases + +define <2 x i64> @movi_128_v2i64() { +; CHECK-LABEL: movi_128_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #128 // =0x80 +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 128) +} + +define <2 x i64> @movi_m127_v2i64() { +; CHECK-LABEL: movi_m127_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-129 // =0xffffffffffffff7f +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 -129) +} + +define <2 x i64> @movi_32513_v2i64() { +; CHECK-LABEL: movi_32513_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #32513 // =0x7f01 +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 32513) +} + +define <2 x i64> @movi_m32769_v2i64() { +; CHECK-LABEL: movi_m32769_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-32769 // =0xffffffffffff7fff +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 -32769) +} + +define <2 x i64> @movi_257_v2i64() { +; CHECK-LABEL: movi_257_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #257 // =0x101 +; CHECK-NEXT: dup v0.2d, x8 +; CHECK-NEXT: ret + ret <2 x i64> splat (i64 257) +} + +define <4 x i32> @movi_v4i32_3() { +; CHECK-LABEL: movi_v4i32_3: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: ret + ret <4 x i32> +} + +define <16 x i8> @movi_v16i8_3() { +; CHECK-LABEL: movi_v16i8_3: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI18_0 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: ret + ret <16 x i8> +} From 8c3446f2216dbf9ec3590c21f70a1f1972d889b1 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 22 Sep 2025 08:28:06 +0000 Subject: [PATCH 2/2] Address review comments --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 1 - .../Target/AArch64/AArch64ISelLowering.cpp | 9 +- llvm/test/CodeGen/AArch64/movi64_sve.ll | 241 ++++++++++++------ 3 files changed, 161 insertions(+), 90 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 563a42fcdeac2..6a1b06eea4309 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4351,7 +4351,6 @@ bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, ShiftVal)) return false; - // All immediates are supported. Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32); Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); return true; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a04023561fbc8..9b9b457d54162 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15281,12 +15281,12 @@ static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST, APInt &DefBits) { EVT VT = Op.getValueType(); // TODO: We should be able to support 64-bit destinations too - if (!ST->hasSVE() || DefBits.getHiBits(64) != DefBits.getLoBits(64) || - VT.getFixedSizeInBits() != 128) + if (!ST->hasSVE() || !VT.is128BitVector() || + DefBits.getHiBits(64) != DefBits.getLoBits(64)) return SDValue(); // See if we can make use of the SVE dup instruction. - APInt Val64 = DefBits.sextOrTrunc(64); + APInt Val64 = DefBits.trunc(64); int32_t ImmVal, ShiftVal; if (!AArch64_AM::isSVECpyDupImm(64, Val64.getSExtValue(), ImmVal, ShiftVal)) return SDValue(); @@ -15337,8 +15337,7 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, if (SDValue R = TryMOVIWithBits(UndefBits)) return R; - // NEON doesn't have a nice way of materialising 64-bit values, but if SVE - // is available we have more options. + // Try to materialise the constant using SVE when available. if (SDValue R = trySVESplat64(Op, DAG, ST, DefBits)) return R; diff --git a/llvm/test/CodeGen/AArch64/movi64_sve.ll b/llvm/test/CodeGen/AArch64/movi64_sve.ll index da1a21532ac79..1d4e00d0c3d10 100644 --- a/llvm/test/CodeGen/AArch64/movi64_sve.ll +++ b/llvm/test/CodeGen/AArch64/movi64_sve.ll @@ -1,165 +1,238 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+neon < %s | FileCheck %s --check-prefixes=COMMON,NEON +; RUN: llc -mtriple=aarch64 -mattr=+neon,+sve < %s | FileCheck %s --check-prefixes=COMMON,SVE define <2 x i64> @movi_1_v2i64() { -; CHECK-LABEL: movi_1_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #1 // =0x1 -; CHECK-NEXT: ret +; NEON-LABEL: movi_1_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: mov w8, #1 // =0x1 +; NEON-NEXT: dup v0.2d, x8 +; NEON-NEXT: ret +; +; SVE-LABEL: movi_1_v2i64: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #1 // =0x1 +; SVE-NEXT: ret ret <2 x i64> splat (i64 1) } define <2 x i64> @movi_127_v2i64() { -; CHECK-LABEL: movi_127_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #127 // =0x7f -; CHECK-NEXT: ret +; NEON-LABEL: movi_127_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: mov w8, #127 // =0x7f +; NEON-NEXT: dup v0.2d, x8 +; NEON-NEXT: ret +; +; SVE-LABEL: movi_127_v2i64: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #127 // =0x7f +; SVE-NEXT: ret ret <2 x i64> splat (i64 127) } define <2 x i64> @movi_m128_v2i64() { -; CHECK-LABEL: movi_m128_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #-128 // =0xffffffffffffff80 -; CHECK-NEXT: ret +; NEON-LABEL: movi_m128_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: mov x8, #-128 // =0xffffffffffffff80 +; NEON-NEXT: dup v0.2d, x8 +; NEON-NEXT: ret +; +; SVE-LABEL: movi_m128_v2i64: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #-128 // =0xffffffffffffff80 +; SVE-NEXT: ret ret <2 x i64> splat (i64 -128) } define <2 x i64> @movi_256_v2i64() { -; CHECK-LABEL: movi_256_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #256 // =0x100 -; CHECK-NEXT: ret +; NEON-LABEL: movi_256_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: mov w8, #256 // =0x100 +; NEON-NEXT: dup v0.2d, x8 +; NEON-NEXT: ret +; +; SVE-LABEL: movi_256_v2i64: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #256 // =0x100 +; SVE-NEXT: ret ret <2 x i64> splat (i64 256) } define <2 x i64> @movi_32512_v2i64() { -; CHECK-LABEL: movi_32512_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 -; CHECK-NEXT: ret +; NEON-LABEL: movi_32512_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: mov w8, #32512 // =0x7f00 +; NEON-NEXT: dup v0.2d, x8 +; NEON-NEXT: ret +; +; SVE-LABEL: movi_32512_v2i64: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #32512 // =0x7f00 +; SVE-NEXT: ret ret <2 x i64> splat (i64 32512) } define <2 x i64> @movi_m32768_v2i64() { -; CHECK-LABEL: movi_m32768_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: ret +; NEON-LABEL: movi_m32768_v2i64: +; NEON: // %bb.0: +; NEON-NEXT: mov x8, #-32768 // =0xffffffffffff8000 +; NEON-NEXT: dup v0.2d, x8 +; NEON-NEXT: ret +; +; SVE-LABEL: movi_m32768_v2i64: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #-32768 // =0xffffffffffff8000 +; SVE-NEXT: ret ret <2 x i64> splat (i64 -32768) } ; Special cases where the destination vector does not have 64-bit elements define <4 x i32> @movi_v4i32_1() { -; CHECK-LABEL: movi_v4i32_1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #127 // =0x7f -; CHECK-NEXT: ret +; NEON-LABEL: movi_v4i32_1: +; NEON: // %bb.0: +; NEON-NEXT: adrp x8, .LCPI6_0 +; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI6_0] +; NEON-NEXT: ret +; +; SVE-LABEL: movi_v4i32_1: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #127 // =0x7f +; SVE-NEXT: ret ret <4 x i32> } define <4 x i32> @movi_v4i32_2() { -; CHECK-LABEL: movi_v4i32_2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 -; CHECK-NEXT: ret +; NEON-LABEL: movi_v4i32_2: +; NEON: // %bb.0: +; NEON-NEXT: adrp x8, .LCPI7_0 +; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI7_0] +; NEON-NEXT: ret +; +; SVE-LABEL: movi_v4i32_2: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #32512 // =0x7f00 +; SVE-NEXT: ret ret <4 x i32> } define <8 x i16> @movi_v8i16_1() { -; CHECK-LABEL: movi_v8i16_1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #127 // =0x7f -; CHECK-NEXT: ret +; NEON-LABEL: movi_v8i16_1: +; NEON: // %bb.0: +; NEON-NEXT: adrp x8, .LCPI8_0 +; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] +; NEON-NEXT: ret +; +; SVE-LABEL: movi_v8i16_1: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #127 // =0x7f +; SVE-NEXT: ret ret <8 x i16> } define <8 x i16> @movi_v8i16_2() { -; CHECK-LABEL: movi_v8i16_2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 -; CHECK-NEXT: ret +; NEON-LABEL: movi_v8i16_2: +; NEON: // %bb.0: +; NEON-NEXT: adrp x8, .LCPI9_0 +; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] +; NEON-NEXT: ret +; +; SVE-LABEL: movi_v8i16_2: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #32512 // =0x7f00 +; SVE-NEXT: ret ret <8 x i16> } define <16 x i8> @movi_v16i8_1() { -; CHECK-LABEL: movi_v16i8_1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #127 // =0x7f -; CHECK-NEXT: ret +; NEON-LABEL: movi_v16i8_1: +; NEON: // %bb.0: +; NEON-NEXT: adrp x8, .LCPI10_0 +; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] +; NEON-NEXT: ret +; +; SVE-LABEL: movi_v16i8_1: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #127 // =0x7f +; SVE-NEXT: ret ret <16 x i8> } define <16 x i8> @movi_v16i8_2() { -; CHECK-LABEL: movi_v16i8_2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #32512 // =0x7f00 -; CHECK-NEXT: ret +; NEON-LABEL: movi_v16i8_2: +; NEON: // %bb.0: +; NEON-NEXT: adrp x8, .LCPI11_0 +; NEON-NEXT: ldr q0, [x8, :lo12:.LCPI11_0] +; NEON-NEXT: ret +; +; SVE-LABEL: movi_v16i8_2: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #32512 // =0x7f00 +; SVE-NEXT: ret ret <16 x i8> } ; Negative cases define <2 x i64> @movi_128_v2i64() { -; CHECK-LABEL: movi_128_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 // =0x80 -; CHECK-NEXT: dup v0.2d, x8 -; CHECK-NEXT: ret +; COMMON-LABEL: movi_128_v2i64: +; COMMON: // %bb.0: +; COMMON-NEXT: mov w8, #128 // =0x80 +; COMMON-NEXT: dup v0.2d, x8 +; COMMON-NEXT: ret ret <2 x i64> splat (i64 128) } define <2 x i64> @movi_m127_v2i64() { -; CHECK-LABEL: movi_m127_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-129 // =0xffffffffffffff7f -; CHECK-NEXT: dup v0.2d, x8 -; CHECK-NEXT: ret +; COMMON-LABEL: movi_m127_v2i64: +; COMMON: // %bb.0: +; COMMON-NEXT: mov x8, #-129 // =0xffffffffffffff7f +; COMMON-NEXT: dup v0.2d, x8 +; COMMON-NEXT: ret ret <2 x i64> splat (i64 -129) } define <2 x i64> @movi_32513_v2i64() { -; CHECK-LABEL: movi_32513_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32513 // =0x7f01 -; CHECK-NEXT: dup v0.2d, x8 -; CHECK-NEXT: ret +; COMMON-LABEL: movi_32513_v2i64: +; COMMON: // %bb.0: +; COMMON-NEXT: mov w8, #32513 // =0x7f01 +; COMMON-NEXT: dup v0.2d, x8 +; COMMON-NEXT: ret ret <2 x i64> splat (i64 32513) } define <2 x i64> @movi_m32769_v2i64() { -; CHECK-LABEL: movi_m32769_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-32769 // =0xffffffffffff7fff -; CHECK-NEXT: dup v0.2d, x8 -; CHECK-NEXT: ret +; COMMON-LABEL: movi_m32769_v2i64: +; COMMON: // %bb.0: +; COMMON-NEXT: mov x8, #-32769 // =0xffffffffffff7fff +; COMMON-NEXT: dup v0.2d, x8 +; COMMON-NEXT: ret ret <2 x i64> splat (i64 -32769) } define <2 x i64> @movi_257_v2i64() { -; CHECK-LABEL: movi_257_v2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 // =0x101 -; CHECK-NEXT: dup v0.2d, x8 -; CHECK-NEXT: ret +; COMMON-LABEL: movi_257_v2i64: +; COMMON: // %bb.0: +; COMMON-NEXT: mov w8, #257 // =0x101 +; COMMON-NEXT: dup v0.2d, x8 +; COMMON-NEXT: ret ret <2 x i64> splat (i64 257) } define <4 x i32> @movi_v4i32_3() { -; CHECK-LABEL: movi_v4i32_3: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: ret +; COMMON-LABEL: movi_v4i32_3: +; COMMON: // %bb.0: +; COMMON-NEXT: adrp x8, .LCPI17_0 +; COMMON-NEXT: ldr q0, [x8, :lo12:.LCPI17_0] +; COMMON-NEXT: ret ret <4 x i32> } define <16 x i8> @movi_v16i8_3() { -; CHECK-LABEL: movi_v16i8_3: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: ret +; COMMON-LABEL: movi_v16i8_3: +; COMMON: // %bb.0: +; COMMON-NEXT: adrp x8, .LCPI18_0 +; COMMON-NEXT: ldr q0, [x8, :lo12:.LCPI18_0] +; COMMON-NEXT: ret ret <16 x i8> }