From 02d47456d375cd07a270b7c497483e3e585038bb Mon Sep 17 00:00:00 2001
From: Amichaxx
Date: Fri, 7 Nov 2025 15:33:14 +0000
Subject: [PATCH 1/2] [AArch64] Fold scalar-to-vector shuffles into DUP/FMOV

Previously, LLVM emitted an inefficient instruction sequence when the
low lanes of a 128-bit vector were a splat of a scalar and the high
lanes were zero. This patch lowers such BUILD_VECTORs with a single
fmov/dup that writes the scalar into the low 64-bit half of the
register, leaving the high half implicitly zeroed.

For example:
- <2 x i64> from i64 -> fmov d0, x0
- <4 x i32> from i32 -> dup v0.2s, w0
- <8 x i16> from i16 -> dup v0.4h, w0
- <16 x i8> from i8 -> dup v0.8b, w0
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 48 ++++++++++++++++
 llvm/test/CodeGen/AArch64/aarch64-addv.ll     |  9 +--
 .../AArch64/aarch64-matrix-umull-smull.ll     | 19 +++----
 llvm/test/CodeGen/AArch64/bitcast-extend.ll   |  3 +-
 llvm/test/CodeGen/AArch64/combine-sdiv.ll     | 56 +++++++++----------
 llvm/test/CodeGen/AArch64/ctpop.ll            |  3 +-
 .../AArch64/neon-lowhalf128-optimisation.ll   | 45 +++++++++++++++
 .../AArch64/srem-seteq-illegal-types.ll       |  4 +-
 8 files changed, 137 insertions(+), 50 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 40e6400756c7c..18d6d0fc5f2d6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15654,6 +15654,54 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
     }
   }
 
+  // 128-bit NEON integer vectors:
+  // If BUILD_VECTOR has low half == splat(lane 0) and high half == zero,
+  // build the low half and return SUBREG_TO_REG(0, Lo, dsub).
+  // This avoids INSERT_VECTOR_ELT chains and lets later passes assume the
+  // other lanes are zero.
+  if (VT.isFixedLengthVector() && VT.getSizeInBits() == 128) {
+    EVT LaneVT = VT.getVectorElementType();
+    if (LaneVT.isInteger()) {
+      const unsigned HalfElts = NumElts >> 1;
+      SDValue FirstVal = Op.getOperand(0);
+
+      auto IsZero = [&](SDValue V) { return isNullConstant(V); };
+
+      bool IsLoSplatHiZero = true;
+      for (unsigned i = 0; i < NumElts; ++i) {
+        SDValue Vi = Op.getOperand(i);
+        bool violates = (i < HalfElts) ? (Vi != FirstVal)
+                                       : !IsZero(Vi);
+        if (violates) { IsLoSplatHiZero = false; break; }
+      }
+
+      if (IsLoSplatHiZero) {
+        EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+        unsigned LaneBits = LaneVT.getSizeInBits();
+
+        auto buildSubregToReg = [&](SDValue LoHalf) -> SDValue {
+          SDValue ZeroImm = DAG.getTargetConstant(0, DL, MVT::i32);
+          SDValue SubIdx = DAG.getTargetConstant(AArch64::dsub, DL, MVT::i32);
+          SDNode *N = DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, VT,
+                                         {ZeroImm, LoHalf, SubIdx});
+          return SDValue(N, 0);
+        };
+
+        if (LaneBits == 64) {
+          // v2i64
+          SDValue First64 = DAG.getZExtOrTrunc(FirstVal, DL, MVT::i64);
+          SDValue Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, HalfVT, First64);
+          return buildSubregToReg(Lo);
+        } else {
+          // v4i32/v8i16/v16i8
+          SDValue FirstW = DAG.getZExtOrTrunc(FirstVal, DL, MVT::i32);
+          SDValue DupLo = DAG.getNode(AArch64ISD::DUP, DL, HalfVT, FirstW);
+          return buildSubregToReg(DupLo);
+        }
+      }
+    }
+  }
+
   // Use DUP for non-constant splats. For f32 constant splats, reduce to
   // i32 and try again.
   if (usesOnlyOneValue) {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index d9180a28bd40b..94219ee1537e6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -553,9 +553,8 @@ define i8 @addv_zero_lanes_negative_v8i8(ptr %arr) {
 define i8 @addv_zero_lanes_v16i8(ptr %arr) {
 ; CHECK-SD-LABEL: addv_zero_lanes_v16i8:
 ; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
 ; CHECK-SD-NEXT: ldrb w8, [x0]
-; CHECK-SD-NEXT: mov v0.d[0], x8
+; CHECK-SD-NEXT: fmov d0, x8
 ; CHECK-SD-NEXT: addv b0, v0.16b
 ; CHECK-SD-NEXT: fmov w0, s0
 ; CHECK-SD-NEXT: ret
@@ -578,9 +577,8 @@ define i8 @addv_zero_lanes_v16i8(ptr %arr) {
 define i16 @addv_zero_lanes_v8i16(ptr %arr) {
 ; CHECK-SD-LABEL: addv_zero_lanes_v8i16:
 ; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
 ; CHECK-SD-NEXT: ldrh w8, [x0]
-; CHECK-SD-NEXT: mov v0.d[0], x8
+; CHECK-SD-NEXT: fmov d0, x8
 ; CHECK-SD-NEXT: addv h0, v0.8h
 ; CHECK-SD-NEXT: fmov w0, s0
 ; CHECK-SD-NEXT: ret
@@ -603,9 +601,8 @@ define i16 @addv_zero_lanes_v8i16(ptr %arr) {
 define i32 @addv_zero_lanes_v4i32(ptr %arr) {
 ; CHECK-SD-LABEL: addv_zero_lanes_v4i32:
 ; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
 ; CHECK-SD-NEXT: ldr w8, [x0]
-; CHECK-SD-NEXT: mov v0.d[0], x8
+; CHECK-SD-NEXT: fmov d0, x8
 ; CHECK-SD-NEXT: addv s0, v0.4s
 ; CHECK-SD-NEXT: fmov w0, s0
 ; CHECK-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 4894932d3c9b1..748489bff830e 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -823,15 +823,14 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none
 ; CHECK-SD-NEXT: // %bb.9: // %vec.epilog.iter.check
 ; CHECK-SD-NEXT: cbz x11, .LBB6_13
 ; CHECK-SD-NEXT: .LBB6_10: // %vec.epilog.ph
-; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
 ; CHECK-SD-NEXT: mov w11, w1
-; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: movi v2.2d, #0x000000000000ff
 ; CHECK-SD-NEXT: sxtb x11, w11
-; CHECK-SD-NEXT: movi v3.2d, #0x000000000000ff
-; CHECK-SD-NEXT: dup v2.2s, w11
+; CHECK-SD-NEXT: fmov d3, x8
+; CHECK-SD-NEXT: dup v1.2s, w11
 ; CHECK-SD-NEXT: mov x11, x10
 ; CHECK-SD-NEXT: and x10, x9, #0xfffffffc
-; CHECK-SD-NEXT: mov v0.d[0], x8
 ; CHECK-SD-NEXT: sub x8, x11, x10
 ; CHECK-SD-NEXT: add x11, x0, x11
 ; CHECK-SD-NEXT: .LBB6_11: // %vec.epilog.vector.body
@@ -842,15 +841,15 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none
 ; CHECK-SD-NEXT: ushll v4.4s, v4.4h, #0
 ; CHECK-SD-NEXT: ushll v5.2d, v4.2s, #0
 ; CHECK-SD-NEXT: ushll2 v4.2d, v4.4s, #0
-; CHECK-SD-NEXT: and v5.16b, v5.16b, v3.16b
-; CHECK-SD-NEXT: and v4.16b, v4.16b, v3.16b
+; CHECK-SD-NEXT: and v5.16b, v5.16b, v2.16b
+; CHECK-SD-NEXT: and v4.16b, v4.16b, v2.16b
 ; CHECK-SD-NEXT: xtn v5.2s, v5.2d
 ; CHECK-SD-NEXT: xtn v4.2s, v4.2d
-; CHECK-SD-NEXT: smlal v1.2d, v2.2s, v4.2s
-; CHECK-SD-NEXT: smlal v0.2d, v2.2s, v5.2s
+; CHECK-SD-NEXT: smlal v0.2d, v1.2s, v4.2s
+; CHECK-SD-NEXT: smlal v3.2d, v1.2s, v5.2s
 ; CHECK-SD-NEXT: b.ne .LBB6_11
 ; CHECK-SD-NEXT: // %bb.12: // %vec.epilog.middle.block
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: add v0.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT: cmp x10, x9
 ; CHECK-SD-NEXT: addp d0, v0.2d
 ; CHECK-SD-NEXT: fmov x8, d0
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 741dcf3ad4c2f..b981c1701725a 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -339,9 +339,8 @@ define <8 x i8> @load_sext_i32_v8i8(ptr %p) {
 define <16 x i8> @load_zext_v16i8(ptr %p) {
 ; CHECK-SD-LABEL: load_zext_v16i8:
 ; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
 ; CHECK-SD-NEXT: ldr w8, [x0]
-; CHECK-SD-NEXT: mov v0.d[0], x8
+; CHECK-SD-NEXT: fmov d0, x8
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: load_zext_v16i8:
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..c9f6295d4f6cb 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -578,10 +578,10 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
 ; CHECK-SD-NEXT: adrp x8, .LCPI21_1
 ; CHECK-SD-NEXT: ushl v1.2d, v1.2d, v2.2d
 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI21_1]
-; CHECK-SD-NEXT: adrp x8, .LCPI21_2
+; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-SD-NEXT: add v1.2d, v0.2d, v1.2d
 ; CHECK-SD-NEXT: sshl v1.2d, v1.2d, v2.2d
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI21_2]
+; CHECK-SD-NEXT: fmov d2, x8
 ; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT: ret
 ;
@@ -612,23 +612,23 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
 ; CHECK-SD: // %bb.0:
 ; CHECK-SD-NEXT: adrp x8, .LCPI22_0
 ; CHECK-SD-NEXT: cmlt v2.2d, v0.2d, #0
+; CHECK-SD-NEXT: adrp x9, .LCPI22_3
 ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI22_0]
-; CHECK-SD-NEXT: adrp x8, .LCPI22_3
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI22_3]
+; CHECK-SD-NEXT: adrp x8, .LCPI22_2
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI22_2]
 ; CHECK-SD-NEXT: adrp x8, .LCPI22_1
 ; CHECK-SD-NEXT: ushl v2.2d, v2.2d, v3.2d
 ; CHECK-SD-NEXT: cmlt v3.2d, v1.2d, #0
 ; CHECK-SD-NEXT: add v2.2d, v0.2d, v2.2d
 ; CHECK-SD-NEXT: ushl v3.2d, v3.2d, v4.2d
 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI22_1]
-; CHECK-SD-NEXT: adrp x8, .LCPI22_2
+; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-SD-NEXT: sshl v2.2d, v2.2d, v4.2d
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI22_2]
 ; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
-; CHECK-SD-NEXT: adrp x8, .LCPI22_4
-; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI22_4]
-; CHECK-SD-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-SD-NEXT: sshl v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: fmov d3, x8
+; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI22_3]
+; CHECK-SD-NEXT: bif v0.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: sshl v1.2d, v1.2d, v4.2d
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
@@ -670,28 +670,28 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
 ; CHECK-SD-NEXT: cmlt v4.2d, v0.2d, #0
 ; CHECK-SD-NEXT: cmlt v6.2d, v2.2d, #0
 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI23_0]
-; CHECK-SD-NEXT: adrp x8, .LCPI23_3
-; CHECK-SD-NEXT: cmlt v7.2d, v3.2d, #0
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI23_3]
-; CHECK-SD-NEXT: adrp x8, .LCPI23_1
+; CHECK-SD-NEXT: adrp x8, .LCPI23_2
+; CHECK-SD-NEXT: cmlt v7.2d, v1.2d, #0
+; CHECK-SD-NEXT: cmlt v16.2d, v3.2d, #0
 ; CHECK-SD-NEXT: ushl v4.2d, v4.2d, v5.2d
 ; CHECK-SD-NEXT: ushl v5.2d, v6.2d, v5.2d
-; CHECK-SD-NEXT: cmlt v6.2d, v1.2d, #0
+; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI23_2]
+; CHECK-SD-NEXT: adrp x8, .LCPI23_1
+; CHECK-SD-NEXT: ushl v7.2d, v7.2d, v6.2d
 ; CHECK-SD-NEXT: ldr q17, [x8, :lo12:.LCPI23_1]
-; CHECK-SD-NEXT: ushl v7.2d, v7.2d, v16.2d
-; CHECK-SD-NEXT: adrp x8, .LCPI23_2
+; CHECK-SD-NEXT: ushl v6.2d, v16.2d, v6.2d
 ; CHECK-SD-NEXT: add v4.2d, v0.2d, v4.2d
 ; CHECK-SD-NEXT: add v5.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT: ushl v6.2d, v6.2d, v16.2d
-; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI23_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI23_4
-; CHECK-SD-NEXT: add v3.2d, v3.2d, v7.2d
+; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v7.2d
+; CHECK-SD-NEXT: fmov d7, x8
+; CHECK-SD-NEXT: adrp x8, .LCPI23_3
 ; CHECK-SD-NEXT: sshl v4.2d, v4.2d, v17.2d
 ; CHECK-SD-NEXT: sshl v5.2d, v5.2d, v17.2d
-; CHECK-SD-NEXT: add v1.2d, v1.2d, v6.2d
-; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI23_4]
-; CHECK-SD-NEXT: bif v0.16b, v4.16b, v16.16b
-; CHECK-SD-NEXT: bif v2.16b, v5.16b, v16.16b
+; CHECK-SD-NEXT: add v3.2d, v3.2d, v6.2d
+; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI23_3]
+; CHECK-SD-NEXT: bif v0.16b, v4.16b, v7.16b
+; CHECK-SD-NEXT: bif v2.16b, v5.16b, v7.16b
 ; CHECK-SD-NEXT: sshl v1.2d, v1.2d, v6.2d
 ; CHECK-SD-NEXT: sshl v3.2d, v3.2d, v6.2d
 ; CHECK-SD-NEXT: ret
@@ -920,13 +920,13 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) {
 ; CHECK-SD-NEXT: adrp x8, .LCPI27_1
 ; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v2.4s
 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI27_1]
+; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-SD-NEXT: dup v3.2s, w8
 ; CHECK-SD-NEXT: adrp x8, .LCPI27_2
 ; CHECK-SD-NEXT: add v1.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT: sshl v1.4s, v1.4s, v2.4s
 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI27_2]
-; CHECK-SD-NEXT: adrp x8, .LCPI27_3
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI27_3]
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v3.16b
 ; CHECK-SD-NEXT: neg v1.4s, v0.4s
 ; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index d547b6bec5b83..bf3a28d16f683 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -603,10 +603,9 @@ entry:
 define i128 @i128_mask(i128 %x) {
 ; CHECK-SD-LABEL: i128_mask:
 ; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
 ; CHECK-SD-NEXT: and x8, x0, #0xff
 ; CHECK-SD-NEXT: mov x1, xzr
-; CHECK-SD-NEXT: mov v0.d[0], x8
+; CHECK-SD-NEXT: fmov d0, x8
 ; CHECK-SD-NEXT: cnt v0.16b, v0.16b
 ; CHECK-SD-NEXT: addv b0, v0.16b
 ; CHECK-SD-NEXT: fmov x0, d0
diff --git a/llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll b/llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll
new file mode 100644
index 0000000000000..d2e193fb46f90
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define <2 x i64> @low_vector_splat_v2i64_from_i64(i64 %0) {
+; CHECK-LABEL: low_vector_splat_v2i64_from_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ret
+  %2 = insertelement <1 x i64> poison, i64 %0, i64 0
+  %3 = shufflevector <1 x i64> %2, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
+  ret <2 x i64> %3
+}
+
+define <4 x i32> @low_vector_splat_v4i32_from_i32(i32 %0) {
+; CHECK-LABEL: low_vector_splat_v4i32_from_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.2s, w0
+; CHECK-NEXT: ret
+  %2 = insertelement <2 x i32> poison, i32 %0, i64 0
+  %3 = shufflevector <2 x i32> %2, <2 x i32> poison, <2 x i32> zeroinitializer
+  %4 = shufflevector <2 x i32> %3, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %4
+}
+
+define <8 x i16> @low_vector_splat_v8i16_from_i16(i16 %0) {
+; CHECK-LABEL: low_vector_splat_v8i16_from_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.4h, w0
+; CHECK-NEXT: ret
+  %2 = insertelement <4 x i16> poison, i16 %0, i64 0
+  %3 = shufflevector <4 x i16> %2, <4 x i16> poison, <4 x i32> zeroinitializer
+  %4 = shufflevector <4 x i16> %3, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %4
+}
+
+define <16 x i8> @low_vector_splat_v16i8_from_i8(i8 %0) {
+; CHECK-LABEL: low_vector_splat_v16i8_from_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8b, w0
+; CHECK-NEXT: ret
+  %2 = insertelement <8 x i8> poison, i8 %0, i64 0
+  %3 = shufflevector <8 x i8> %2, <8 x i8> poison, <8 x i32> zeroinitializer
+  %4 = shufflevector <8 x i8> %3, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %4
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
index 884d668157e5f..3da50ee45a9a1 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
@@ -90,9 +90,9 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
 ; CHECK-NEXT: add x8, x12, x8
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: fmov d3, x8
-; CHECK-NEXT: adrp x8, .LCPI3_1
+; CHECK-NEXT: mov w8, #3 // =0x3
 ; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT: fmov d2, x8
 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
 ; CHECK-NEXT: cmeq v1.2d, v1.2d, v2.2d

From c43edd48bc8d254e57e896945ed1046acd62542a Mon Sep 17 00:00:00 2001
From: Amichaxx
Date: Fri, 7 Nov 2025 16:01:24 +0000
Subject: [PATCH 2/2] Clang format

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 18d6d0fc5f2d6..fe4126c3845f5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15670,9 +15670,11 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
       bool IsLoSplatHiZero = true;
       for (unsigned i = 0; i < NumElts; ++i) {
         SDValue Vi = Op.getOperand(i);
-        bool violates = (i < HalfElts) ? (Vi != FirstVal)
-                                       : !IsZero(Vi);
-        if (violates) { IsLoSplatHiZero = false; break; }
+        bool violates = (i < HalfElts) ? (Vi != FirstVal) : !IsZero(Vi);
+        if (violates) {
+          IsLoSplatHiZero = false;
+          break;
+        }
       }
 
       if (IsLoSplatHiZero) {
@@ -15681,21 +15683,21 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
         auto buildSubregToReg = [&](SDValue LoHalf) -> SDValue {
           SDValue ZeroImm = DAG.getTargetConstant(0, DL, MVT::i32);
-          SDValue SubIdx = DAG.getTargetConstant(AArch64::dsub, DL, MVT::i32);
+          SDValue SubIdx = DAG.getTargetConstant(AArch64::dsub, DL, MVT::i32);
           SDNode *N = DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, VT,
-                                         {ZeroImm, LoHalf, SubIdx});
+                                         {ZeroImm, LoHalf, SubIdx});
           return SDValue(N, 0);
         };
 
         if (LaneBits == 64) {
           // v2i64
           SDValue First64 = DAG.getZExtOrTrunc(FirstVal, DL, MVT::i64);
-          SDValue Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, HalfVT, First64);
+          SDValue Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, HalfVT, First64);
           return buildSubregToReg(Lo);
         } else {
           // v4i32/v8i16/v16i8
           SDValue FirstW = DAG.getZExtOrTrunc(FirstVal, DL, MVT::i32);
-          SDValue DupLo = DAG.getNode(AArch64ISD::DUP, DL, HalfVT, FirstW);
+          SDValue DupLo = DAG.getNode(AArch64ISD::DUP, DL, HalfVT, FirstW);
           return buildSubregToReg(DupLo);
         }
       }
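
A quick way to see the fold in isolation is to run llc on a standalone
reproducer. The IR below is a minimal sketch modelled on the v4i32 case in
the added test file; the file and function names are illustrative, not part
of the patch:

  ; lo-splat-hi-zero.ll
  ; Lanes 0-1 hold %x, lanes 2-3 are zero.
  define <4 x i32> @lo_splat_hi_zero(i32 %x) {
    %v = insertelement <2 x i32> poison, i32 %x, i64 0
    %splat = shufflevector <2 x i32> %v, <2 x i32> poison, <2 x i32> zeroinitializer
    %r = shufflevector <2 x i32> %splat, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x i32> %r
  }

With llc -mtriple=aarch64-linux-gnu, this BUILD_VECTOR previously lowered
through a zeroed register plus a lane insert (movi v0.2d, #0000000000000000
followed by mov v0.d[0], as visible in the updated CHECK lines above); with
the patch the body selects to a single dup v0.2s, w0, with SUBREG_TO_REG
guaranteeing that the upper 64 bits of the destination are zero.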