From 35db481bd8592daef3d0a687ce4aa6e955b27694 Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Thu, 30 Oct 2025 11:41:52 +0000 Subject: [PATCH 1/8] [AArch64][GlobalISel] Added G_PMULL node GlobalISel now selects the pmull intrinsic given vector operands, without falling back to SDAG. --- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ++ .../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 + .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 2 + .../test/CodeGen/AArch64/128bit_load_store.ll | 34 +++++--- .../test/CodeGen/AArch64/2s-complement-asm.ll | 1 + llvm/test/CodeGen/AArch64/aarch64-smull.ll | 82 ++++++++++++++----- .../test/CodeGen/AArch64/arm64-neon-3vdiff.ll | 5 -- llvm/test/CodeGen/AArch64/arm64-vmul.ll | 76 ++++++----------- .../CodeGen/AArch64/highextractbitcast.ll | 24 ++++-- 9 files changed, 139 insertions(+), 96 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 30b7b03f7a69a..52b216c7fe0f0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_PMULL : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src1, type1:$src2); + let hasSideEffects = 0; +} + def G_UADDLP : AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); @@ -273,6 +279,7 @@ def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5f93847bc680e..bb027e52ccc29 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1809,6 +1809,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerBinOp(TargetOpcode::G_FMAXNUM); case Intrinsic::aarch64_neon_fminnm: return LowerBinOp(TargetOpcode::G_FMINNUM); + case Intrinsic::aarch64_neon_pmull: + return LowerBinOp(AArch64::G_PMULL); // Will prob need to be 64bit specific, /shrug + case Intrinsic::aarch64_neon_pmull64: + return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_smull: return LowerBinOp(AArch64::G_SMULL); case Intrinsic::aarch64_neon_umull: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 6d2d70511e894..aad00fc62e2fe 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -482,6 +482,8 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI, case Intrinsic::aarch64_neon_sqrdmulh: case Intrinsic::aarch64_neon_sqadd: case Intrinsic::aarch64_neon_sqsub: + case Intrinsic::aarch64_neon_pmull: + case Intrinsic::aarch64_neon_pmull64: case Intrinsic::aarch64_crypto_sha1h: case Intrinsic::aarch64_crypto_sha1c: case Intrinsic::aarch64_crypto_sha1p: diff --git a/llvm/test/CodeGen/AArch64/128bit_load_store.ll b/llvm/test/CodeGen/AArch64/128bit_load_store.ll index ee092bc4cb7d2..329c49ee1a68e 100644 --- a/llvm/test/CodeGen/AArch64/128bit_load_store.ll +++ b/llvm/test/CodeGen/AArch64/128bit_load_store.ll @@ -1,24 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s define void @test_store_f128(ptr %ptr, fp128 %val) #0 { -; CHECK-LABEL: test_store_f128 -; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_store_f128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret entry: store fp128 %val, ptr %ptr, align 16 ret void } define fp128 @test_load_f128(ptr readonly %ptr) #2 { -; CHECK-LABEL: test_load_f128 -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_load_f128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret entry: %0 = load fp128, ptr %ptr, align 16 ret fp128 %0 } define void @test_vstrq_p128(ptr %ptr, i128 %val) #0 { -; CHECK-LABEL: test_vstrq_p128 -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_vstrq_p128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x2, x3, [x0] +; CHECK-NEXT: ret entry: %0 = bitcast i128 %val to fp128 @@ -27,8 +34,11 @@ entry: } define i128 @test_vldrq_p128(ptr readonly %ptr) #2 { -; CHECK-LABEL: test_vldrq_p128 -; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_vldrq_p128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp x8, x1, [x0] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret entry: %0 = load fp128, ptr %ptr, align 16 @@ -37,9 +47,11 @@ entry: } define void @test_ld_st_p128(ptr nocapture %ptr) #0 { -; CHECK-LABEL: test_ld_st_p128 -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] -; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16] +; CHECK-LABEL: test_ld_st_p128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: str q0, [x0, #16] +; CHECK-NEXT: ret entry: %0 = load fp128, ptr %ptr, align 16 %add.ptr = getelementptr inbounds i128, ptr %ptr, i64 1 diff --git a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll index b58515c497c32..35cb44a692c3a 100644 --- a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll +++ b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=arm64-apple-ios %s -filetype=obj -o - | llvm-objdump --macho --section __DATA,__data - | FileCheck %s ; CHECK: Contents of (__DATA,__data) section diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 0cd885e599817..84288b4e59dd5 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -3,9 +3,6 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1 - define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull_v8i8_v8i16: ; CHECK: // %bb.0: @@ -1832,14 +1829,33 @@ entry: } define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { } define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: add v0.8h, v3.8h, v0.8h -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q2, q3, [x1] +; CHECK-GI-NEXT: mov d4, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v3.8b, v3.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b +; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 2a8b3ce2ae10b..493b7c29fb5c2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64 - declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index e6df9f2fb2c56..765413dc3ddf8 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -2,45 +2,6 @@ ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64 - define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull8h: ; CHECK: // %bb.0: @@ -2895,11 +2856,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) { } define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) { -; CHECK-LABEL: pmull_from_extract_dup_high: -; CHECK: // %bb.0: -; CHECK-NEXT: dup v1.16b, w0 -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_dup_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup v1.16b, w0 +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_dup_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v1.8b, w0 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> @@ -2924,12 +2892,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) } define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) { -; CHECK-LABEL: pmull_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: dup v1.16b, v1.b[0] -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: dup v1.16b, v1.b[0] +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: dup v1.8b, v1.b[0] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll index df4889b6f09de..80ad951ee1ca2 100644 --- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll +++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll @@ -3,9 +3,6 @@ ; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE ; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64 - declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) @@ -521,12 +518,12 @@ entry: } define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { -; CHECK-LABEL: test_pmull_high_p8_128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: fmov d1, x1 -; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_pmull_high_p8_128: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: fmov d0, x3 +; CHECK-LE-NEXT: fmov d1, x1 +; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b +; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test_pmull_high_p8_128: ; CHECK-BE: // %bb.0: // %entry @@ -538,6 +535,15 @@ define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_p8_128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov v0.d[0], x0 +; CHECK-GI-NEXT: mov v1.d[0], x2 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: mov v1.d[1], x3 +; CHECK-GI-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: ret entry: %a = bitcast i128 %aa to <16 x i8> %b = bitcast i128 %bb to <16 x i8> From 6028e9411a4ac1d77b42ba2f677526b5bd6e80ab Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Thu, 30 Oct 2025 16:44:32 +0000 Subject: [PATCH 2/8] [AArch64][GlobalISel] Added correct lowering for pmull64 pmull64 needs its two i64 operands to be placed into floating point registers, so they can be treated as vectors. Placing the gMIR opcode into OnlyUsesFP within ensures these operands are placed onto fprs during Register Bank Selection. --- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 3 +- .../test/CodeGen/AArch64/arm64-neon-3vdiff.ll | 48 +++++++++++----- llvm/test/CodeGen/AArch64/arm64-vmul.ll | 57 +++++++++++++------ 3 files changed, 75 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index aad00fc62e2fe..6b920f05227ad 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -482,8 +482,6 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI, case Intrinsic::aarch64_neon_sqrdmulh: case Intrinsic::aarch64_neon_sqadd: case Intrinsic::aarch64_neon_sqsub: - case Intrinsic::aarch64_neon_pmull: - case Intrinsic::aarch64_neon_pmull64: case Intrinsic::aarch64_crypto_sha1h: case Intrinsic::aarch64_crypto_sha1c: case Intrinsic::aarch64_crypto_sha1p: @@ -562,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, case TargetOpcode::G_FCMP: case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: + case AArch64::G_PMULL: return true; case TargetOpcode::G_INTRINSIC: switch (cast(MI).getIntrinsicID()) { diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 493b7c29fb5c2..42de4194e5b26 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -2716,14 +2716,24 @@ entry: } define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { -; CHECK-LABEL: test_vmull_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 @@ -2731,12 +2741,22 @@ entry: } define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { -; CHECK-LABEL: test_vmull_high_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_high_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_high_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: %0 = extractelement <2 x i64> %a, i32 1 %1 = extractelement <2 x i64> %b, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index 765413dc3ddf8..8c22d43cca430 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -3221,21 +3221,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { } define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val } define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { -; CHECK-LABEL: test_pmull_high_64: -; CHECK: // %bb.0: -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_high_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi) @@ -3243,13 +3257,22 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { } define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_commutable_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: add v0.16b, v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_commutable_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_commutable_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b +; CHECK-GI-NEXT: ret %1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) %2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l) %3 = add <16 x i8> %1, %2 From 9501b89c2097eba84e59b34eef7dc6453d477619 Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Fri, 31 Oct 2025 09:12:46 +0000 Subject: [PATCH 3/8] [AArch64][GlobalISel] Fixed formatting --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index bb027e52ccc29..7644a63d8c081 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1810,7 +1810,7 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::aarch64_neon_fminnm: return LowerBinOp(TargetOpcode::G_FMINNUM); case Intrinsic::aarch64_neon_pmull: - return LowerBinOp(AArch64::G_PMULL); // Will prob need to be 64bit specific, /shrug + return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_pmull64: return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_smull: From 07830c0801ec4ba406a46fc921a6d462d1fe364b Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Tue, 4 Nov 2025 10:05:59 +0000 Subject: [PATCH 4/8] [AArch64][GlobalISel] Changed AArch64LegalizerInfo to follow DRY --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 7644a63d8c081..038ad77ae69b2 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1810,7 +1810,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::aarch64_neon_fminnm: return LowerBinOp(TargetOpcode::G_FMINNUM); case Intrinsic::aarch64_neon_pmull: - return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_pmull64: return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_smull: From a3c767fa056299aec1b4ebad2edfa15288166cc9 Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Tue, 4 Nov 2025 10:19:25 +0000 Subject: [PATCH 5/8] [AArch64][GlobalISel] Undid changes to 2s-complement-asm.ll --- llvm/test/CodeGen/AArch64/2s-complement-asm.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll index 35cb44a692c3a..b58515c497c32 100644 --- a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll +++ b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=arm64-apple-ios %s -filetype=obj -o - | llvm-objdump --macho --section __DATA,__data - | FileCheck %s ; CHECK: Contents of (__DATA,__data) section From d081c01ae3b78e4ab53771b804bc0c8681e61ae1 Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Tue, 4 Nov 2025 13:57:20 +0000 Subject: [PATCH 6/8] [AArch64][GlobalISel] Updated ISel fallback lines in arm64-vmul.ll --- llvm/test/CodeGen/AArch64/arm64-vmul.ll | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index 8c22d43cca430..90abc7d389c13 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -2,6 +2,36 @@ ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for sqdmulh_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d + define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull8h: ; CHECK: // %bb.0: From 6757ded81141d5ee8ed1fd8f0009ad472fb36be9 Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Tue, 4 Nov 2025 14:44:27 +0000 Subject: [PATCH 7/8] [AArch64][GlobalISel] Reverted unnecessary changes to 128bit_load_store.ll --- .../test/CodeGen/AArch64/128bit_load_store.ll | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/128bit_load_store.ll b/llvm/test/CodeGen/AArch64/128bit_load_store.ll index 329c49ee1a68e..ee092bc4cb7d2 100644 --- a/llvm/test/CodeGen/AArch64/128bit_load_store.ll +++ b/llvm/test/CodeGen/AArch64/128bit_load_store.ll @@ -1,31 +1,24 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s define void @test_store_f128(ptr %ptr, fp128 %val) #0 { -; CHECK-LABEL: test_store_f128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-LABEL: test_store_f128 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] entry: store fp128 %val, ptr %ptr, align 16 ret void } define fp128 @test_load_f128(ptr readonly %ptr) #2 { -; CHECK-LABEL: test_load_f128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ret +; CHECK-LABEL: test_load_f128 +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] entry: %0 = load fp128, ptr %ptr, align 16 ret fp128 %0 } define void @test_vstrq_p128(ptr %ptr, i128 %val) #0 { -; CHECK-LABEL: test_vstrq_p128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x2, x3, [x0] -; CHECK-NEXT: ret +; CHECK-LABEL: test_vstrq_p128 +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] entry: %0 = bitcast i128 %val to fp128 @@ -34,11 +27,8 @@ entry: } define i128 @test_vldrq_p128(ptr readonly %ptr) #2 { -; CHECK-LABEL: test_vldrq_p128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp x8, x1, [x0] -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: ret +; CHECK-LABEL: test_vldrq_p128 +; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] entry: %0 = load fp128, ptr %ptr, align 16 @@ -47,11 +37,9 @@ entry: } define void @test_ld_st_p128(ptr nocapture %ptr) #0 { -; CHECK-LABEL: test_ld_st_p128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: str q0, [x0, #16] -; CHECK-NEXT: ret +; CHECK-LABEL: test_ld_st_p128 +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] +; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16] entry: %0 = load fp128, ptr %ptr, align 16 %add.ptr = getelementptr inbounds i128, ptr %ptr, i64 1 From 0545de26791b3d6e3b0548e71f00604143bce85b Mon Sep 17 00:00:00 2001 From: Josh Rodriguez Date: Wed, 5 Nov 2025 14:51:42 +0000 Subject: [PATCH 8/8] [AArch64][GlobalISel] Removed redundant -global-isel-abort=2 in test run lines that no longer fallback to SDAG --- llvm/test/CodeGen/AArch64/aarch64-smull.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll | 2 +- llvm/test/CodeGen/AArch64/highextractbitcast.ll | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 84288b4e59dd5..e85e808921c87 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull_v8i8_v8i16: diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 42de4194e5b26..8cb319b2c3368 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll index 80ad951ee1ca2..bd6c168ce8776 100644 --- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll +++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes CHECK,CHECK-LE ; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE -; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)