diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 30b7b03f7a69a..52b216c7fe0f0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_PMULL : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src1, type1:$src2); + let hasSideEffects = 0; +} + def G_UADDLP : AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); @@ -273,6 +279,7 @@ def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5f93847bc680e..7644a63d8c081 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1809,6 +1809,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerBinOp(TargetOpcode::G_FMAXNUM); case Intrinsic::aarch64_neon_fminnm: return LowerBinOp(TargetOpcode::G_FMINNUM); + case Intrinsic::aarch64_neon_pmull: + return LowerBinOp(AArch64::G_PMULL); + case Intrinsic::aarch64_neon_pmull64: + return LowerBinOp(AArch64::G_PMULL); case Intrinsic::aarch64_neon_smull: return LowerBinOp(AArch64::G_SMULL); case Intrinsic::aarch64_neon_umull: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 6d2d70511e894..6b920f05227ad 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -560,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, case TargetOpcode::G_FCMP: case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: + case AArch64::G_PMULL: return true; case TargetOpcode::G_INTRINSIC: switch (cast(MI).getIntrinsicID()) { diff --git a/llvm/test/CodeGen/AArch64/128bit_load_store.ll b/llvm/test/CodeGen/AArch64/128bit_load_store.ll index ee092bc4cb7d2..329c49ee1a68e 100644 --- a/llvm/test/CodeGen/AArch64/128bit_load_store.ll +++ b/llvm/test/CodeGen/AArch64/128bit_load_store.ll @@ -1,24 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s define void @test_store_f128(ptr %ptr, fp128 %val) #0 { -; CHECK-LABEL: test_store_f128 -; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_store_f128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret entry: store fp128 %val, ptr %ptr, align 16 ret void } define fp128 @test_load_f128(ptr readonly %ptr) #2 { -; CHECK-LABEL: test_load_f128 -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_load_f128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret entry: %0 = load fp128, ptr %ptr, align 16 ret fp128 %0 } define void @test_vstrq_p128(ptr %ptr, i128 %val) #0 { -; CHECK-LABEL: test_vstrq_p128 -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_vstrq_p128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x2, x3, [x0] +; CHECK-NEXT: ret entry: %0 = bitcast i128 %val to fp128 @@ -27,8 +34,11 @@ entry: } define i128 @test_vldrq_p128(ptr readonly %ptr) #2 { -; CHECK-LABEL: test_vldrq_p128 -; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-LABEL: test_vldrq_p128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp x8, x1, [x0] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret entry: %0 = load fp128, ptr %ptr, align 16 @@ -37,9 +47,11 @@ entry: } define void @test_ld_st_p128(ptr nocapture %ptr) #0 { -; CHECK-LABEL: test_ld_st_p128 -; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] -; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16] +; CHECK-LABEL: test_ld_st_p128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: str q0, [x0, #16] +; CHECK-NEXT: ret entry: %0 = load fp128, ptr %ptr, align 16 %add.ptr = getelementptr inbounds i128, ptr %ptr, i64 1 diff --git a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll index b58515c497c32..35cb44a692c3a 100644 --- a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll +++ b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=arm64-apple-ios %s -filetype=obj -o - | llvm-objdump --macho --section __DATA,__data - | FileCheck %s ; CHECK: Contents of (__DATA,__data) section diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 0cd885e599817..84288b4e59dd5 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -3,9 +3,6 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1 - define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull_v8i8_v8i16: ; CHECK: // %bb.0: @@ -1832,14 +1829,33 @@ entry: } define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { } define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b -; CHECK-NEXT: add v0.8h, v3.8h, v0.8h -; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-NEON-NEXT: str q0, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h +; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-SVE-NEXT: str q0, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q2, q3, [x1] +; CHECK-GI-NEXT: mov d4, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v3.8b, v3.8h +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b +; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index 2a8b3ce2ae10b..42de4194e5b26 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64 - declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) @@ -2721,14 +2716,24 @@ entry: } define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { -; CHECK-LABEL: test_vmull_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 @@ -2736,12 +2741,22 @@ entry: } define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { -; CHECK-LABEL: test_vmull_high_p64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: mov x1, v0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vmull_high_p64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vmull_high_p64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: ret entry: %0 = extractelement <2 x i64> %a, i32 1 %1 = extractelement <2 x i64> %b, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index e6df9f2fb2c56..8c22d43cca430 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -2,45 +2,6 @@ ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64 - define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull8h: ; CHECK: // %bb.0: @@ -2895,11 +2856,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) { } define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) { -; CHECK-LABEL: pmull_from_extract_dup_high: -; CHECK: // %bb.0: -; CHECK-NEXT: dup v1.16b, w0 -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_dup_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup v1.16b, w0 +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_dup_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v1.8b, w0 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> @@ -2924,12 +2892,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) } define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) { -; CHECK-LABEL: pmull_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: dup v1.16b, v1.b[0] -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: dup v1.16b, v1.b[0] +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: dup v1.8b, v1.b[0] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> @@ -3245,21 +3221,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { } define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val } define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { -; CHECK-LABEL: test_pmull_high_64: -; CHECK: // %bb.0: -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_high_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi) @@ -3267,13 +3257,22 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { } define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_commutable_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: add v0.16b, v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_commutable_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_commutable_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b +; CHECK-GI-NEXT: ret %1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) %2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l) %3 = add <16 x i8> %1, %2 diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll index df4889b6f09de..80ad951ee1ca2 100644 --- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll +++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll @@ -3,9 +3,6 @@ ; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE ; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64 - declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) @@ -521,12 +518,12 @@ entry: } define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { -; CHECK-LABEL: test_pmull_high_p8_128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: fmov d1, x1 -; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-LE-LABEL: test_pmull_high_p8_128: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: fmov d0, x3 +; CHECK-LE-NEXT: fmov d1, x1 +; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b +; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test_pmull_high_p8_128: ; CHECK-BE: // %bb.0: // %entry @@ -538,6 +535,15 @@ define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_p8_128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov v0.d[0], x0 +; CHECK-GI-NEXT: mov v1.d[0], x2 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: mov v1.d[1], x3 +; CHECK-GI-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: ret entry: %a = bitcast i128 %aa to <16 x i8> %b = bitcast i128 %bb to <16 x i8>