-
Notifications
You must be signed in to change notification settings - Fork 15k
[AArch64][GlobalISel] Added pmull/pmull64 intrinsic support #165740
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
GlobalISel now selects the pmull intrinsic given vector operands, without falling back to SDAG.
pmull64 needs its two i64 operands to be placed in floating-point registers so that they can be treated as vectors. Adding the G_PMULL gMIR opcode to onlyUsesFP in AArch64RegisterBankInfo ensures these operands are assigned to FPRs during register bank selection.
|
@llvm/pr-subscribers-backend-aarch64 Author: Joshua Rodriguez (JoshdRod) Changes: GISel no longer falls back onto SDAG when attempting to lower the pmull and pmull64 intrinsics. Patch is 23.39 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/165740.diff. 9 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 30b7b03f7a69a..52b216c7fe0f0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_PMULL : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
def G_UADDLP : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
@@ -273,6 +279,7 @@ def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
def : GINodeEquiv<G_BSP, AArch64bsp>;
+def : GINodeEquiv<G_PMULL, AArch64pmull>;
def : GINodeEquiv<G_UMULL, AArch64umull>;
def : GINodeEquiv<G_SMULL, AArch64smull>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5f93847bc680e..bb027e52ccc29 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1809,6 +1809,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_FMAXNUM);
case Intrinsic::aarch64_neon_fminnm:
return LowerBinOp(TargetOpcode::G_FMINNUM);
+ case Intrinsic::aarch64_neon_pmull:
+ return LowerBinOp(AArch64::G_PMULL); // Will prob need to be 64bit specific, /shrug
+ case Intrinsic::aarch64_neon_pmull64:
+ return LowerBinOp(AArch64::G_PMULL);
case Intrinsic::aarch64_neon_smull:
return LowerBinOp(AArch64::G_SMULL);
case Intrinsic::aarch64_neon_umull:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 6d2d70511e894..6b920f05227ad 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -560,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_FCMP:
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND:
+ case AArch64::G_PMULL:
return true;
case TargetOpcode::G_INTRINSIC:
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
diff --git a/llvm/test/CodeGen/AArch64/128bit_load_store.ll b/llvm/test/CodeGen/AArch64/128bit_load_store.ll
index ee092bc4cb7d2..329c49ee1a68e 100644
--- a/llvm/test/CodeGen/AArch64/128bit_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/128bit_load_store.ll
@@ -1,24 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s
define void @test_store_f128(ptr %ptr, fp128 %val) #0 {
-; CHECK-LABEL: test_store_f128
-; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-LABEL: test_store_f128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
entry:
store fp128 %val, ptr %ptr, align 16
ret void
}
define fp128 @test_load_f128(ptr readonly %ptr) #2 {
-; CHECK-LABEL: test_load_f128
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-LABEL: test_load_f128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
entry:
%0 = load fp128, ptr %ptr, align 16
ret fp128 %0
}
define void @test_vstrq_p128(ptr %ptr, i128 %val) #0 {
-; CHECK-LABEL: test_vstrq_p128
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-LABEL: test_vstrq_p128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x2, x3, [x0]
+; CHECK-NEXT: ret
entry:
%0 = bitcast i128 %val to fp128
@@ -27,8 +34,11 @@ entry:
}
define i128 @test_vldrq_p128(ptr readonly %ptr) #2 {
-; CHECK-LABEL: test_vldrq_p128
-; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-LABEL: test_vldrq_p128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldp x8, x1, [x0]
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
entry:
%0 = load fp128, ptr %ptr, align 16
@@ -37,9 +47,11 @@ entry:
}
define void @test_ld_st_p128(ptr nocapture %ptr) #0 {
-; CHECK-LABEL: test_ld_st_p128
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
-; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
+; CHECK-LABEL: test_ld_st_p128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: str q0, [x0, #16]
+; CHECK-NEXT: ret
entry:
%0 = load fp128, ptr %ptr, align 16
%add.ptr = getelementptr inbounds i128, ptr %ptr, i64 1
diff --git a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll
index b58515c497c32..35cb44a692c3a 100644
--- a/llvm/test/CodeGen/AArch64/2s-complement-asm.ll
+++ b/llvm/test/CodeGen/AArch64/2s-complement-asm.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=arm64-apple-ios %s -filetype=obj -o - | llvm-objdump --macho --section __DATA,__data - | FileCheck %s
; CHECK: Contents of (__DATA,__data) section
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 0cd885e599817..84288b4e59dd5 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -3,9 +3,6 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1
-
define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
; CHECK: // %bb.0:
@@ -1832,14 +1829,33 @@ entry:
}
define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
-; CHECK-LABEL: pmlsl2_v8i16_uzp1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q2, [x1, #16]
-; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: ldr q2, [x1, #16]
+; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-NEON-NEXT: str q0, [x0]
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ldr q2, [x1, #16]
+; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-SVE-NEXT: str q0, [x0]
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q2, [x1, #16]
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: xtn v2.8b, v2.8h
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: str q0, [x0]
+; CHECK-GI-NEXT: ret
%5 = getelementptr inbounds i32, ptr %3, i64 4
%6 = load <8 x i16>, ptr %5, align 4
%7 = trunc <8 x i16> %6 to <8 x i8>
@@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) {
}
define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) {
-; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b
-; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
-; CHECK-NEXT: add v0.8h, v3.8h, v0.8h
-; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
+; CHECK-NEON: // %bb.0: // %entry
+; CHECK-NEON-NEXT: ldp q2, q3, [x1]
+; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b
+; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h
+; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-NEON-NEXT: str q0, [x0]
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
+; CHECK-SVE: // %bb.0: // %entry
+; CHECK-SVE-NEXT: ldp q2, q3, [x1]
+; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b
+; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h
+; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-SVE-NEXT: str q0, [x0]
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldp q2, q3, [x1]
+; CHECK-GI-NEXT: mov d4, v0.d[1]
+; CHECK-GI-NEXT: xtn v2.8b, v2.8h
+; CHECK-GI-NEXT: xtn v3.8b, v3.8h
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
+; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: str q0, [x0]
+; CHECK-GI-NEXT: ret
entry:
%5 = load <8 x i16>, ptr %3, align 4
%6 = trunc <8 x i16> %5 to <8 x i8>
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 2a8b3ce2ae10b..42de4194e5b26 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -2,11 +2,6 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64
-
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
@@ -2721,14 +2716,24 @@ entry:
}
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
-; CHECK-LABEL: test_vmull_p64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov d0, x1
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
-; CHECK-NEXT: mov x1, v0.d[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_p64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov d0, x1
+; CHECK-SD-NEXT: fmov d1, x0
+; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-SD-NEXT: mov x1, v0.d[1]
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_p64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: fmov d1, x1
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: fmov x1, d1
+; CHECK-GI-NEXT: ret
entry:
%vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
%vmull3.i = bitcast <16 x i8> %vmull2.i to i128
@@ -2736,12 +2741,22 @@ entry:
}
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
-; CHECK-LABEL: test_vmull_high_p64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
-; CHECK-NEXT: mov x1, v0.d[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vmull_high_p64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
+; CHECK-SD-NEXT: mov x1, v0.d[1]
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vmull_high_p64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: fmov x1, d1
+; CHECK-GI-NEXT: ret
entry:
%0 = extractelement <2 x i64> %a, i32 1
%1 = extractelement <2 x i64> %b, i32 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2fb2c56..8c22d43cca430 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -2,45 +2,6 @@
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for pmull8h
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64
-
define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smull8h:
; CHECK: // %bb.0:
@@ -2895,11 +2856,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) {
}
define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
-; CHECK-LABEL: pmull_from_extract_dup_high:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.16b, w0
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: pmull_from_extract_dup_high:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.16b, w0
+; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pmull_from_extract_dup_high:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v1.8b, w0
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
%rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -2924,12 +2892,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs)
}
define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK-LABEL: pmull_from_extract_duplane_high:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: dup v1.16b, v1.b[0]
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: pmull_from_extract_duplane_high:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: dup v1.16b, v1.b[0]
+; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pmull_from_extract_duplane_high:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8b, v1.b[0]
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -3245,21 +3221,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
}
define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: test_pmull_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x1
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_pmull_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov d0, x1
+; CHECK-SD-NEXT: fmov d1, x0
+; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_pmull_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: fmov d1, x1
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: ret
%val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
ret <16 x i8> %val
}
define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
-; CHECK-LABEL: test_pmull_high_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_pmull_high_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_pmull_high_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
| case Intrinsic::aarch64_neon_fminnm: | ||
| return LowerBinOp(TargetOpcode::G_FMINNUM); | ||
| case Intrinsic::aarch64_neon_pmull: | ||
| return LowerBinOp(AArch64::G_PMULL); // Will prob need to be 64bit specific, /shrug |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure what needs to be 64-bit. It's probably best to drop the comment.
The two cases can also be merged:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_pmull64:
return LowerBinOp(AArch64::G_PMULL);
| @@ -1,24 +1,31 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need to be updated in this patch?
| @@ -1,3 +1,4 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove this?
| ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD | ||
| ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI | ||
|
|
||
| ; CHECK-GI: warning: Instruction selection used fallback path for pmull8h |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can now be:
; CHECK-GI: warning: Instruction selection used fallback path for sqdmulh_1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d
GISel no longer falls back onto SDAG when attempting to lower the pmull and pmull64 intrinsics.