-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GlobalISel] Adopt dup(load) -> LD1R patterns from SelectionDAG #66914
Conversation
@llvm/pr-subscribers-backend-aarch64 Changes: Follow-up of #65630. Allows the arm64-st1.ll and arm64-ld1.ll tests to be selected completely. Full diff: https://github.com/llvm/llvm-project/pull/66914.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 0f3ef2327769eae..b47605c0b797cf8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -496,3 +496,20 @@ let AddedComplexity = 19 in {
defm : VecROStoreLane64_0Pat<ro16, store, v4i16, i16, hsub, STRHroW, STRHroX>;
defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub, STRSroW, STRSroX>;
}
+
+def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))),
+ (LD1Rv8b GPR64sp:$Rn)>;
+def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
+ (LD1Rv16b GPR64sp:$Rn)>;
+def : Pat<(v4i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv1d GPR64sp:$Rn)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 6657b19d24929d8..6277162788a8cb7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13794,8 +13794,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], #1
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, #1
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13828,8 +13829,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
;
; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], x2
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, x2
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13862,8 +13864,9 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], #1
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, #1
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13888,8 +13891,9 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], x2
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, x2
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 96468b2cfa8ace8..5b5ced1097e4418 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
diff --git a/llvm/test/CodeGen/AArch64/arm64-st1.ll b/llvm/test/CodeGen/AArch64/arm64-st1.ll
index 121ca69bee21dd6..6f87c66c873451a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-st1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-st1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; The instruction latencies of Exynos-M3 trigger the transform we see under the Exynos check.
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s
|
; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], #1 | ||
; CHECK-GISEL-NEXT: str x0, [x1] | ||
; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0] | ||
; CHECK-GISEL-NEXT: add x8, x0, #1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This size increase is due to not falling back, right? If so, we should add the function to the fallback NOT checks at the top of the test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, there were check lines for that in #65630, but I forgot to set up the error stream for FileCheck.
Added the corresponding commit.
8daf7ae
to
b8e9450
Compare
I've reverted this and the follow-up due to a test suite failure on AArch64 (1778d68). See https://lab.llvm.org/buildbot/#/builders/183/builds/16057 To reproduce:
|
Follow-up of #65630.
Allows the arm64-st1.ll and arm64-ld1.ll tests to be selected completely.