-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[AArch64][GlobalISel] Fix creation of incorrect COPY gpr32, grp32.sub_32 #171100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
In selecting a G_STORE with i8 memty and i16 type, we would try to generate a %6:gpr32 = COPY %3.sub_32:gpr32all instruction. Make sure we only add the subreg if the Ty is larger than 32bits and the subreg is needed.
|
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesIn selecting a G_STORE with i8 memty and i16 type, we would try to generate a %6:gpr32 = COPY %3.sub_32:gpr32all instruction. Make sure we only add the subreg if the Ty is larger than 32bits and the subreg is needed. Full diff: https://github.com/llvm/llvm-project/pull/171100.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index f9db39e5f8622..88f1778680efe 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3015,6 +3015,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
// Generate a subreg copy.
+ if (RB.getID() == AArch64::GPRRegBankID && ValTy.getSizeInBits() < 32)
+ SubReg = 0;
auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
.addReg(ValReg, 0, SubReg)
.getReg(0);
diff --git a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
index 02efbe9b409de..f1635239b146b 100644
--- a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
+++ b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
@@ -1,8 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefix CHECK-BE
-; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefix CHECK-LE
+; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-SD
+; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefixes CHECK-BE
+; RUN: llc -mtriple=aarch64 -global-isel -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-GI
define i8 @test1(i32 %a, ptr %pa) {
+; CHECK-SD-LABEL: test1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str w0, [x1]
+; CHECK-SD-NEXT: ret
+;
; CHECK-BE-LABEL: test1:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: mov w8, w0
@@ -10,27 +16,28 @@ define i8 @test1(i32 %a, ptr %pa) {
; CHECK-BE-NEXT: str w8, [x1]
; CHECK-BE-NEXT: ret
;
-; CHECK-LE-LABEL: test1:
-; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: str w0, [x1]
-; CHECK-LE-NEXT: ret
+; CHECK-GI-LABEL: test1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str w0, [x1]
+; CHECK-GI-NEXT: and w0, w0, #0xff
+; CHECK-GI-NEXT: ret
store i32 %a, ptr %pa
%res = load i8, ptr %pa
ret i8 %res
}
define i8 @test2(i32 %a, ptr %pa) {
-; CHECK-BE-LABEL: test2:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: str w0, [x1]
-; CHECK-BE-NEXT: ldrb w0, [x1, #1]
-; CHECK-BE-NEXT: ret
-;
; CHECK-LE-LABEL: test2:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: str w0, [x1]
; CHECK-LE-NEXT: ubfx w0, w0, #8, #8
; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test2:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str w0, [x1]
+; CHECK-BE-NEXT: ldrb w0, [x1, #1]
+; CHECK-BE-NEXT: ret
%p8 = getelementptr i8, ptr %pa, i32 1
store i32 %a, ptr %pa
%res = load i8, ptr %p8
@@ -38,17 +45,17 @@ define i8 @test2(i32 %a, ptr %pa) {
}
define i8 @test3(i32 %a, ptr %pa) {
-; CHECK-BE-LABEL: test3:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: str w0, [x1]
-; CHECK-BE-NEXT: ldrb w0, [x1, #2]
-; CHECK-BE-NEXT: ret
-;
; CHECK-LE-LABEL: test3:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: str w0, [x1]
; CHECK-LE-NEXT: ubfx w0, w0, #16, #8
; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test3:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str w0, [x1]
+; CHECK-BE-NEXT: ldrb w0, [x1, #2]
+; CHECK-BE-NEXT: ret
%p8 = getelementptr i8, ptr %pa, i32 2
store i32 %a, ptr %pa
%res = load i8, ptr %p8
@@ -56,18 +63,68 @@ define i8 @test3(i32 %a, ptr %pa) {
}
define i8 @test4(i32 %a, ptr %pa) {
-; CHECK-BE-LABEL: test4:
-; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: str w0, [x1]
-; CHECK-BE-NEXT: ret
-;
; CHECK-LE-LABEL: test4:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: str w0, [x1]
; CHECK-LE-NEXT: lsr w0, w0, #24
; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test4:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str w0, [x1]
+; CHECK-BE-NEXT: ret
%p8 = getelementptr i8, ptr %pa, i32 3
store i32 %a, ptr %pa
%res = load i8, ptr %p8
ret i8 %res
}
+
+define i32 @load_i16_store_i8(ptr %p, ptr %q) {
+; CHECK-SD-LABEL: load_i16_store_i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldrb w8, [x0]
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: strb w8, [x1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-BE-LABEL: load_i16_store_i8:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: ldrb w8, [x0, #1]
+; CHECK-BE-NEXT: mov w0, wzr
+; CHECK-BE-NEXT: strb w8, [x1]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: load_i16_store_i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: strb w8, [x1]
+; CHECK-GI-NEXT: ret
+entry:
+ %l = load i16, ptr %p, align 4
+ %tr = trunc i16 %l to i8
+ store i8 %tr, ptr %q, align 1
+ ret i32 0
+}
+
+define i32 @load_i16_store_i8_freeze(ptr %p, ptr %q) {
+; CHECK-LE-LABEL: load_i16_store_i8_freeze:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: ldrh w8, [x0]
+; CHECK-LE-NEXT: mov w0, wzr
+; CHECK-LE-NEXT: strb w8, [x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: load_i16_store_i8_freeze:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: ldrh w8, [x0]
+; CHECK-BE-NEXT: mov w0, wzr
+; CHECK-BE-NEXT: strb w8, [x1]
+; CHECK-BE-NEXT: ret
+entry:
+ %l = load i16, ptr %p, align 4
+ %fr = freeze i16 %l
+ %tr = trunc i16 %fr to i8
+ store i8 %tr, ptr %q, align 1
+ ret i32 0
+}
|
| ; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-SD | ||
| ; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefixes CHECK-BE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-SD | |
| ; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefixes CHECK-BE | |
| ; RUN: llc -global-isel=0 -mtriple=aarch64 -o - %s | FileCheck %s --check-prefixes CHECK-LE,CHECK-SD | |
| ; RUN: llc -global-isel=0 -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefixes CHECK-BE |
|
|
||
| // Generate a subreg copy. | ||
| if (RB.getID() == AArch64::GPRRegBankID && ValTy.getSizeInBits() < 32) | ||
| SubReg = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| SubReg = 0; | |
| SubReg = AArch64::NoSubRegister; |
| return false; | ||
|
|
||
| // Generate a subreg copy. | ||
| if (RB.getID() == AArch64::GPRRegBankID && ValTy.getSizeInBits() < 32) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This logical flow seems off; it's correcting a wrong result above for a specific case. I'd expect getSubRegForClass to work in terms of reporting an accurate subregister, based on the register class of the value type not memory
In selecting a G_STORE with i8 memty and i16 type, we would try to generate a %6:gpr32 = COPY %3.sub_32:gpr32all instruction. Make sure we only add the subreg if the Ty is larger than 32bits and the subreg is needed.