-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GISel] Restrict G_BSWAP to multiples of 16 bits. #70245
Conversation
This is consistent with the IR verifier and SelectionDAG's getNode.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-amdgpu Author: Craig Topper (topperc) Changes: This is consistent with the IR verifier and SelectionDAG's getNode. Update tests accordingly. I tried to keep some coverage of non-pow2 sizes when possible. X86 didn't like a G_UNMERGE_VALUES from s48 to 3 s16 that got created when I tried s48. Full diff: https://github.com/llvm/llvm-project/pull/70245.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index f3e676b3a41a2cb..dadaf60fa09da04 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1592,6 +1592,12 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("G_SEXT_INREG size must be less than source bit width", MI);
break;
}
+ case TargetOpcode::G_BSWAP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (DstTy.getScalarSizeInBits() % 16 != 0)
+ report("G_BSWAP size must be a multiple of 16 bits", MI);
+ break;
+ }
case TargetOpcode::G_SHUFFLE_VECTOR: {
const MachineOperand &MaskOp = MI->getOperand(3);
if (!MaskOp.isShuffleMask()) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
index 6111f4966028939..fba0881d4e86f10 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
@@ -110,48 +110,27 @@ body: |
RET_ReallyLR implicit $q0
...
---
-name: bswap_s88
+name: bswap_s80
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
- ; CHECK-LABEL: name: bswap_s88
+ ; CHECK-LABEL: name: bswap_s80
; CHECK: liveins: $x0
- ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]]
- ; CHECK: [[BSWAP1:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]]
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
- ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[BSWAP]], [[C]](s64)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
- ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[BSWAP1]], [[C1]](s64)
- ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
- ; CHECK: $x0 = COPY [[OR]](s64)
- ; CHECK: RET_ReallyLR implicit $x0
- %val:_(s88) = G_IMPLICIT_DEF
- %bswap:_(s88) = G_BSWAP %val
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]]
+ ; CHECK-NEXT: [[BSWAP1:%[0-9]+]]:_(s64) = G_BSWAP [[DEF]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[BSWAP]], [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[BSWAP1]], [[C1]](s64)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
+ ; CHECK-NEXT: $x0 = COPY [[OR]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %val:_(s80) = G_IMPLICIT_DEF
+ %bswap:_(s80) = G_BSWAP %val
%trunc:_(s64) = G_TRUNC %bswap
$x0 = COPY %trunc(s64)
RET_ReallyLR implicit $x0
...
----
-name: bswap_s4
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $x0
- ; CHECK-LABEL: name: bswap_s4
- ; CHECK: liveins: $x0
- ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[DEF]]
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 28
- ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32)
- ; CHECK: %ext:_(s64) = G_AND [[ANYEXT]], [[C1]]
- ; CHECK: $x0 = COPY %ext(s64)
- ; CHECK: RET_ReallyLR implicit $x0
- %val:_(s4) = G_IMPLICIT_DEF
- %bswap:_(s4) = G_BSWAP %val
- %ext:_(s64) = G_ZEXT %bswap
- $x0 = COPY %ext(s64)
- RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
index 2b855e33e96d4d5..63235842de57bf2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
@@ -2,42 +2,6 @@
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s
----
-name: bswap_s8
-
-body: |
- bb.0:
- liveins: $vgpr0
- ; GFX7-LABEL: name: bswap_s8
- ; GFX7: liveins: $vgpr0
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
- ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
- ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
- ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32)
- ; GFX8-LABEL: name: bswap_s8
- ; GFX8: liveins: $vgpr0
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s16) = G_BSWAP [[TRUNC]]
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[BSWAP]], [[C]](s16)
- ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
- ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- %0:_(s32) = COPY $vgpr0
- %1:_(s8) = G_TRUNC %0
- %2:_(s8) = G_BSWAP %1
- %3:_(s32) = G_ANYEXT %2
- $vgpr0 = COPY %3
-...
-
---
name: bswap_s16
@@ -74,40 +38,6 @@ body: |
$vgpr0 = COPY %3
...
----
-name: bswap_s24
-
-body: |
- bb.0:
- liveins: $vgpr0
- ; GFX7-LABEL: name: bswap_s24
- ; GFX7: liveins: $vgpr0
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
- ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
- ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32)
- ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
- ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32)
- ; GFX8-LABEL: name: bswap_s24
- ; GFX8: liveins: $vgpr0
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
- ; GFX8-NEXT: $vgpr0 = COPY [[LSHR]](s32)
- %0:_(s32) = COPY $vgpr0
- %1:_(s24) = G_TRUNC %0
- %2:_(s24) = G_BSWAP %1
- %3:_(s32) = G_ANYEXT %2
- $vgpr0 = COPY %3
-...
-
---
name: bswap_s32
@@ -438,3 +368,39 @@ body: |
%1:_(<2 x s64>) = G_BSWAP %0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
+
+---
+name: bswap_s48
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; GFX7-LABEL: name: bswap_s48
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+ ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+ ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[C]](s32)
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+ ;
+ ; GFX8-LABEL: name: bswap_s48
+ ; GFX8: liveins: $vgpr0_vgpr1
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; GFX8-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+ ; GFX8-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+ ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[C]](s32)
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s48) = G_TRUNC %0
+ %2:_(s48) = G_BSWAP %1
+ %3:_(s64) = G_ANYEXT %2
+ $vgpr0_vgpr1 = COPY %3
+...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir
index bdac19b090d2235..2dc5f582f14945b 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-bswap.mir
@@ -4,35 +4,6 @@
# test bswap for s16, s17, s32, and s64
-...
----
-name: test_bswap17
-body: |
- bb.1:
- ; X86-32-LABEL: name: test_bswap17
- ; X86-32: [[DEF:%[0-9]+]]:_(s17) = IMPLICIT_DEF
- ; X86-32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s17)
- ; X86-32-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]]
- ; X86-32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 15
- ; X86-32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s8)
- ; X86-32-NEXT: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[LSHR]](s32)
- ; X86-32-NEXT: [[COPY:%[0-9]+]]:_(s17) = COPY [[TRUNC]](s17)
- ; X86-32-NEXT: RET 0, implicit [[COPY]](s17)
- ; X86-64-LABEL: name: test_bswap17
- ; X86-64: [[DEF:%[0-9]+]]:_(s17) = IMPLICIT_DEF
- ; X86-64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s17)
- ; X86-64-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]]
- ; X86-64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 15
- ; X86-64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s8)
- ; X86-64-NEXT: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[LSHR]](s32)
- ; X86-64-NEXT: [[COPY:%[0-9]+]]:_(s17) = COPY [[TRUNC]](s17)
- ; X86-64-NEXT: RET 0, implicit [[COPY]](s17)
- %0:_(s17) = IMPLICIT_DEF
- %1:_(s17) = G_BSWAP %0
- %2:_(s17) = COPY %1(s17)
- RET 0, implicit %2
-
-...
---
name: test_bswap64
body: |
@@ -4,35 +4,6 @@
# test bswap for s16, s17, s32, and s64
...
---
name: test_bswap17
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change to bswap48 instead of removing it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't see your PR description. NVM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I wrote in my description, I tried. I hit some other legalizer issue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc @e-kud
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, it seems that the X86 implementation was copied from AArch64, which also fails on the following MIR with the same assert:
%0:_(s48) = IMPLICIT_DEF
%1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
%4:_(s32) = G_ANYEXT %1(s16)
$w0 = COPY %4(s32)
RET_ReallyLR implicit $w0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should get tested in test/MachineVerifier.
This is consistent with the IR verifier and SelectionDAG's getNode.
Update tests accordingly. I tried to keep some coverage of non-pow2 when possible. X86 didn't like a G_UNMERGE_VALUES from s48 to 3 s16 that got created when I tried s48.