-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support for i8/i16 for bitreverse using GFNI. #88625
Conversation
@llvm/pr-subscribers-backend-x86 Author: None (shamithoke) ChangesIn continuation to the PR - #81764, this change extends the GFNI support to i8 and i16. Full diff: https://github.com/llvm/llvm-project/pull/88625.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b7cb4b7dafeb69..bf8d7fc175fe64 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1287,6 +1287,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasGFNI()) {
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
}
@@ -1501,6 +1503,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
+ if (Subtarget.hasGFNI()) {
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
+ }
+
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
@@ -31335,14 +31344,14 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
// Lower i32/i64 as vXi8 BITREVERSE + BSWAP
if (!VT.isVector()) {
- assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64");
+ assert(VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8);
MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
DAG.getBitcast(MVT::v16i8, Res));
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::BSWAP, DL, VT, Res);
+ return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);
}
assert(VT.isVector() && VT.getSizeInBits() >= 128);
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 704563ab1bbf70..4f2654843728f4 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -374,24 +374,10 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
;
; GFNI-LABEL: test_bitreverse_i16:
; GFNI: # %bb.0:
-; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT: rolw $8, %di
-; GFNI-NEXT: movl %edi, %eax
-; GFNI-NEXT: andl $3855, %eax # imm = 0xF0F
-; GFNI-NEXT: shll $4, %eax
-; GFNI-NEXT: shrl $4, %edi
-; GFNI-NEXT: andl $3855, %edi # imm = 0xF0F
-; GFNI-NEXT: orl %eax, %edi
-; GFNI-NEXT: movl %edi, %eax
-; GFNI-NEXT: andl $13107, %eax # imm = 0x3333
-; GFNI-NEXT: shrl $2, %edi
-; GFNI-NEXT: andl $13107, %edi # imm = 0x3333
-; GFNI-NEXT: leal (%rdi,%rax,4), %eax
-; GFNI-NEXT: movl %eax, %ecx
-; GFNI-NEXT: andl $21845, %ecx # imm = 0x5555
-; GFNI-NEXT: shrl %eax
-; GFNI-NEXT: andl $21845, %eax # imm = 0x5555
-; GFNI-NEXT: leal (%rax,%rcx,2), %eax
+; GFNI-NEXT: vmovd %edi, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vmovd %xmm0, %eax
+; GFNI-NEXT: rolw $8, %ax
; GFNI-NEXT: # kill: def $ax killed $ax killed $eax
; GFNI-NEXT: retq
%b = call i16 @llvm.bitreverse.i16(i16 %a)
@@ -446,19 +432,10 @@ define i8 @test_bitreverse_i8(i8 %a) {
;
; GFNI-LABEL: test_bitreverse_i8:
; GFNI: # %bb.0:
-; GFNI-NEXT: rolb $4, %dil
-; GFNI-NEXT: movl %edi, %eax
-; GFNI-NEXT: andb $51, %al
-; GFNI-NEXT: shlb $2, %al
-; GFNI-NEXT: shrb $2, %dil
-; GFNI-NEXT: andb $51, %dil
-; GFNI-NEXT: orb %dil, %al
-; GFNI-NEXT: movl %eax, %ecx
-; GFNI-NEXT: andb $85, %cl
-; GFNI-NEXT: addb %cl, %cl
-; GFNI-NEXT: shrb %al
-; GFNI-NEXT: andb $85, %al
-; GFNI-NEXT: orb %cl, %al
+; GFNI-NEXT: vmovd %edi, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vmovd %xmm0, %eax
+; GFNI-NEXT: # kill: def $al killed $al killed $eax
; GFNI-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
@@ -514,19 +491,11 @@ define i4 @test_bitreverse_i4(i4 %a) {
;
; GFNI-LABEL: test_bitreverse_i4:
; GFNI: # %bb.0:
-; GFNI-NEXT: # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT: movl %edi, %eax
-; GFNI-NEXT: andb $8, %al
-; GFNI-NEXT: leal (%rdi,%rdi), %ecx
-; GFNI-NEXT: andb $4, %cl
-; GFNI-NEXT: leal (,%rdi,8), %edx
-; GFNI-NEXT: andb $8, %dl
-; GFNI-NEXT: orb %cl, %dl
-; GFNI-NEXT: shrb %dil
-; GFNI-NEXT: andb $2, %dil
-; GFNI-NEXT: orb %dil, %dl
-; GFNI-NEXT: shrb $3, %al
-; GFNI-NEXT: orb %dl, %al
+; GFNI-NEXT: vmovd %edi, %xmm0
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT: vmovd %xmm0, %eax
+; GFNI-NEXT: shrb $4, %al
+; GFNI-NEXT: # kill: def $al killed $al killed $eax
; GFNI-NEXT: retq
%b = call i4 @llvm.bitreverse.i4(i4 %a)
ret i4 %b
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index b22b508db8b280..90cc3d5fdde829 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -61,36 +61,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
;
; GFNISSE-LABEL: test_bitreverse_i8:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: rolb $4, %dil
-; GFNISSE-NEXT: movl %edi, %eax
-; GFNISSE-NEXT: andb $51, %al
-; GFNISSE-NEXT: shlb $2, %al
-; GFNISSE-NEXT: shrb $2, %dil
-; GFNISSE-NEXT: andb $51, %dil
-; GFNISSE-NEXT: orb %dil, %al
-; GFNISSE-NEXT: movl %eax, %ecx
-; GFNISSE-NEXT: andb $85, %cl
-; GFNISSE-NEXT: addb %cl, %cl
-; GFNISSE-NEXT: shrb %al
-; GFNISSE-NEXT: andb $85, %al
-; GFNISSE-NEXT: orb %cl, %al
+; GFNISSE-NEXT: movd %edi, %xmm0
+; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT: movd %xmm0, %eax
+; GFNISSE-NEXT: # kill: def $al killed $al killed $eax
; GFNISSE-NEXT: retq
;
; GFNIAVX-LABEL: test_bitreverse_i8:
; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: rolb $4, %dil
-; GFNIAVX-NEXT: movl %edi, %eax
-; GFNIAVX-NEXT: andb $51, %al
-; GFNIAVX-NEXT: shlb $2, %al
-; GFNIAVX-NEXT: shrb $2, %dil
-; GFNIAVX-NEXT: andb $51, %dil
-; GFNIAVX-NEXT: orb %dil, %al
-; GFNIAVX-NEXT: movl %eax, %ecx
-; GFNIAVX-NEXT: andb $85, %cl
-; GFNIAVX-NEXT: addb %cl, %cl
-; GFNIAVX-NEXT: shrb %al
-; GFNIAVX-NEXT: andb $85, %al
-; GFNIAVX-NEXT: orb %cl, %al
+; GFNIAVX-NEXT: vmovd %edi, %xmm0
+; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT: vmovd %xmm0, %eax
+; GFNIAVX-NEXT: # kill: def $al killed $al killed $eax
; GFNIAVX-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
@@ -153,47 +135,19 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
;
; GFNISSE-LABEL: test_bitreverse_i16:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi
-; GFNISSE-NEXT: rolw $8, %di
-; GFNISSE-NEXT: movl %edi, %eax
-; GFNISSE-NEXT: andl $3855, %eax # imm = 0xF0F
-; GFNISSE-NEXT: shll $4, %eax
-; GFNISSE-NEXT: shrl $4, %edi
-; GFNISSE-NEXT: andl $3855, %edi # imm = 0xF0F
-; GFNISSE-NEXT: orl %eax, %edi
-; GFNISSE-NEXT: movl %edi, %eax
-; GFNISSE-NEXT: andl $13107, %eax # imm = 0x3333
-; GFNISSE-NEXT: shrl $2, %edi
-; GFNISSE-NEXT: andl $13107, %edi # imm = 0x3333
-; GFNISSE-NEXT: leal (%rdi,%rax,4), %eax
-; GFNISSE-NEXT: movl %eax, %ecx
-; GFNISSE-NEXT: andl $21845, %ecx # imm = 0x5555
-; GFNISSE-NEXT: shrl %eax
-; GFNISSE-NEXT: andl $21845, %eax # imm = 0x5555
-; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax
+; GFNISSE-NEXT: movd %edi, %xmm0
+; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT: movd %xmm0, %eax
+; GFNISSE-NEXT: rolw $8, %ax
; GFNISSE-NEXT: # kill: def $ax killed $ax killed $eax
; GFNISSE-NEXT: retq
;
; GFNIAVX-LABEL: test_bitreverse_i16:
; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi
-; GFNIAVX-NEXT: rolw $8, %di
-; GFNIAVX-NEXT: movl %edi, %eax
-; GFNIAVX-NEXT: andl $3855, %eax # imm = 0xF0F
-; GFNIAVX-NEXT: shll $4, %eax
-; GFNIAVX-NEXT: shrl $4, %edi
-; GFNIAVX-NEXT: andl $3855, %edi # imm = 0xF0F
-; GFNIAVX-NEXT: orl %eax, %edi
-; GFNIAVX-NEXT: movl %edi, %eax
-; GFNIAVX-NEXT: andl $13107, %eax # imm = 0x3333
-; GFNIAVX-NEXT: shrl $2, %edi
-; GFNIAVX-NEXT: andl $13107, %edi # imm = 0x3333
-; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax
-; GFNIAVX-NEXT: movl %eax, %ecx
-; GFNIAVX-NEXT: andl $21845, %ecx # imm = 0x5555
-; GFNIAVX-NEXT: shrl %eax
-; GFNIAVX-NEXT: andl $21845, %eax # imm = 0x5555
-; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax
+; GFNIAVX-NEXT: vmovd %edi, %xmm0
+; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT: vmovd %xmm0, %eax
+; GFNIAVX-NEXT: rolw $8, %ax
; GFNIAVX-NEXT: # kill: def $ax killed $ax killed $eax
; GFNIAVX-NEXT: retq
%b = call i16 @llvm.bitreverse.i16(i16 %a)
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cheers!
setOperationAction(ISD::BITREVERSE, MVT::i16, Custom); | ||
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); | ||
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can drop this (I moved it to SSSE3 after your patch to support Tremont)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes - I noticed that you moved to the SSE3, but I don't quite understand how/why it works.
Is it because all the processors that support AVX also support SSSE3?
@@ -31335,14 +31344,14 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, | |||
|
|||
// Lower i32/i64 as vXi8 BITREVERSE + BSWAP | |||
if (!VT.isVector()) { | |||
assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64"); | |||
assert(VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) Don't lose the assert message
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Like the last PR, please merge this one as well for me :) Thanks. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Thank you! |
In continuation to the PR llvm#81764, this change extends the GFNI support to i8 and i16. --------- Co-authored-by: shami <shami_thoke@yahoo.com>
In continuation to the PR llvm#81764, this change extends the GFNI support to i8 and i16. --------- Co-authored-by: shami <shami_thoke@yahoo.com>
In continuation to the PR - #81764, this change extends the GFNI support to i8 and i16.