Support for i8/i16 for bitreverse using GFNI. #88625

shamithoke · 2024-04-13T15:20:08Z

In continuation to the PR - #81764, this change extends the GFNI support to i8 and i16.

llvmbot · 2024-04-13T15:20:39Z

@llvm/pr-subscribers-backend-x86

Author: None (shamithoke)

Changes

In continuation to the PR - #81764, this change extends the GFNI support to i8 and i16.

Full diff: https://github.com/llvm/llvm-project/pull/88625.diff

3 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+11-2)
(modified) llvm/test/CodeGen/X86/bitreverse.ll (+13-44)
(modified) llvm/test/CodeGen/X86/vector-bitreverse.ll (+16-62)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b7cb4b7dafeb69..bf8d7fc175fe64 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1287,6 +1287,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
 
     if (Subtarget.hasGFNI()) {
+      setOperationAction(ISD::BITREVERSE,       MVT::i8, Custom);
+      setOperationAction(ISD::BITREVERSE,       MVT::i16, Custom);
       setOperationAction(ISD::BITREVERSE,       MVT::i32, Custom);
       setOperationAction(ISD::BITREVERSE,       MVT::i64, Custom);
     }
@@ -1501,6 +1503,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::TRUNCATE,          MVT::v32i32, Custom);
     setOperationAction(ISD::TRUNCATE,          MVT::v32i64, Custom);
 
+    if (Subtarget.hasGFNI()) {
+      setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
+    }
+
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
       setOperationAction(ISD::SETCC,           VT, Custom);
       setOperationAction(ISD::CTPOP,           VT, Custom);
@@ -31335,14 +31344,14 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
 
   // Lower i32/i64 as vXi8 BITREVERSE + BSWAP
   if (!VT.isVector()) {
-    assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64");
+    assert(VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8);
     MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
     SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
     Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8,
                       DAG.getBitcast(MVT::v16i8, Res));
     Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
                       DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL));
-    return DAG.getNode(ISD::BSWAP, DL, VT, Res);
+    return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);
   }
 
   assert(VT.isVector() && VT.getSizeInBits() >= 128);
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 704563ab1bbf70..4f2654843728f4 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -374,24 +374,10 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; GFNI-LABEL: test_bitreverse_i16:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT:    rolw $8, %di
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNI-NEXT:    shll $4, %eax
-; GFNI-NEXT:    shrl $4, %edi
-; GFNI-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNI-NEXT:    orl %eax, %edi
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNI-NEXT:    shrl $2, %edi
-; GFNI-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNI-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNI-NEXT:    movl %eax, %ecx
-; GFNI-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNI-NEXT:    shrl %eax
-; GFNI-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNI-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    rolw $8, %ax
 ; GFNI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNI-NEXT:    retq
   %b = call i16 @llvm.bitreverse.i16(i16 %a)
@@ -446,19 +432,10 @@ define i8 @test_bitreverse_i8(i8 %a) {
 ;
 ; GFNI-LABEL: test_bitreverse_i8:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    rolb $4, %dil
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andb $51, %al
-; GFNI-NEXT:    shlb $2, %al
-; GFNI-NEXT:    shrb $2, %dil
-; GFNI-NEXT:    andb $51, %dil
-; GFNI-NEXT:    orb %dil, %al
-; GFNI-NEXT:    movl %eax, %ecx
-; GFNI-NEXT:    andb $85, %cl
-; GFNI-NEXT:    addb %cl, %cl
-; GFNI-NEXT:    shrb %al
-; GFNI-NEXT:    andb $85, %al
-; GFNI-NEXT:    orb %cl, %al
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNI-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
@@ -514,19 +491,11 @@ define i4 @test_bitreverse_i4(i4 %a) {
 ;
 ; GFNI-LABEL: test_bitreverse_i4:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    andb $8, %al
-; GFNI-NEXT:    leal (%rdi,%rdi), %ecx
-; GFNI-NEXT:    andb $4, %cl
-; GFNI-NEXT:    leal (,%rdi,8), %edx
-; GFNI-NEXT:    andb $8, %dl
-; GFNI-NEXT:    orb %cl, %dl
-; GFNI-NEXT:    shrb %dil
-; GFNI-NEXT:    andb $2, %dil
-; GFNI-NEXT:    orb %dil, %dl
-; GFNI-NEXT:    shrb $3, %al
-; GFNI-NEXT:    orb %dl, %al
+; GFNI-NEXT:    vmovd %edi, %xmm0
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; GFNI-NEXT:    vmovd %xmm0, %eax
+; GFNI-NEXT:    shrb $4, %al
+; GFNI-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNI-NEXT:    retq
   %b = call i4 @llvm.bitreverse.i4(i4 %a)
   ret i4 %b
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index b22b508db8b280..90cc3d5fdde829 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -61,36 +61,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
 ;
 ; GFNISSE-LABEL: test_bitreverse_i8:
 ; GFNISSE:       # %bb.0:
-; GFNISSE-NEXT:    rolb $4, %dil
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andb $51, %al
-; GFNISSE-NEXT:    shlb $2, %al
-; GFNISSE-NEXT:    shrb $2, %dil
-; GFNISSE-NEXT:    andb $51, %dil
-; GFNISSE-NEXT:    orb %dil, %al
-; GFNISSE-NEXT:    movl %eax, %ecx
-; GFNISSE-NEXT:    andb $85, %cl
-; GFNISSE-NEXT:    addb %cl, %cl
-; GFNISSE-NEXT:    shrb %al
-; GFNISSE-NEXT:    andb $85, %al
-; GFNISSE-NEXT:    orb %cl, %al
+; GFNISSE-NEXT:    movd %edi, %xmm0
+; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT:    movd %xmm0, %eax
+; GFNISSE-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNISSE-NEXT:    retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_i8:
 ; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    rolb $4, %dil
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andb $51, %al
-; GFNIAVX-NEXT:    shlb $2, %al
-; GFNIAVX-NEXT:    shrb $2, %dil
-; GFNIAVX-NEXT:    andb $51, %dil
-; GFNIAVX-NEXT:    orb %dil, %al
-; GFNIAVX-NEXT:    movl %eax, %ecx
-; GFNIAVX-NEXT:    andb $85, %cl
-; GFNIAVX-NEXT:    addb %cl, %cl
-; GFNIAVX-NEXT:    shrb %al
-; GFNIAVX-NEXT:    andb $85, %al
-; GFNIAVX-NEXT:    orb %cl, %al
+; GFNIAVX-NEXT:    vmovd %edi, %xmm0
+; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT:    vmovd %xmm0, %eax
+; GFNIAVX-NEXT:    # kill: def $al killed $al killed $eax
 ; GFNIAVX-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
@@ -153,47 +135,19 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ;
 ; GFNISSE-LABEL: test_bitreverse_i16:
 ; GFNISSE:       # %bb.0:
-; GFNISSE-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNISSE-NEXT:    rolw $8, %di
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNISSE-NEXT:    shll $4, %eax
-; GFNISSE-NEXT:    shrl $4, %edi
-; GFNISSE-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNISSE-NEXT:    orl %eax, %edi
-; GFNISSE-NEXT:    movl %edi, %eax
-; GFNISSE-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNISSE-NEXT:    shrl $2, %edi
-; GFNISSE-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNISSE-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNISSE-NEXT:    movl %eax, %ecx
-; GFNISSE-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNISSE-NEXT:    shrl %eax
-; GFNISSE-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNISSE-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNISSE-NEXT:    movd %edi, %xmm0
+; GFNISSE-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNISSE-NEXT:    movd %xmm0, %eax
+; GFNISSE-NEXT:    rolw $8, %ax
 ; GFNISSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNISSE-NEXT:    retq
 ;
 ; GFNIAVX-LABEL: test_bitreverse_i16:
 ; GFNIAVX:       # %bb.0:
-; GFNIAVX-NEXT:    # kill: def $edi killed $edi def $rdi
-; GFNIAVX-NEXT:    rolw $8, %di
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andl $3855, %eax # imm = 0xF0F
-; GFNIAVX-NEXT:    shll $4, %eax
-; GFNIAVX-NEXT:    shrl $4, %edi
-; GFNIAVX-NEXT:    andl $3855, %edi # imm = 0xF0F
-; GFNIAVX-NEXT:    orl %eax, %edi
-; GFNIAVX-NEXT:    movl %edi, %eax
-; GFNIAVX-NEXT:    andl $13107, %eax # imm = 0x3333
-; GFNIAVX-NEXT:    shrl $2, %edi
-; GFNIAVX-NEXT:    andl $13107, %edi # imm = 0x3333
-; GFNIAVX-NEXT:    leal (%rdi,%rax,4), %eax
-; GFNIAVX-NEXT:    movl %eax, %ecx
-; GFNIAVX-NEXT:    andl $21845, %ecx # imm = 0x5555
-; GFNIAVX-NEXT:    shrl %eax
-; GFNIAVX-NEXT:    andl $21845, %eax # imm = 0x5555
-; GFNIAVX-NEXT:    leal (%rax,%rcx,2), %eax
+; GFNIAVX-NEXT:    vmovd %edi, %xmm0
+; GFNIAVX-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX-NEXT:    vmovd %xmm0, %eax
+; GFNIAVX-NEXT:    rolw $8, %ax
 ; GFNIAVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GFNIAVX-NEXT:    retq
   %b = call i16 @llvm.bitreverse.i16(i16 %a)

github-actions · 2024-04-13T15:23:06Z

✅ With the latest revision this PR passed the C/C++ code formatter.

RKSimon

cheers!

RKSimon · 2024-04-13T15:25:08Z

llvm/lib/Target/X86/X86ISelLowering.cpp

+      setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
+      setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
+    }


You can drop this (I moved it to SSSE3 after your patch to support Tremont)

Yes - I noticed that you moved to the SSE3, but I don't quite understand how/why it works.
Is it because all the processors that support AVX also support SSSE3?

RKSimon · 2024-04-13T15:25:24Z

llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -31335,14 +31344,14 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,

  // Lower i32/i64 as vXi8 BITREVERSE + BSWAP
  if (!VT.isVector()) {
-    assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64");
+    assert(VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8);


(style) Don't lose the assert message

shamithoke · 2024-04-15T04:21:51Z

Like the last PR, please merge this one as well for me :) Thanks.

RKSimon

LGTM - cheers

shamithoke · 2024-04-15T11:01:52Z

LGTM - cheers

Thank you!

In continuation to the PR llvm#81764, this change extends the GFNI support to i8 and i16. --------- Co-authored-by: shami <shami_thoke@yahoo.com>

shamithoke · 2024-04-16T17:25:07Z

@RKSimon, How about we move the GFNI check out of SSSE like this - #88938

Support for i8/i16 for bitreverse using GFNI.

ad8cf4f

llvmbot added the backend:X86 label Apr 13, 2024

RKSimon reviewed Apr 13, 2024

View reviewed changes

shami and others added 4 commits April 13, 2024 21:33

Address review comments.

85c2d51

Formatting.

61448dd

Merge branch 'main' into gfni_i8_i16

5dc16a4

Merge branch 'main' into gfni_i8_i16

fb0a88f

RKSimon approved these changes Apr 15, 2024

View reviewed changes

RKSimon merged commit c7bd284 into llvm:main Apr 15, 2024
4 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Support for i8/i16 for bitreverse using GFNI. #88625

Support for i8/i16 for bitreverse using GFNI. #88625

shamithoke commented Apr 13, 2024

llvmbot commented Apr 13, 2024

github-actions bot commented Apr 13, 2024 •

edited

RKSimon left a comment

RKSimon Apr 13, 2024

shamithoke Apr 13, 2024

RKSimon Apr 13, 2024

shamithoke Apr 13, 2024

shamithoke commented Apr 15, 2024 •

edited

RKSimon left a comment

shamithoke commented Apr 15, 2024

shamithoke commented Apr 16, 2024

Support for i8/i16 for bitreverse using GFNI. #88625

Support for i8/i16 for bitreverse using GFNI. #88625

Conversation

shamithoke commented Apr 13, 2024

llvmbot commented Apr 13, 2024

github-actions bot commented Apr 13, 2024 • edited

RKSimon left a comment

Choose a reason for hiding this comment

RKSimon Apr 13, 2024

Choose a reason for hiding this comment

shamithoke Apr 13, 2024

Choose a reason for hiding this comment

RKSimon Apr 13, 2024

Choose a reason for hiding this comment

shamithoke Apr 13, 2024

Choose a reason for hiding this comment

shamithoke commented Apr 15, 2024 • edited

RKSimon left a comment

Choose a reason for hiding this comment

shamithoke commented Apr 15, 2024

shamithoke commented Apr 16, 2024

github-actions bot commented Apr 13, 2024 •

edited

shamithoke commented Apr 15, 2024 •

edited