diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index edfa05a66288e..87f2eaddedf86 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1287,8 +1287,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (Subtarget.hasGFNI()) { - setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); - setOperationAction(ISD::BITREVERSE, MVT::i64, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i16, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i64, Custom); } // These might be better off as horizontal vector ops. @@ -31317,16 +31319,18 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, if (VT.is256BitVector() && !Subtarget.hasInt256()) return splitVectorIntUnary(Op, DAG, DL); - // Lower i32/i64 as vXi8 BITREVERSE + BSWAP + // Lower i8/i16/i32/i64 as vXi8 BITREVERSE + BSWAP if (!VT.isVector()) { - assert((VT == MVT::i32 || VT == MVT::i64) && "Only tested for i32/i64"); + assert( + (VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) && + "Only tested for i8/i16/i32/i64"); MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits()); SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In); Res = DAG.getNode(ISD::BITREVERSE, DL, MVT::v16i8, DAG.getBitcast(MVT::v16i8, Res)); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(VecVT, Res), DAG.getIntPtrConstant(0, DL)); - return DAG.getNode(ISD::BSWAP, DL, VT, Res); + return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res); } assert(VT.isVector() && VT.getSizeInBits() >= 128); diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll index 704563ab1bbf7..4f2654843728f 100644 --- a/llvm/test/CodeGen/X86/bitreverse.ll +++ b/llvm/test/CodeGen/X86/bitreverse.ll @@ -374,24 +374,10 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { ; ; GFNI-LABEL: test_bitreverse_i16: ; GFNI: # %bb.0: -; GFNI-NEXT: # kill: def $edi killed $edi def $rdi -; GFNI-NEXT: rolw $8, %di -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $3855, %eax # imm = 0xF0F -; GFNI-NEXT: shll $4, %eax -; GFNI-NEXT: shrl $4, %edi -; GFNI-NEXT: andl $3855, %edi # imm = 0xF0F -; GFNI-NEXT: orl %eax, %edi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $13107, %eax # imm = 0x3333 -; GFNI-NEXT: shrl $2, %edi -; GFNI-NEXT: andl $13107, %edi # imm = 0x3333 -; GFNI-NEXT: leal (%rdi,%rax,4), %eax -; GFNI-NEXT: movl %eax, %ecx -; GFNI-NEXT: andl $21845, %ecx # imm = 0x5555 -; GFNI-NEXT: shrl %eax -; GFNI-NEXT: andl $21845, %eax # imm = 0x5555 -; GFNI-NEXT: leal (%rax,%rcx,2), %eax +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: rolw $8, %ax ; GFNI-NEXT: # kill: def $ax killed $ax killed $eax ; GFNI-NEXT: retq %b = call i16 @llvm.bitreverse.i16(i16 %a) @@ -446,19 +432,10 @@ define i8 @test_bitreverse_i8(i8 %a) { ; ; GFNI-LABEL: test_bitreverse_i8: ; GFNI: # %bb.0: -; GFNI-NEXT: rolb $4, %dil -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andb $51, %al -; GFNI-NEXT: shlb $2, %al -; GFNI-NEXT: shrb $2, %dil -; GFNI-NEXT: andb $51, %dil -; GFNI-NEXT: orb %dil, %al -; GFNI-NEXT: movl %eax, %ecx -; GFNI-NEXT: andb $85, %cl -; GFNI-NEXT: addb %cl, %cl -; GFNI-NEXT: shrb %al -; GFNI-NEXT: andb $85, %al -; GFNI-NEXT: orb %cl, %al +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: # kill: def $al killed $al killed $eax ; GFNI-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -514,19 +491,11 @@ define i4 @test_bitreverse_i4(i4 %a) { ; ; GFNI-LABEL: test_bitreverse_i4: ; GFNI: # %bb.0: -; GFNI-NEXT: # kill: def $edi killed $edi def $rdi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andb $8, %al -; GFNI-NEXT: leal (%rdi,%rdi), %ecx -; GFNI-NEXT: andb $4, %cl -; GFNI-NEXT: leal (,%rdi,8), %edx -; GFNI-NEXT: andb $8, %dl -; GFNI-NEXT: orb %cl, %dl -; GFNI-NEXT: shrb %dil -; GFNI-NEXT: andb $2, %dil -; GFNI-NEXT: orb %dil, %dl -; GFNI-NEXT: shrb $3, %al -; GFNI-NEXT: orb %dl, %al +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: shrb $4, %al +; GFNI-NEXT: # kill: def $al killed $al killed $eax ; GFNI-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll index b22b508db8b28..90cc3d5fdde82 100644 --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -61,36 +61,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; ; GFNISSE-LABEL: test_bitreverse_i8: ; GFNISSE: # %bb.0: -; GFNISSE-NEXT: rolb $4, %dil -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andb $51, %al -; GFNISSE-NEXT: shlb $2, %al -; GFNISSE-NEXT: shrb $2, %dil -; GFNISSE-NEXT: andb $51, %dil -; GFNISSE-NEXT: orb %dil, %al -; GFNISSE-NEXT: movl %eax, %ecx -; GFNISSE-NEXT: andb $85, %cl -; GFNISSE-NEXT: addb %cl, %cl -; GFNISSE-NEXT: shrb %al -; GFNISSE-NEXT: andb $85, %al -; GFNISSE-NEXT: orb %cl, %al +; GFNISSE-NEXT: movd %edi, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: movd %xmm0, %eax +; GFNISSE-NEXT: # kill: def $al killed $al killed $eax ; GFNISSE-NEXT: retq ; ; GFNIAVX-LABEL: test_bitreverse_i8: ; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: rolb $4, %dil -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andb $51, %al -; GFNIAVX-NEXT: shlb $2, %al -; GFNIAVX-NEXT: shrb $2, %dil -; GFNIAVX-NEXT: andb $51, %dil -; GFNIAVX-NEXT: orb %dil, %al -; GFNIAVX-NEXT: movl %eax, %ecx -; GFNIAVX-NEXT: andb $85, %cl -; GFNIAVX-NEXT: addb %cl, %cl -; GFNIAVX-NEXT: shrb %al -; GFNIAVX-NEXT: andb $85, %al -; GFNIAVX-NEXT: orb %cl, %al +; GFNIAVX-NEXT: vmovd %edi, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: vmovd %xmm0, %eax +; GFNIAVX-NEXT: # kill: def $al killed $al killed $eax ; GFNIAVX-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -153,47 +135,19 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { ; ; GFNISSE-LABEL: test_bitreverse_i16: ; GFNISSE: # %bb.0: -; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi -; GFNISSE-NEXT: rolw $8, %di -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andl $3855, %eax # imm = 0xF0F -; GFNISSE-NEXT: shll $4, %eax -; GFNISSE-NEXT: shrl $4, %edi -; GFNISSE-NEXT: andl $3855, %edi # imm = 0xF0F -; GFNISSE-NEXT: orl %eax, %edi -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andl $13107, %eax # imm = 0x3333 -; GFNISSE-NEXT: shrl $2, %edi -; GFNISSE-NEXT: andl $13107, %edi # imm = 0x3333 -; GFNISSE-NEXT: leal (%rdi,%rax,4), %eax -; GFNISSE-NEXT: movl %eax, %ecx -; GFNISSE-NEXT: andl $21845, %ecx # imm = 0x5555 -; GFNISSE-NEXT: shrl %eax -; GFNISSE-NEXT: andl $21845, %eax # imm = 0x5555 -; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax +; GFNISSE-NEXT: movd %edi, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: movd %xmm0, %eax +; GFNISSE-NEXT: rolw $8, %ax ; GFNISSE-NEXT: # kill: def $ax killed $ax killed $eax ; GFNISSE-NEXT: retq ; ; GFNIAVX-LABEL: test_bitreverse_i16: ; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi -; GFNIAVX-NEXT: rolw $8, %di -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andl $3855, %eax # imm = 0xF0F -; GFNIAVX-NEXT: shll $4, %eax -; GFNIAVX-NEXT: shrl $4, %edi -; GFNIAVX-NEXT: andl $3855, %edi # imm = 0xF0F -; GFNIAVX-NEXT: orl %eax, %edi -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andl $13107, %eax # imm = 0x3333 -; GFNIAVX-NEXT: shrl $2, %edi -; GFNIAVX-NEXT: andl $13107, %edi # imm = 0x3333 -; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax -; GFNIAVX-NEXT: movl %eax, %ecx -; GFNIAVX-NEXT: andl $21845, %ecx # imm = 0x5555 -; GFNIAVX-NEXT: shrl %eax -; GFNIAVX-NEXT: andl $21845, %eax # imm = 0x5555 -; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX-NEXT: vmovd %edi, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: vmovd %xmm0, %eax +; GFNIAVX-NEXT: rolw $8, %ax ; GFNIAVX-NEXT: # kill: def $ax killed $ax killed $eax ; GFNIAVX-NEXT: retq %b = call i16 @llvm.bitreverse.i16(i16 %a)