-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAG] Fold bitreverse(shl/srl(bitreverse(x),y)) -> srl/shl(x,y) #89897
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Noticed while investigating GFNI per-element vector shifts (we can form SHL but not SRL/SRA). Alive2: https://alive2.llvm.org/ce/z/fSH-rf Patch is 24.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89897.diff 4 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 4cc7bb9c3b55a9..2b5ef68445b768 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -616,6 +616,10 @@ inline UnaryOpc_match<Opnd, true> m_ChainedUnaryOp(unsigned Opc,
return UnaryOpc_match<Opnd, true>(Opc, Op);
}
+template <typename Opnd> inline UnaryOpc_match<Opnd> m_BitReverse(const Opnd &Op) {
+ return UnaryOpc_match<Opnd>(ISD::BITREVERSE, Op);
+}
+
template <typename Opnd> inline UnaryOpc_match<Opnd> m_ZExt(const Opnd &Op) {
return UnaryOpc_match<Opnd>(ISD::ZERO_EXTEND, Op);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fd265b12d73ca4..d1176bd937b0b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10950,9 +10950,23 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
// fold (bitreverse c1) -> c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
return C;
+
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
+
+ SDValue X, Y;
+
+ // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
+ sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ return DAG.getNode(ISD::SHL, DL, VT, X, Y);
+
+ // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
+ sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ return DAG.getNode(ISD::SRL, DL, VT, X, Y);
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
index f29b1699172626..704ca458535249 100644
--- a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
+++ b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV32ZBKB
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV32ZBKB
; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=RV64ZBKB
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBKB
-; TODO: These tests can be optmised
+; These tests can be optmised
; fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
; fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
@@ -14,25 +14,10 @@ declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)
define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 27
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 24
-; RV32ZBKB-NEXT: ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 59
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 56
-; RV64ZBKB-NEXT: ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: ret
%1 = call i8 @llvm.bitreverse.i8(i8 %a)
%2 = lshr i8 %1, 3
%3 = call i8 @llvm.bitreverse.i8(i8 %2)
@@ -40,25 +25,10 @@ define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
}
define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 23
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 16
-; RV32ZBKB-NEXT: ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 55
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 48
-; RV64ZBKB-NEXT: ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 7
+; CHECK-NEXT: ret
%1 = call i16 @llvm.bitreverse.i16(i16 %a)
%2 = lshr i16 %1, 7
%3 = call i16 @llvm.bitreverse.i16(i16 %2)
@@ -68,21 +38,12 @@ define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 15
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: slli a0, a0, 15
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 47
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: slliw a0, a0, 15
; RV64ZBKB-NEXT: ret
%1 = call i32 @llvm.bitreverse.i32(i32 %a)
%2 = lshr i32 %1, 15
@@ -93,21 +54,13 @@ define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 1
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a1, a0
+; RV32ZBKB-NEXT: slli a1, a0, 1
; RV32ZBKB-NEXT: li a0, 0
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 33
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: slli a0, a0, 33
; RV64ZBKB-NEXT: ret
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = lshr i64 %1, 33
@@ -118,24 +71,14 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 24
-; RV32ZBKB-NEXT: slli a0, a0, 3
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 24
+; RV32ZBKB-NEXT: slli a0, a0, 24
+; RV32ZBKB-NEXT: srli a0, a0, 27
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 56
-; RV64ZBKB-NEXT: slli a0, a0, 3
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 56
+; RV64ZBKB-NEXT: slli a0, a0, 56
+; RV64ZBKB-NEXT: srli a0, a0, 59
; RV64ZBKB-NEXT: ret
%1 = call i8 @llvm.bitreverse.i8(i8 %a)
%2 = shl i8 %1, 3
@@ -146,24 +89,14 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 16
-; RV32ZBKB-NEXT: slli a0, a0, 7
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: srli a0, a0, 16
+; RV32ZBKB-NEXT: slli a0, a0, 16
+; RV32ZBKB-NEXT: srli a0, a0, 23
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 48
-; RV64ZBKB-NEXT: slli a0, a0, 7
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 48
+; RV64ZBKB-NEXT: slli a0, a0, 48
+; RV64ZBKB-NEXT: srli a0, a0, 55
; RV64ZBKB-NEXT: ret
%1 = call i16 @llvm.bitreverse.i16(i16 %a)
%2 = shl i16 %1, 7
@@ -174,22 +107,12 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: slli a0, a0, 15
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a0, 15
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 32
-; RV64ZBKB-NEXT: slli a0, a0, 15
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: srli a0, a0, 32
+; RV64ZBKB-NEXT: srliw a0, a0, 15
; RV64ZBKB-NEXT: ret
%1 = call i32 @llvm.bitreverse.i32(i32 %a)
%2 = shl i32 %1, 15
@@ -200,21 +123,13 @@ define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: rev8 a0, a1
-; RV32ZBKB-NEXT: brev8 a0, a0
-; RV32ZBKB-NEXT: slli a0, a0, 1
-; RV32ZBKB-NEXT: rev8 a0, a0
-; RV32ZBKB-NEXT: brev8 a0, a0
+; RV32ZBKB-NEXT: srli a0, a1, 1
; RV32ZBKB-NEXT: li a1, 0
; RV32ZBKB-NEXT: ret
;
; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
-; RV64ZBKB-NEXT: slli a0, a0, 33
-; RV64ZBKB-NEXT: rev8 a0, a0
-; RV64ZBKB-NEXT: brev8 a0, a0
+; RV64ZBKB-NEXT: srli a0, a0, 33
; RV64ZBKB-NEXT: ret
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = shl i64 %1, 33
diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll
index 9f81fab54a49d0..f3d4d691b453ba 100644
--- a/llvm/test/CodeGen/X86/combine-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll
@@ -39,86 +39,18 @@ define i32 @test_bitreverse_bitreverse(i32 %a0) nounwind {
ret i32 %c
}
-; TODO: fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
+; fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655744, %ecx # imm = 0x55555540
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655680, %eax # imm = 0x55555500
-; X86-NEXT: leal (%eax,%ecx,2), %eax
-; X86-NEXT: shrl $7, %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645121, %ecx # imm = 0xF0F0F01
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645120, %eax # imm = 0xF0F0F00
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993424, %ecx # imm = 0x33333310
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: shll $7, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: shrl $4, %edi
-; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT: orl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: leal (%rdi,%rax,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $1431655744, %ecx # imm = 0x55555540
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655680, %eax # imm = 0x55555500
-; X64-NEXT: leal (%rax,%rcx,2), %eax
-; X64-NEXT: shrl $7, %eax
-; X64-NEXT: bswapl %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $252645121, %ecx # imm = 0xF0F0F01
-; X64-NEXT: shll $4, %ecx
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: andl $252645120, %eax # imm = 0xF0F0F00
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $858993424, %ecx # imm = 0x33333310
-; X64-NEXT: shrl $2, %eax
-; X64-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X64-NEXT: leal (%rax,%rcx,4), %eax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: leal (%rax,%rcx,2), %eax
+; X64-NEXT: shll $7, %eax
; X64-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a0)
%c = lshr i32 %b, 7
@@ -129,88 +61,15 @@ define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse_i64:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655764, %eax # imm = 0x55555554
-; X86-NEXT: leal (%eax,%ecx,2), %eax
-; X86-NEXT: shrl %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645127, %eax # imm = 0xF0F0F07
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993457, %eax # imm = 0x33333331
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: leal (%eax,%ecx,2), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: addl %edx, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse_i64:
; X64: # %bb.0:
-; X64-NEXT: bswapq %rdi
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq $4, %rax
-; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: andq %rcx, %rdi
-; X64-NEXT: shlq $4, %rdi
-; X64-NEXT: orq %rax, %rdi
-; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rax, %rdi
-; X64-NEXT: leaq (%rdi,%rcx,4), %rax
-; X64-NEXT: movabsq $6148914689804861440, %rcx # imm = 0x5555555500000000
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914685509894144, %rdx # imm = 0x5555555400000000
-; X64-NEXT: andq %rax, %rdx
-; X64-NEXT: leaq (%rdx,%rcx,2), %rax
-; X64-NEXT: shrq $33, %rax
-; X64-NEXT: bswapq %rax
-; X64-NEXT: movabsq $1085102592318504960, %rcx # imm = 0xF0F0F0F00000000
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $4, %rax
-; X64-NEXT: movabsq $1085102557958766592, %rdx # imm = 0xF0F0F0700000000
-; X64-NEXT: andq %rax, %rdx
-; X64-NEXT: shlq $4, %rcx
-; X64-NEXT: orq %rdx, %rcx
-; X64-NEXT: movabsq $3689348813882916864, %rax # imm = 0x3333333300000000
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rcx
-; X64-NEXT: movabsq $3689348805292982272, %rdx # imm = 0x3333333100000000
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: leaq (%rdx,%rax,4), %rax
-; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: shrq %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: leaq (%rax,%rdx,2), %rax
+; X64-NEXT: shlq $33, %rax
; X64-NEXT: retq
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = lshr i64 %1, 33
@@ -218,86 +77,18 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
ret i64 %3
}
-; TODO: fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
+; fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
define i32 @test_bitreverse_shli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_shli_bitreverse:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT: leal (%eax,%ecx,4), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl $5592405, %eax # imm = 0x555555
-; X86-NEXT: shll $6, %ecx
-; X86-NEXT: andl $-1431655808, %ecx # imm = 0xAAAAAA80
-; X86-NEXT: shll $8, %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: bswapl %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $986895, %ecx # imm = 0xF0F0F
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: shrl $4, %eax
-; X86-NEXT: andl $135204623, %eax # imm = 0x80F0F0F
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $3355443, %ecx # imm = 0x333333
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $36909875, %eax # imm = 0x2333333
-; X86-NEXT: leal (%eax,%ecx,4), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT: shrl %eax
-; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: shrl $7, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_shli_bitreverse:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: shrl $4, %edi
-; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT: orl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %ed...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you also do the globalisel equivalent?
ba0c748
to
9402531
Compare
LGTM but wait on 1 more. |
Sure, I haven't worked with combinerhelper much, but I'm assuming it shouldn't be that tricky. |
Noticed while investigating GFNI per-element vector shifts (we can form SHL but not SRL/SRA) Alive2: https://alive2.llvm.org/ce/z/fSH-rf
…,y)) -> logicalshift(x,y) fold. DAG already performs this fold (#89897); GISel is currently missing it (patch incoming)
Noticed while investigating GFNI per-element vector shifts (we can form SHL but not SRL/SRA)
Alive2: https://alive2.llvm.org/ce/z/fSH-rf