
[DAG] Fold bitreverse(shl/srl(bitreverse(x),y)) -> srl/shl(x,y) #89897

Merged (1 commit) May 6, 2024

Conversation

RKSimon
Collaborator

@RKSimon RKSimon commented Apr 24, 2024

Noticed while investigating GFNI per-element vector shifts (we can form SHL but not SRL/SRA)

Alive2: https://alive2.llvm.org/ce/z/fSH-rf

@llvmbot
Collaborator

llvmbot commented Apr 24, 2024

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Noticed while investigating GFNI per-element vector shifts (we can form SHL but not SRL/SRA)

Alive2: https://alive2.llvm.org/ce/z/fSH-rf


Patch is 24.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89897.diff

4 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/SDPatternMatch.h (+4)
  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+14)
  • (modified) llvm/test/CodeGen/RISCV/bitreverse-shift.ll (+27-112)
  • (modified) llvm/test/CodeGen/X86/combine-bitreverse.ll (+11-285)
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 4cc7bb9c3b55a9..2b5ef68445b768 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -616,6 +616,10 @@ inline UnaryOpc_match<Opnd, true> m_ChainedUnaryOp(unsigned Opc,
   return UnaryOpc_match<Opnd, true>(Opc, Op);
 }
 
+template <typename Opnd> inline UnaryOpc_match<Opnd> m_BitReverse(const Opnd &Op) {
+  return UnaryOpc_match<Opnd>(ISD::BITREVERSE, Op);
+}
+
 template <typename Opnd> inline UnaryOpc_match<Opnd> m_ZExt(const Opnd &Op) {
   return UnaryOpc_match<Opnd>(ISD::ZERO_EXTEND, Op);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fd265b12d73ca4..d1176bd937b0b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10950,9 +10950,23 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
   // fold (bitreverse c1) -> c2
   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
     return C;
+
   // fold (bitreverse (bitreverse x)) -> x
   if (N0.getOpcode() == ISD::BITREVERSE)
     return N0.getOperand(0);
+
+  SDValue X, Y;
+
+  // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
+      sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+    return DAG.getNode(ISD::SHL, DL, VT, X, Y);
+
+  // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
+      sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+    return DAG.getNode(ISD::SRL, DL, VT, X, Y);
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
index f29b1699172626..704ca458535249 100644
--- a/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
+++ b/llvm/test/CodeGen/RISCV/bitreverse-shift.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV32ZBKB
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32ZBKB
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefixes=RV64ZBKB
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV64ZBKB
 
-; TODO: These tests can be optmised
+; These tests can be optmised
 ;       fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
 ;       fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
 
@@ -14,25 +14,10 @@ declare i32 @llvm.bitreverse.i32(i32)
 declare i64 @llvm.bitreverse.i64(i64)
 
 define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 27
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 24
-; RV32ZBKB-NEXT:    ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i8:
-; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 59
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 56
-; RV64ZBKB-NEXT:    ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    ret
     %1 = call i8 @llvm.bitreverse.i8(i8 %a)
     %2 = lshr i8 %1, 3
     %3 = call i8 @llvm.bitreverse.i8(i8 %2)
@@ -40,25 +25,10 @@ define i8 @test_bitreverse_srli_bitreverse_i8(i8 %a) nounwind {
 }
 
 define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
-; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 23
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 16
-; RV32ZBKB-NEXT:    ret
-;
-; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i16:
-; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 55
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 48
-; RV64ZBKB-NEXT:    ret
+; CHECK-LABEL: test_bitreverse_srli_bitreverse_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 7
+; CHECK-NEXT:    ret
     %1 = call i16 @llvm.bitreverse.i16(i16 %a)
     %2 = lshr i16 %1, 7
     %3 = call i16 @llvm.bitreverse.i16(i16 %2)
@@ -68,21 +38,12 @@ define i16 @test_bitreverse_srli_bitreverse_i16(i16 %a) nounwind {
 define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 15
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    slli a0, a0, 15
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i32:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 47
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    slliw a0, a0, 15
 ; RV64ZBKB-NEXT:    ret
     %1 = call i32 @llvm.bitreverse.i32(i32 %a)
     %2 = lshr i32 %1, 15
@@ -93,21 +54,13 @@ define i32 @test_bitreverse_srli_bitreverse_i32(i32 %a) nounwind {
 define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 1
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a1, a0
+; RV32ZBKB-NEXT:    slli a1, a0, 1
 ; RV32ZBKB-NEXT:    li a0, 0
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 33
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    slli a0, a0, 33
 ; RV64ZBKB-NEXT:    ret
     %1 = call i64 @llvm.bitreverse.i64(i64 %a)
     %2 = lshr i64 %1, 33
@@ -118,24 +71,14 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
 define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 24
-; RV32ZBKB-NEXT:    slli a0, a0, 3
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 24
+; RV32ZBKB-NEXT:    slli a0, a0, 24
+; RV32ZBKB-NEXT:    srli a0, a0, 27
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i8:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 56
-; RV64ZBKB-NEXT:    slli a0, a0, 3
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 56
+; RV64ZBKB-NEXT:    slli a0, a0, 56
+; RV64ZBKB-NEXT:    srli a0, a0, 59
 ; RV64ZBKB-NEXT:    ret
     %1 = call i8 @llvm.bitreverse.i8(i8 %a)
     %2 = shl i8 %1, 3
@@ -146,24 +89,14 @@ define i8 @test_bitreverse_shli_bitreverse_i8(i8 %a) nounwind {
 define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 16
-; RV32ZBKB-NEXT:    slli a0, a0, 7
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    srli a0, a0, 16
+; RV32ZBKB-NEXT:    slli a0, a0, 16
+; RV32ZBKB-NEXT:    srli a0, a0, 23
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i16:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 48
-; RV64ZBKB-NEXT:    slli a0, a0, 7
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 48
+; RV64ZBKB-NEXT:    slli a0, a0, 48
+; RV64ZBKB-NEXT:    srli a0, a0, 55
 ; RV64ZBKB-NEXT:    ret
     %1 = call i16 @llvm.bitreverse.i16(i16 %a)
     %2 = shl i16 %1, 7
@@ -174,22 +107,12 @@ define i16 @test_bitreverse_shli_bitreverse_i16(i16 %a) nounwind {
 define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    slli a0, a0, 15
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a0, 15
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i32:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 32
-; RV64ZBKB-NEXT:    slli a0, a0, 15
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    srli a0, a0, 32
+; RV64ZBKB-NEXT:    srliw a0, a0, 15
 ; RV64ZBKB-NEXT:    ret
     %1 = call i32 @llvm.bitreverse.i32(i32 %a)
     %2 = shl i32 %1, 15
@@ -200,21 +123,13 @@ define i32 @test_bitreverse_shli_bitreverse_i32(i32 %a) nounwind {
 define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
 ; RV32ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
 ; RV32ZBKB:       # %bb.0:
-; RV32ZBKB-NEXT:    rev8 a0, a1
-; RV32ZBKB-NEXT:    brev8 a0, a0
-; RV32ZBKB-NEXT:    slli a0, a0, 1
-; RV32ZBKB-NEXT:    rev8 a0, a0
-; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    srli a0, a1, 1
 ; RV32ZBKB-NEXT:    li a1, 0
 ; RV32ZBKB-NEXT:    ret
 ;
 ; RV64ZBKB-LABEL: test_bitreverse_shli_bitreverse_i64:
 ; RV64ZBKB:       # %bb.0:
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
-; RV64ZBKB-NEXT:    slli a0, a0, 33
-; RV64ZBKB-NEXT:    rev8 a0, a0
-; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    srli a0, a0, 33
 ; RV64ZBKB-NEXT:    ret
     %1 = call i64 @llvm.bitreverse.i64(i64 %a)
     %2 = shl i64 %1, 33
diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll
index 9f81fab54a49d0..f3d4d691b453ba 100644
--- a/llvm/test/CodeGen/X86/combine-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll
@@ -39,86 +39,18 @@ define i32 @test_bitreverse_bitreverse(i32 %a0) nounwind {
   ret i32 %c
 }
 
-; TODO: fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
+; fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
 define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
 ; X86-LABEL: test_bitreverse_srli_bitreverse:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655744, %ecx # imm = 0x55555540
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655680, %eax # imm = 0x55555500
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    shrl $7, %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645121, %ecx # imm = 0xF0F0F01
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645120, %eax # imm = 0xF0F0F00
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993424, %ecx # imm = 0x33333310
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993408, %eax # imm = 0x33333300
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    shll $7, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bitreverse_srli_bitreverse:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    bswapl %edi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %edi
-; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    shrl $2, %edi
-; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT:    leal (%rdi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $1431655744, %ecx # imm = 0x55555540
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655680, %eax # imm = 0x55555500
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-NEXT:    shrl $7, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $252645121, %ecx # imm = 0xF0F0F01
-; X64-NEXT:    shll $4, %ecx
-; X64-NEXT:    shrl $4, %eax
-; X64-NEXT:    andl $252645120, %eax # imm = 0xF0F0F00
-; X64-NEXT:    orl %ecx, %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $858993424, %ecx # imm = 0x33333310
-; X64-NEXT:    shrl $2, %eax
-; X64-NEXT:    andl $858993408, %eax # imm = 0x33333300
-; X64-NEXT:    leal (%rax,%rcx,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NEXT:    shll $7, %eax
 ; X64-NEXT:    retq
   %b = call i32 @llvm.bitreverse.i32(i32 %a0)
   %c = lshr i32 %b, 7
@@ -129,88 +61,15 @@ define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
 define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
 ; X86-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655764, %eax # imm = 0x55555554
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645127, %eax # imm = 0xF0F0F07
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993457, %eax # imm = 0x33333331
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    addl %edx, %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bitreverse_srli_bitreverse_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    bswapq %rdi
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    andq %rcx, %rdi
-; X64-NEXT:    shlq $4, %rdi
-; X64-NEXT:    orq %rax, %rdi
-; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT:    movq %rdi, %rcx
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq $2, %rdi
-; X64-NEXT:    andq %rax, %rdi
-; X64-NEXT:    leaq (%rdi,%rcx,4), %rax
-; X64-NEXT:    movabsq $6148914689804861440, %rcx # imm = 0x5555555500000000
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    movabsq $6148914685509894144, %rdx # imm = 0x5555555400000000
-; X64-NEXT:    andq %rax, %rdx
-; X64-NEXT:    leaq (%rdx,%rcx,2), %rax
-; X64-NEXT:    shrq $33, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    movabsq $1085102592318504960, %rcx # imm = 0xF0F0F0F00000000
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    movabsq $1085102557958766592, %rdx # imm = 0xF0F0F0700000000
-; X64-NEXT:    andq %rax, %rdx
-; X64-NEXT:    shlq $4, %rcx
-; X64-NEXT:    orq %rdx, %rcx
-; X64-NEXT:    movabsq $3689348813882916864, %rax # imm = 0x3333333300000000
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    shrq $2, %rcx
-; X64-NEXT:    movabsq $3689348805292982272, %rdx # imm = 0x3333333100000000
-; X64-NEXT:    andq %rcx, %rdx
-; X64-NEXT:    leaq (%rdx,%rax,4), %rax
-; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT:    movq %rax, %rdx
-; X64-NEXT:    andq %rcx, %rdx
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-NEXT:    shlq $33, %rax
 ; X64-NEXT:    retq
     %1 = call i64 @llvm.bitreverse.i64(i64 %a)
     %2 = lshr i64 %1, 33
@@ -218,86 +77,18 @@ define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
     ret i64 %3
 }
 
-; TODO: fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
+; fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
 define i32 @test_bitreverse_shli_bitreverse(i32 %a0) nounwind {
 ; X86-LABEL: test_bitreverse_shli_bitreverse:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    andl $5592405, %eax # imm = 0x555555
-; X86-NEXT:    shll $6, %ecx
-; X86-NEXT:    andl $-1431655808, %ecx # imm = 0xAAAAAA80
-; X86-NEXT:    shll $8, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $986895, %ecx # imm = 0xF0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $135204623, %eax # imm = 0x80F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $3355443, %ecx # imm = 0x333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $36909875, %eax # imm = 0x2333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    shrl $7, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bitreverse_shli_bitreverse:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    bswapl %edi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %edi
-; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    shrl $2, %ed...
[truncated]

Contributor

@arsenm arsenm left a comment

Can you also do the globalisel equivalent?

RKSimon added a commit that referenced this pull request Apr 24, 2024
@RKSimon RKSimon force-pushed the bitreverse-shift branch 2 times, most recently from ba0c748 to 9402531 on April 24, 2024 11:04
@goldsteinn
Contributor

LGTM but wait on 1 more.

@RKSimon
Collaborator Author

RKSimon commented May 2, 2024

Can you also do the globalisel equivalent?

Sure, I haven't worked with CombinerHelper much, but I'm assuming it shouldn't be that tricky.

Noticed while investigating GFNI per-element vector shifts (we can form SHL but not SRL/SRA)

Alive2: https://alive2.llvm.org/ce/z/fSH-rf
@RKSimon RKSimon merged commit 522b4bf into llvm:main May 6, 2024
3 of 4 checks passed
@RKSimon RKSimon deleted the bitreverse-shift branch May 6, 2024 10:13
RKSimon added a commit that referenced this pull request May 7, 2024
…,y)) -> logicalshift(x,y) fold

DAG already performs this fold (#89897), GISel is currently missing it (patch incoming)
RKSimon added a commit to RKSimon/llvm-project that referenced this pull request May 7, 2024
RKSimon added a commit to RKSimon/llvm-project that referenced this pull request May 7, 2024
RKSimon added a commit to RKSimon/llvm-project that referenced this pull request May 7, 2024
RKSimon added a commit to RKSimon/llvm-project that referenced this pull request May 8, 2024
Labels
backend:X86 llvm:SelectionDAG SelectionDAGISel as well