diff --git a/llvm/test/CodeGen/ARM/combine-bitreverse.ll b/llvm/test/CodeGen/ARM/combine-bitreverse.ll
new file mode 100644
index 0000000000000..93fd77cb5fe15
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/combine-bitreverse.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mattr=v7 | FileCheck %s --check-prefixes=CHECK
+
+declare i16 @llvm.bswap.i16(i16) readnone
+declare i32 @llvm.bswap.i32(i32) readnone
+declare i32 @llvm.bitreverse.i32(i32) readnone
+
+define i32 @brev_and_lhs_brev32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: brev_and_lhs_brev32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
+  %2 = and i32 %1, %b
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  ret i32 %3
+}
+
+define i32 @brev_or_lhs_brev32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: brev_or_lhs_brev32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
+  %2 = or i32 %1, %b
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  ret i32 %3
+}
+
+define i32 @brev_xor_rhs_brev32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: brev_xor_rhs_brev32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %2 = xor i32 %a, %1
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  ret i32 %3
+}
+
+define i32 @brev_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: brev_and_all_operand_multiuse:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    and.w r2, r0, r1
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    muls r0, r2, r0
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
+  %2 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %3 = and i32 %1, %2
+  %4 = tail call i32 @llvm.bitreverse.i32(i32 %3)
+  %5 = mul i32 %1, %4 ;increase use of left bitreverse
+  %6 = mul i32 %2, %5 ;increase use of right bitreverse
+
+  ret i32 %6
+}
+
+; negative test
+define i32 @brev_and_rhs_brev32_multiuse1(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: brev_and_rhs_brev32_multiuse1:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    rbit r1, r0
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  %4 = mul i32 %2, %3 ;increase use of logical op
+  ret i32 %4
+}
+
+; negative test
+define i32 @brev_and_rhs_brev32_multiuse2(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: brev_and_rhs_brev32_multiuse2:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  %4 = mul i32 %1, %3 ;increase use of inner bitreverse
+  ret i32 %4
+}
+
+; negative test
+define i32 @brev_xor_rhs_bs32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: brev_xor_rhs_bs32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rev r1, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = xor i32 %a, %1
+  %3 = tail call i32 @llvm.bitreverse.i32(i32 %2)
+  ret i32 %3
+}
+
diff --git a/llvm/test/CodeGen/ARM/combine-bswap.ll b/llvm/test/CodeGen/ARM/combine-bswap.ll
new file mode 100644
index 0000000000000..f1037f8da12a0
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/combine-bswap.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mattr=v7 | FileCheck %s --check-prefixes=CHECK
+
+declare i32 @llvm.bswap.i32(i32) readnone
+declare i64 @llvm.bswap.i64(i64) readnone
+declare i32 @llvm.bitreverse.i32(i32) readnone
+
+define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: bs_and_lhs_bs32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = and i32 %1, %b
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: bs_or_rhs_bs64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rev r2, r2
+; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    rev r2, r1
+; CHECK-NEXT:    rev r1, r3
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    rev r1, r0
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    bx lr
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = or i64 %a, %1
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: bs_and_all_operand_multiuse:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rev r1, r1
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    and.w r2, r0, r1
+; CHECK-NEXT:    rev r2, r2
+; CHECK-NEXT:    muls r0, r2, r0
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %3 = and i32 %1, %2
+  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
+  %5 = mul i32 %1, %4 ;increase use of left bswap
+  %6 = mul i32 %2, %5 ;increase use of right bswap
+
+  ret i32 %6
+}
+
+; negative test
+define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: bs_and_rhs_bs32_multiuse1:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rev r1, r1
+; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    rev r1, r0
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  %4 = mul i32 %2, %3 ;increase use of logical op
+  ret i32 %4
+}
+
+; negative test
+define i32 @bs_xor_rhs_brev32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: bs_xor_rhs_brev32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    bx lr
+  %1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
+  %2 = xor i32 %a, %1
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll
index 12d62bf59ae57..35107e3d7c74b 100644
--- a/llvm/test/CodeGen/X86/combine-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll
@@ -8,6 +8,7 @@
 declare i32 @llvm.bitreverse.i32(i32) readnone
 declare i64 @llvm.bitreverse.i64(i64) readnone
 declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
+declare i32 @llvm.bswap.i32(i32) readnone
 
 ; fold (bitreverse undef) -> undef
 define i32 @test_undef() nounwind {
diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
index 017dc960bd171..a4e7e3aaba95e 100644
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -255,6 +255,168 @@ define i32 @test_bswap32_shift17(i32 %a0) {
   ret i32 %b
 }
 
+define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_lhs_bs32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_lhs_bs32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = and i32 %1, %b
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
+; X86-LABEL: bs_or_lhs_bs64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_or_lhs_bs64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = or i64 %1, %b
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
+; X86-LABEL: bs_xor_rhs_bs64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_xor_rhs_bs64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %2 = xor i64 %a, %1
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_all_operand_multiuse:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    imull %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_all_operand_multiuse:
+; X64:       # %bb.0:
+; X64-NEXT:    bswapl %edi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %edi, %esi
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %3 = and i32 %1, %2
+  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
+  %5 = mul i32 %1, %4 ;increase use of left bswap
+  %6 = mul i32 %2, %5 ;increase use of right bswap
+
+  ret i32 %6
+}
+
+; negative test
+define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_rhs_bs32_multiuse1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_rhs_bs32_multiuse1:
+; X64:       # %bb.0:
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  %4 = mul i32 %2, %3 ;increase use of logical op
+  ret i32 %4
+}
+
+; negative test
+define i32 @bs_and_rhs_bs32_multiuse2(i32 %a, i32 %b) #0 {
+; X86-LABEL: bs_and_rhs_bs32_multiuse2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: bs_and_rhs_bs32_multiuse2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    imull %esi, %eax
+; X64-NEXT:    retq
+  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %2 = and i32 %1, %a
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  %4 = mul i32 %1, %3 ;increase use of inner bswap
+  ret i32 %4
+}
+
 ; negative test
 define i64 @test_bswap64_shift17(i64 %a0) {
 ; X86-LABEL: test_bswap64_shift17: