[InstCombine] try to canonicalize logical shift after bswap
When shifting by a byte-multiple:
bswap (shl X, C) --> lshr (bswap X), C
bswap (lshr X, C) --> shl (bswap X), C

This is an IR implementation of a transform suggested in D120648.
The "swaps cancel" test models the motivating optimization from
that proposal.
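
As a concrete illustration (not part of the commit), the same byte-reversal
identities can be checked with a small standalone C++ program, assuming a
GCC/Clang-compatible compiler for __builtin_bswap32; the function names are
made up for this sketch:

#include <cassert>
#include <cstdint>

// bswap (shl X, 8) --> lshr (bswap X), 8
uint32_t before_shl(uint32_t x) { return __builtin_bswap32(x << 8); }
uint32_t after_shl(uint32_t x)  { return __builtin_bswap32(x) >> 8; }

// bswap (lshr X, 16) --> shl (bswap X), 16
uint32_t before_lshr(uint32_t x) { return __builtin_bswap32(x >> 16); }
uint32_t after_lshr(uint32_t x)  { return __builtin_bswap32(x) << 16; }

// "swaps cancel": bswap (shl (bswap X), 16) --> lshr X, 16
uint32_t swaps_cancel(uint32_t x) {
  return __builtin_bswap32(__builtin_bswap32(x) << 16);
}

int main() {
  for (uint32_t x : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
    assert(before_shl(x) == after_shl(x));
    assert(before_lshr(x) == after_lshr(x));
    assert(swaps_cancel(x) == x >> 16);
  }
  return 0;
}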

Alive2 checks (as noted in the other review, we could use
knownbits to handle shift-by-variable-amount, but that can be an
enhancement patch):
https://alive2.llvm.org/ce/z/pXUaRf
https://alive2.llvm.org/ce/z/ZnaMLf

Differential Revision: https://reviews.llvm.org/D122010
rotateright committed Mar 22, 2022 · 1 parent 91ea247 · commit 60820e5
Showing 2 changed files with 46 additions and 18 deletions.
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (18 additions, 0 deletions)
@@ -1349,6 +1349,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;

// Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
// inverse-shift-of-bswap:
// bswap (shl X, C) --> lshr (bswap X), C
// bswap (lshr X, C) --> shl (bswap X), C
// TODO: Use knownbits to allow variable shift and non-splat vector match.
BinaryOperator *BO;
if (match(IIOperand, m_OneUse(m_BinOp(BO)))) {
const APInt *C;
if (match(BO, m_LogicalShift(m_Value(X), m_APIntAllowUndef(C))) &&
(*C & 7) == 0) {
Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
BinaryOperator::BinaryOps InverseShift =
BO->getOpcode() == Instruction::Shl ? Instruction::LShr
: Instruction::Shl;
return BinaryOperator::Create(InverseShift, NewSwap, BO->getOperand(1));
}
}

KnownBits Known = computeKnownBits(IIOperand, 0, II);
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
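The TODO in the new code (and the knownbits note in the commit message above) points at a possible follow-up: allow a variable shift amount when it is provably a multiple of 8. A minimal, hypothetical sketch of that check, assuming the same visitCallInst context and helpers used above (this is not part of the commit):

// Hypothetical follow-up (not in this commit): a variable shift amount is
// acceptable if its low 3 bits are known to be zero, so the shift still
// moves whole bytes.
Value *ShAmt = BO->getOperand(1);
KnownBits AmtKnown = computeKnownBits(ShAmt, /*Depth=*/0, II);
if (AmtKnown.countMinTrailingZeros() >= 3) {
  Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
  BinaryOperator::BinaryOps InverseShift =
      BO->getOpcode() == Instruction::Shl ? Instruction::LShr
                                          : Instruction::Shl;
  return BinaryOperator::Create(InverseShift, NewSwap, ShAmt);
}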
llvm/test/Transforms/InstCombine/bswap-fold.ll (28 additions, 18 deletions)
@@ -26,8 +26,8 @@ define i32 @test6(i32 %a) {

define i32 @lshr8_i32(i32 %x) {
; CHECK-LABEL: @lshr8_i32(
; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 8
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.bswap.i32(i32 [[S]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]])
; CHECK-NEXT: [[R:%.*]] = shl i32 [[TMP1]], 8
; CHECK-NEXT: ret i32 [[R]]
;
%s = lshr i32 %x, 8
@@ -37,8 +37,8 @@ define i32 @lshr8_i32(i32 %x) {

define <2 x i32> @lshr16_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @lshr16_v2i32(
; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 16, i32 16>
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[S]])
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[TMP1]], <i32 16, i32 16>
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%s = lshr <2 x i32> %x, <i32 16, i32 16>
@@ -48,14 +48,16 @@ define <2 x i32> @lshr16_v2i32(<2 x i32> %x) {

define i32 @lshr24_i32(i32 %x) {
; CHECK-LABEL: @lshr24_i32(
; CHECK-NEXT: [[S:%.*]] = and i32 [[X:%.*]], -16777216
; CHECK-NEXT: ret i32 [[S]]
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -16777216
; CHECK-NEXT: ret i32 [[TMP1]]
;
%s = lshr i32 %x, 24
%r = call i32 @llvm.bswap.i32(i32 %s)
ret i32 %r
}

; negative test - need shift-by-8-bit-multiple

define i32 @lshr12_i32(i32 %x) {
; CHECK-LABEL: @lshr12_i32(
; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 12
@@ -67,6 +69,8 @@ define i32 @lshr12_i32(i32 %x) {
ret i32 %r
}

; negative test - uses

define i32 @lshr8_i32_use(i32 %x, i32* %p) {
; CHECK-LABEL: @lshr8_i32_use(
; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 12
@@ -82,19 +86,21 @@ define i32 @lshr8_i32_use(i32 %x, i32* %p) {

define i64 @shl16_i64(i64 %x) {
; CHECK-LABEL: @shl16_i64(
; CHECK-NEXT: [[S:%.*]] = shl i64 [[X:%.*]], 16
; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.bswap.i64(i64 [[S]])
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[X:%.*]])
; CHECK-NEXT: [[R:%.*]] = lshr i64 [[TMP1]], 16
; CHECK-NEXT: ret i64 [[R]]
;
%s = shl i64 %x, 16
%r = call i64 @llvm.bswap.i64(i64 %s)
ret i64 %r
}

; poison vector element propagates

define <2 x i64> @shl16_v2i64(<2 x i64> %x) {
; CHECK-LABEL: @shl16_v2i64(
; CHECK-NEXT: [[S:%.*]] = shl <2 x i64> [[X:%.*]], <i64 poison, i64 24>
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[S]])
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[X:%.*]])
; CHECK-NEXT: [[R:%.*]] = lshr <2 x i64> [[TMP1]], <i64 poison, i64 24>
; CHECK-NEXT: ret <2 x i64> [[R]]
;
%s = shl <2 x i64> %x, <i64 poison, i64 24>
@@ -104,14 +110,16 @@ define <2 x i64> @shl16_v2i64(<2 x i64> %x) {

define i64 @shl56_i64(i64 %x) {
; CHECK-LABEL: @shl56_i64(
; CHECK-NEXT: [[S:%.*]] = and i64 [[X:%.*]], 255
; CHECK-NEXT: ret i64 [[S]]
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 255
; CHECK-NEXT: ret i64 [[TMP1]]
;
%s = shl i64 %x, 56
%r = call i64 @llvm.bswap.i64(i64 %s)
ret i64 %r
}

; negative test - need shift-by-8-bit-multiple

define i64 @shl42_i64(i64 %x) {
; CHECK-LABEL: @shl42_i64(
; CHECK-NEXT: [[S:%.*]] = shl i64 [[X:%.*]], 42
@@ -123,6 +131,8 @@ define i64 @shl42_i64(i64 %x) {
ret i64 %r
}

; negative test - uses

define i32 @shl8_i32_use(i32 %x, i32* %p) {
; CHECK-LABEL: @shl8_i32_use(
; CHECK-NEXT: [[S:%.*]] = shl i32 [[X:%.*]], 8
@@ -136,11 +146,11 @@ define i32 @shl8_i32_use(i32 %x, i32* %p) {
ret i32 %r
}

; swaps cancel

define i64 @swap_shl16_i64(i64 %x) {
; CHECK-LABEL: @swap_shl16_i64(
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.bswap.i64(i64 [[X:%.*]])
; CHECK-NEXT: [[S:%.*]] = shl i64 [[B]], 16
; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.bswap.i64(i64 [[S]])
; CHECK-NEXT: [[R:%.*]] = lshr i64 [[X:%.*]], 16
; CHECK-NEXT: ret i64 [[R]]
;
%b = call i64 @llvm.bswap.i64(i64 %x)
@@ -536,11 +546,11 @@ define <2 x i64> @bs_active_high_different_negative(<2 x i64> %0) {
ret <2 x i64> %3
}

; negative test
; TODO: This should fold to 'and'.
define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high_undef(
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 undef>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP0:%.*]])
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP2]], <i64 56, i64 undef>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = shl <2 x i64> %0, <i64 56, i64 undef>
