diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f9cde57665c65..fb67e9f260b92 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1266,38 +1266,6 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   return new ShuffleVectorInst(NewIntrinsic, Mask);
 }
 
-/// Fold the following cases and accepts bswap and bitreverse intrinsics:
-/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
-/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
-template <Intrinsic::ID IntrID>
-static Instruction *foldBitOrderCrossLogicOp(Value *V,
-                                             InstCombiner::BuilderTy &Builder) {
-  static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
-                "This helper only supports BSWAP and BITREVERSE intrinsics");
-
-  Value *X, *Y;
-  if (match(V, m_OneUse(m_BitwiseLogic(m_Value(X), m_Value(Y))))) {
-    Value *OldReorderX, *OldReorderY;
-    BinaryOperator::BinaryOps Op = cast<BinaryOperator>(V)->getOpcode();
-
-    // If both X and Y are bswap/bitreverse, the transform reduces the number
-    // of instructions even if there's multiuse.
-    // If only one operand is bswap/bitreverse, we need to ensure the operand
-    // have only one use.
-    if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
-        match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
-      return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
-    } else if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
-      Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
-      return BinaryOperator::Create(Op, OldReorderX, NewReorder);
-    } else if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
-      Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
-      return BinaryOperator::Create(Op, NewReorder, OldReorderY);
-    }
-  }
-  return nullptr;
-}
-
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -1701,12 +1669,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       Value *V = Builder.CreateLShr(X, CV);
       return new TruncInst(V, IIOperand->getType());
     }
-
-    if (Instruction *crossLogicOpFold =
-            foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
-      return crossLogicOpFold;
-    }
-
     break;
   }
   case Intrinsic::masked_load:
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index b3a80e701d2f4..5f6280bd24ced 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -543,9 +543,10 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
 ; Fold: BSWAP( OP( BSWAP(x), y ) ) -> OP( x, BSWAP(y) )
 define i16 @bs_and_lhs_bs16(i16 %a, i16 %b) #0 {
 ; CHECK-LABEL: @bs_and_lhs_bs16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], [[A:%.*]]
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %1 = tail call i16 @llvm.bswap.i16(i16 %a)
   %2 = and i16 %1, %b
@@ -555,9 +556,10 @@ define i16 @bs_and_lhs_bs16(i16 %a, i16 %b) #0 {
 
 define i16 @bs_or_lhs_bs16(i16 %a, i16 %b) #0 {
 ; CHECK-LABEL: @bs_or_lhs_bs16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[A:%.*]]
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %1 = tail call i16 @llvm.bswap.i16(i16 %a)
   %2 = or i16 %1, %b
@@ -567,9 +569,10 @@ define i16 @bs_or_lhs_bs16(i16 %a, i16 %b) #0 {
 
 define i16 @bs_xor_lhs_bs16(i16 %a, i16 %b) #0 {
 ; CHECK-LABEL: @bs_xor_lhs_bs16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[TMP1]], [[A:%.*]]
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %1 = tail call i16 @llvm.bswap.i16(i16 %a)
   %2 = xor i16 %1, %b
@@ -579,9 +582,10 @@ define i16 @bs_xor_lhs_bs16(i16 %a, i16 %b) #0 {
 
 define i16 @bs_and_rhs_bs16(i16 %a, i16 %b) #0 {
 ; CHECK-LABEL: @bs_and_rhs_bs16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %1 = tail call i16 @llvm.bswap.i16(i16 %b)
   %2 = and i16 %a, %1
@@ -591,9 +595,10 @@ define i16 @bs_and_rhs_bs16(i16 %a, i16 %b) #0 {
 
 define i16 @bs_or_rhs_bs16(i16 %a, i16 %b) #0 {
 ; CHECK-LABEL: @bs_or_rhs_bs16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %1 = tail call i16 @llvm.bswap.i16(i16 %b)
   %2 = or i16 %a, %1
@@ -603,9 +608,10 @@ define i16 @bs_or_rhs_bs16(i16 %a, i16 %b) #0 {
 
 define i16 @bs_xor_rhs_bs16(i16 %a, i16 %b) #0 {
 ; CHECK-LABEL: @bs_xor_rhs_bs16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i16 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %1 = tail call i16 @llvm.bswap.i16(i16 %b)
   %2 = xor i16 %a, %1
@@ -615,9 +621,10 @@ define i16 @bs_xor_rhs_bs16(i16 %a, i16 %b) #0 {
 
 define i32 @bs_and_rhs_bs32(i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: @bs_and_rhs_bs32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %1 = tail call i32 @llvm.bswap.i32(i32 %b)
   %2 = and i32 %a, %1
@@ -627,9 +634,10 @@ define i32 @bs_and_rhs_bs32(i32 %a, i32 %b) #0 {
 
 define i32 @bs_or_rhs_bs32(i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: @bs_or_rhs_bs32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %1 = tail call i32 @llvm.bswap.i32(i32 %b)
   %2 = or i32 %a, %1
@@ -639,9 +647,10 @@ define i32 @bs_or_rhs_bs32(i32 %a, i32 %b) #0 {
 
 define i32 @bs_xor_rhs_bs32(i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: @bs_xor_rhs_bs32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %1 = tail call i32 @llvm.bswap.i32(i32 %b)
   %2 = xor i32 %a, %1
@@ -651,9 +660,10 @@ define i32 @bs_xor_rhs_bs32(i32 %a, i32 %b) #0 {
 
 define i64 @bs_and_rhs_bs64(i64 %a, i64 %b) #0 {
 ; CHECK-LABEL: @bs_and_rhs_bs64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %1 = tail call i64 @llvm.bswap.i64(i64 %b)
   %2 = and i64 %a, %1
@@ -663,9 +673,10 @@ define i64 @bs_and_rhs_bs64(i64 %a, i64 %b) #0 {
 
 define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
 ; CHECK-LABEL: @bs_or_rhs_bs64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %1 = tail call i64 @llvm.bswap.i64(i64 %b)
   %2 = or i64 %a, %1
@@ -675,9 +686,10 @@ define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
 
 define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
 ; CHECK-LABEL: @bs_xor_rhs_bs64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %1 = tail call i64 @llvm.bswap.i64(i64 %b)
   %2 = xor i64 %a, %1
@@ -687,9 +699,10 @@ define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
 
 define <2 x i32> @bs_and_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
 ; CHECK-LABEL: @bs_and_rhs_i32vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
   %2 = and <2 x i32> %a, %1
@@ -699,9 +712,10 @@ define <2 x i32> @bs_and_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
 
 define <2 x i32> @bs_or_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
 ; CHECK-LABEL: @bs_or_rhs_i32vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
   %2 = or <2 x i32> %a, %1
@@ -711,9 +725,10 @@ define <2 x i32> @bs_or_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
 
 define <2 x i32> @bs_xor_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
 ; CHECK-LABEL: @bs_xor_rhs_i32vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
   %2 = xor <2 x i32> %a, %1
@@ -767,10 +782,11 @@ define i64 @bs_all_operand64_multiuse_both(i64 %a, i64 %b) #0 {
 ; CHECK-LABEL: @bs_all_operand64_multiuse_both(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[A]], [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP3]])
 ; CHECK-NEXT:    call void @use.i64(i64 [[TMP1]])
 ; CHECK-NEXT:    call void @use.i64(i64 [[TMP2]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
   %1 = tail call i64 @llvm.bswap.i64(i64 %a)
   %2 = tail call i64 @llvm.bswap.i64(i64 %b)