diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index df7fb01ca7fad..0f83be53b8750 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -845,6 +845,26 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { } } + // Similar to above, but look through an intermediate trunc instruction. + BinaryOperator *Shr; + if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) && + match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) { + // The larger shift direction survives through the transform. + unsigned ShrAmtC = C1->getZExtValue(); + unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC; + Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff); + auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl; + + // If C1 > C: + // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) & (-1 << C) + // If C > C1: + // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) & (-1 << C) + Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff"); + Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff"); + APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC)); + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask)); + } + if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) { unsigned AmtSum = ShAmtC + C1->getZExtValue(); // Oversized shifts are simplified to zero in InstSimplify. @@ -853,41 +873,6 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum)); } - // Fold shl(trunc(shr(x,c1)),c2) -> trunc(and(shl(shr(x,c1),c2),c2')) - // Require that the input operand is a non-poison shift-by-constant so that - // we have confidence that the shifts will get folded together. 
- Instruction *TrOp; - const APInt *TrShiftAmt; - if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TrOp)))) && - match(TrOp, m_OneUse(m_Shr(m_Value(), m_APInt(TrShiftAmt)))) && - TrShiftAmt->ult(TrOp->getType()->getScalarSizeInBits())) { - Type *SrcTy = TrOp->getType(); - - // Okay, we'll do this xform. Make the shift of shift. - unsigned SrcSize = SrcTy->getScalarSizeInBits(); - Constant *ShAmt = ConstantInt::get(SrcTy, C->zext(SrcSize)); - - // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); - - // For logical shifts, the truncation has the effect of making the high - // part of the register be zeros. Emulate this by inserting an AND to - // clear the top bits as needed. This 'and' will usually be zapped by - // other xforms later if dead. - Constant *MaskV = - ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, BitWidth)); - - // The mask we constructed says what the trunc would do if occurring - // between the shifts. We want to know the effect *after* the second - // shift. We know that it is a logical shift by a constant, so adjust the - // mask as appropriate. - MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt); - // shift1 & 0x00FF - Value *And = Builder.CreateAnd(NSh, MaskV, Op0->getName()); - // Return the value truncated to the interesting size. - return new TruncInst(And, Ty); - } - // If we have an opposite shift by the same amount, we may be able to // reorder binops and shifts to eliminate math/logic. 
auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) { diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 32caf4a89d60e..18b8a2532abbd 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -743,9 +743,9 @@ define i16 @trunc_bswap_i160(i160* %a0) { ; CHECK-NEXT: [[LSHR1:%.*]] = lshr i160 [[LOAD]], 136 ; CHECK-NEXT: [[CAST1:%.*]] = trunc i160 [[LSHR1]] to i16 ; CHECK-NEXT: [[AND1:%.*]] = and i16 [[CAST1]], 255 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[LOAD]], 120 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i16 -; CHECK-NEXT: [[SHL:%.*]] = and i16 [[TMP2]], -256 +; CHECK-NEXT: [[SH_DIFF:%.*]] = lshr i160 [[LOAD]], 120 +; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i160 [[SH_DIFF]] to i16 +; CHECK-NEXT: [[SHL:%.*]] = and i16 [[TR_SH_DIFF]], -256 ; CHECK-NEXT: [[OR:%.*]] = or i16 [[AND1]], [[SHL]] ; CHECK-NEXT: ret i16 [[OR]] ; diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll index 7b933649b984b..7c6618ffaa200 100644 --- a/llvm/test/Transforms/InstCombine/shift-shift.ll +++ b/llvm/test/Transforms/InstCombine/shift-shift.ll @@ -139,9 +139,9 @@ define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) { define i8 @shl_trunc_bigger_lshr(i32 %x) { ; CHECK-LABEL: @shl_trunc_bigger_lshr( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -; CHECK-NEXT: [[LT:%.*]] = and i8 [[TMP2]], -8 +; CHECK-NEXT: [[SH_DIFF:%.*]] = lshr i32 [[X:%.*]], 2 +; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i8 +; CHECK-NEXT: [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -8 ; CHECK-NEXT: ret i8 [[LT]] ; %rt = lshr i32 %x, 5 @@ -153,8 +153,8 @@ define i8 @shl_trunc_bigger_lshr(i32 %x) { define i8 @shl_trunc_smaller_lshr(i32 %x) { ; CHECK-LABEL: @shl_trunc_smaller_lshr( ; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X_TR]], 2 -; 
CHECK-NEXT: [[LT:%.*]] = and i8 [[TMP1]], -32 +; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = shl i8 [[X_TR]], 2 +; CHECK-NEXT: [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -32 ; CHECK-NEXT: ret i8 [[LT]] ; %rt = lshr i32 %x, 3 @@ -165,9 +165,9 @@ define i8 @shl_trunc_smaller_lshr(i32 %x) { define i24 @shl_trunc_bigger_ashr(i32 %x) { ; CHECK-LABEL: @shl_trunc_bigger_ashr( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 9 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 -; CHECK-NEXT: [[LT:%.*]] = and i24 [[TMP2]], -8 +; CHECK-NEXT: [[SH_DIFF:%.*]] = ashr i32 [[X:%.*]], 9 +; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i24 +; CHECK-NEXT: [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8 ; CHECK-NEXT: ret i24 [[LT]] ; %rt = ashr i32 %x, 12 @@ -179,8 +179,8 @@ define i24 @shl_trunc_bigger_ashr(i32 %x) { define i24 @shl_trunc_smaller_ashr(i32 %x) { ; CHECK-LABEL: @shl_trunc_smaller_ashr( ; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i24 -; CHECK-NEXT: [[TMP1:%.*]] = shl i24 [[X_TR]], 3 -; CHECK-NEXT: [[LT:%.*]] = and i24 [[TMP1]], -8192 +; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = shl i24 [[X_TR]], 3 +; CHECK-NEXT: [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8192 ; CHECK-NEXT: ret i24 [[LT]] ; %rt = ashr i32 %x, 10