diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 68f6d0e01b80..3aa18ed95e83 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1498,6 +1498,36 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
   unsigned XBitSize = X->getType()->getScalarSizeInBits();
 
+  // Iff we are chopping off all the zero bits that were just shifted-in,
+  // instead perform the arithmetic shift, and bypass trunc by sign-extending
+  // it directly. Either one of the lshr and trunc can have extra uses, we can
+  // fix them up, but only one of them, else we increase instruction count.
+  if (match(X,
+            m_LShr(m_Value(), m_SpecificInt_ICMP(
+                                  ICmpInst::Predicate::ICMP_EQ,
+                                  APInt(XBitSize, XBitSize - SrcBitSize)))) &&
+      (Src->hasOneUse() || X->hasOneUser())) {
+    auto *LShr = cast<BinaryOperator>(X);
+    auto *AShr =
+        BinaryOperator::CreateAShr(LShr->getOperand(0), LShr->getOperand(1),
+                                   LShr->getName() + ".signed", LShr);
+    if (!LShr->hasOneUse()) {
+      auto *Mask =
+          ConstantExpr::getLShr(Constant::getAllOnesValue(AShr->getType()),
+                                cast<Constant>(LShr->getOperand(1)));
+      auto *NewLShr =
+          BinaryOperator::CreateAnd(AShr, Mask, LShr->getName(), LShr);
+      replaceInstUsesWith(*LShr, NewLShr);
+    }
+    if (!Src->hasOneUse()) {
+      auto *OldTrunc = cast<TruncInst>(Src);
+      auto *NewTrunc = CastInst::Create(Instruction::Trunc, AShr, SrcTy,
+                                        OldTrunc->getName(), OldTrunc);
+      replaceInstUsesWith(*OldTrunc, NewTrunc);
+    }
+    return CastInst::Create(Instruction::SExt, AShr, DestTy);
+  }
+
   // Iff X had more sign bits than the number of bits that were chopped off
   // by the truncation, we can directly sign-extend the X.
   unsigned XNumSignBits = ComputeNumSignBits(X, 0, &CI);
diff --git a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
index 174bbcb088b1..3008818ad130 100644
--- a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
@@ -12,9 +12,8 @@ declare void @usevec4(<2 x i4>)
 
 define i16 @t0(i8 %x) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4
@@ -25,9 +24,8 @@ define i16 @t0(i8 %x) {
 
 define i16 @t1(i8 %x) {
 ; CHECK-LABEL: @t1(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i3
-; CHECK-NEXT:    [[C:%.*]] = sext i3 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 5
@@ -38,9 +36,8 @@ define i16 @t1(i8 %x) {
 
 define i16 @t2(i7 %x) {
 ; CHECK-LABEL: @t2(
-; CHECK-NEXT:    [[A:%.*]] = lshr i7 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = trunc i7 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i7 [[X:%.*]], 3
+; CHECK-NEXT:    [[C:%.*]] = sext i7 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i7 %x, 3
@@ -64,9 +61,8 @@ define i16 @n3(i8 %x) {
 
 define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t4_vec_splat(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 4>
@@ -77,9 +73,8 @@ define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 
 define <2 x i16> @t5_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_vec_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>
@@ -90,10 +85,10 @@ define <2 x i16> @t5_vec_undef(<2 x i8> %x) {
 
 define i16 @t6_extrause0(i8 %x) {
 ; CHECK-LABEL: @t6_extrause0(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    call void @use4(i4 [[B]])
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[B1:%.*]] = trunc i8 [[A_SIGNED]] to i4
+; CHECK-NEXT:    call void @use4(i4 [[B1]])
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4
@@ -104,10 +99,10 @@ define i16 @t6_extrause0(i8 %x) {
 }
 define <2 x i16> @t7_extrause0_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t7_extrause0_vec_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    call void @usevec4(<2 x i4> [[B]])
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
+; CHECK-NEXT:    [[B1:%.*]] = trunc <2 x i8> [[A_SIGNED]] to <2 x i4>
+; CHECK-NEXT:    call void @usevec4(<2 x i4> [[B1]])
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>
@@ -118,10 +113,10 @@ define <2 x i16> @t7_extrause0_vec_undef(<2 x i8> %x) {
 }
 define i16 @t8_extrause1(i8 %x) {
 ; CHECK-LABEL: @t8_extrause1(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    call void @use8(i8 [[A]])
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A_SIGNED]], 15
+; CHECK-NEXT:    call void @use8(i8 [[A1]])
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4 ; has extra use, but we can deal with that
@@ -132,10 +127,10 @@ define i16 @t8_extrause1(i8 %x) {
 }
 define <2 x i16> @t9_extrause1_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t9_extrause1_vec_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
-; CHECK-NEXT:    call void @usevec8(<2 x i8> [[A]])
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
+; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A_SIGNED]], <i8 15, i8 undef>
+; CHECK-NEXT:    call void @usevec8(<2 x i8> [[A1]])
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>