diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 68f6d0e01b80..3aa18ed95e83 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1498,6 +1498,36 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
   unsigned XBitSize = X->getType()->getScalarSizeInBits();
 
+  // Iff we are chopping off all the zero bits that were just shifted-in,
+  // instead perform the arithmetic shift, and bypass trunc by sign-extending
+  // it directly. Either one of the lshr and trunc can have extra uses, we can
+  // fix them up, but only one of them, else we increase instruction count.
+  if (match(X,
+            m_LShr(m_Value(), m_SpecificInt_ICMP(
+                                  ICmpInst::Predicate::ICMP_EQ,
+                                  APInt(XBitSize, XBitSize - SrcBitSize)))) &&
+      (Src->hasOneUse() || X->hasOneUser())) {
+    auto *LShr = cast<BinaryOperator>(X);
+    auto *AShr =
+        BinaryOperator::CreateAShr(LShr->getOperand(0), LShr->getOperand(1),
+                                   LShr->getName() + ".signed", LShr);
+    if (!LShr->hasOneUse()) {
+      auto *Mask =
+          ConstantExpr::getLShr(Constant::getAllOnesValue(AShr->getType()),
+                                cast<Constant>(LShr->getOperand(1)));
+      auto *NewLShr =
+          BinaryOperator::CreateAnd(AShr, Mask, LShr->getName(), LShr);
+      replaceInstUsesWith(*LShr, NewLShr);
+    }
+    if (!Src->hasOneUse()) {
+      auto *OldTrunc = cast<TruncInst>(Src);
+      auto *NewTrunc = CastInst::Create(Instruction::Trunc, AShr, SrcTy,
+                                        OldTrunc->getName(), OldTrunc);
+      replaceInstUsesWith(*OldTrunc, NewTrunc);
+    }
+    return CastInst::Create(Instruction::SExt, AShr, DestTy);
+  }
+
   // Iff X had more sign bits than the number of bits that were chopped off
   // by the truncation, we can directly sign-extend the X.
   unsigned XNumSignBits = ComputeNumSignBits(X, 0, &CI);
diff --git a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
index 174bbcb088b1..3008818ad130 100644
--- a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
@@ -12,9 +12,8 @@ declare void @usevec4(<2 x i4>)
 
 define i16 @t0(i8 %x) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4
@@ -25,9 +24,8 @@ define i16 @t0(i8 %x) {
 
 define i16 @t1(i8 %x) {
 ; CHECK-LABEL: @t1(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i3
-; CHECK-NEXT:    [[C:%.*]] = sext i3 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 5
@@ -38,9 +36,8 @@ define i16 @t1(i8 %x) {
 
 define i16 @t2(i7 %x) {
 ; CHECK-LABEL: @t2(
-; CHECK-NEXT:    [[A:%.*]] = lshr i7 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = trunc i7 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i7 [[X:%.*]], 3
+; CHECK-NEXT:    [[C:%.*]] = sext i7 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i7 %x, 3
@@ -64,9 +61,8 @@ define i16 @n3(i8 %x) {
 
 define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t4_vec_splat(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 4>
@@ -77,9 +73,8 @@ define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 
 define <2 x i16> @t5_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_vec_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>
@@ -90,10 +85,10 @@ define <2 x i16> @t5_vec_undef(<2 x i8> %x) {
 
 define i16 @t6_extrause0(i8 %x) {
 ; CHECK-LABEL: @t6_extrause0(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    call void @use4(i4 [[B]])
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[B1:%.*]] = trunc i8 [[A_SIGNED]] to i4
+; CHECK-NEXT:    call void @use4(i4 [[B1]])
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4
@@ -104,10 +99,10 @@ define i16 @t6_extrause0(i8 %x) {
 }
 define <2 x i16> @t7_extrause0_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t7_extrause0_vec_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    call void @usevec4(<2 x i4> [[B]])
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
+; CHECK-NEXT:    [[B1:%.*]] = trunc <2 x i8> [[A_SIGNED]] to <2 x i4>
+; CHECK-NEXT:    call void @usevec4(<2 x i4> [[B1]])
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>
@@ -118,10 +113,10 @@ define <2 x i16> @t7_extrause0_vec_undef(<2 x i8> %x) {
 }
 define i16 @t8_extrause1(i8 %x) {
 ; CHECK-LABEL: @t8_extrause1(
-; CHECK-NEXT:    [[A:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    call void @use8(i8 [[A]])
-; CHECK-NEXT:    [[B:%.*]] = trunc i8 [[A]] to i4
-; CHECK-NEXT:    [[C:%.*]] = sext i4 [[B]] to i16
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A_SIGNED]], 15
+; CHECK-NEXT:    call void @use8(i8 [[A1]])
+; CHECK-NEXT:    [[C:%.*]] = sext i8 [[A_SIGNED]] to i16
 ; CHECK-NEXT:    ret i16 [[C]]
 ;
   %a = lshr i8 %x, 4 ; has extra use, but we can deal with that
@@ -132,10 +127,10 @@ define i16 @t8_extrause1(i8 %x) {
 }
 define <2 x i16> @t9_extrause1_vec_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @t9_extrause1_vec_undef(
-; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
-; CHECK-NEXT:    call void @usevec8(<2 x i8> [[A]])
-; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i8> [[A]] to <2 x i4>
-; CHECK-NEXT:    [[C:%.*]] = sext <2 x i4> [[B]] to <2 x i16>
+; CHECK-NEXT:    [[A_SIGNED:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 undef>
+; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A_SIGNED]], <i8 15, i8 undef>
+; CHECK-NEXT:    call void @usevec8(<2 x i8> [[A1]])
+; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A_SIGNED]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
   %a = lshr <2 x i8> %x, <i8 4, i8 undef>