diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6ad493772d170..263927f451c82 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2405,6 +2405,22 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
               matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
                                      /*MatchBitReversals*/ true))
         return BitOp;
+
+      // R = fshl(X, X, C2)
+      // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
+      Value *InnerOp;
+      const APInt *ShAmtInnerC, *ShAmtOuterC;
+      if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
+                            m_APInt(ShAmtInnerC))) &&
+          match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
+        APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
+        APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
+        if (Modulo.isZero())
+          return replaceInstUsesWith(*II, InnerOp);
+        Constant *ModuloC = ConstantInt::get(Ty, Modulo);
+        return CallInst::Create(cast<IntrinsicInst>(Op0)->getCalledFunction(),
+                                {InnerOp, InnerOp, ModuloC});
+      }
     }
 
     // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 0325c60997dfd..28c541e1a9eb2 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -1214,3 +1214,75 @@ define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) {
   %r = call i31 @llvm.fshr.i31(i31 %x, i31 %x, i31 %n)
   ret i31 %r
 }
+
+define i32 @rot_const_consecutive(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive(
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 13)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 27)
+  ret i32 %r2
+}
+
+define i32 @rot_const_consecutive_multi_use(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_multi_use(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 11)
+; CHECK-NEXT:    [[R2:%.*]] = and i32 [[R]], [[R3]]
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 4)
+  %and = and i32 %r, %r2
+  ret i32 %and
+}
+
+define i32 @rot_const_consecutive_cancel_out(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_cancel_out(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 25)
+  ret i32 %r2
+}
+
+;; negative test, consecutive rotates only fold if shift amounts are const
+
+define i32 @rot_nonconst_shift(i32 %x, i32 %amt) {
+; CHECK-LABEL: @rot_nonconst_shift(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 [[AMT:%.*]])
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 %amt)
+  ret i32 %r2
+}
+
+;; negative test, 1st funnel shift isn't a rotate.
+
+define i32 @fsh_rot(i32 %x, i32 %y) {
+; CHECK-LABEL: @fsh_rot(
+; CHECK-NEXT:    [[FSH:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[FSH]], i32 [[FSH]], i32 4)
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %fsh = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
+  %r = call i32 @llvm.fshl.i32(i32 %fsh, i32 %fsh, i32 4)
+  ret i32 %r
+}
+
+;; negative test, 2nd funnel shift isn't a rotate.
+
+define i32 @rot_fsh(i32 %x, i32 %y) {
+; CHECK-LABEL: @rot_fsh(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[Y]], i32 [[R:%.*]], i32 4)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %y, i32 4)
+  ret i32 %r2
+}