diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6ad493772d170..263927f451c82 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2405,6 +2405,22 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
               matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
                                      /*MatchBitReversals*/ true))
         return BitOp;
+
+      // R = fshl(X, X, C2)
+      // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
+      Value *InnerOp;
+      const APInt *ShAmtInnerC, *ShAmtOuterC;
+      if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
+                            m_APInt(ShAmtInnerC))) &&
+          match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
+        APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
+        APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
+        if (Modulo.isZero())
+          return replaceInstUsesWith(*II, InnerOp);
+        Constant *ModuloC = ConstantInt::get(Ty, Modulo);
+        return CallInst::Create(cast<IntrinsicInst>(Op0)->getCalledFunction(),
+                                {InnerOp, InnerOp, ModuloC});
+      }
     }
 
     // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 0325c60997dfd..28c541e1a9eb2 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -1214,3 +1214,75 @@ define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) {
   %r = call i31 @llvm.fshr.i31(i31 %x, i31 %x, i31 %n)
   ret i31 %r
 }
+
+define i32 @rot_const_consecutive(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive(
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 13)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 27)
+  ret i32 %r2
+}
+
+define i32 @rot_const_consecutive_multi_use(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_multi_use(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 11)
+; CHECK-NEXT:    [[R2:%.*]] = and i32 [[R]], [[R3]]
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 4)
+  %and = and i32 %r, %r2
+  ret i32 %and
+}
+
+define i32 @rot_const_consecutive_cancel_out(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_cancel_out(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 25)
+  ret i32 %r2
+}
+
+;; negative test, consecutive rotates only fold if shift amounts are const
+
+define i32 @rot_nonconst_shift(i32 %x, i32 %amt) {
+; CHECK-LABEL: @rot_nonconst_shift(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 [[AMT:%.*]])
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 %amt)
+  ret i32 %r2
+}
+
+;; negative test, 1st funnel shift isn't a rotate.
+
+define i32 @fsh_rot(i32 %x, i32 %y) {
+; CHECK-LABEL: @fsh_rot(
+; CHECK-NEXT:    [[FSH:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[FSH]], i32 [[FSH]], i32 4)
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %fsh = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
+  %r = call i32 @llvm.fshl.i32(i32 %fsh, i32 %fsh, i32 4)
+  ret i32 %r
+}
+
+;; negative test, 2nd funnel shift isn't a rotate.
+
+define i32 @rot_fsh(i32 %x, i32 %y) {
+; CHECK-LABEL: @rot_fsh(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[Y]], i32 [[R:%.*]], i32 4)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %y, i32 4)
+  ret i32 %r2
+}