From 56dd41cd1e42b8750bb0e6c8e05611f2b5f1c64c Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Wed, 24 Sep 2025 19:58:33 -0600 Subject: [PATCH 1/6] pre-commit --- .../InstCombine/InstCombineCalls.cpp | 18 +++++ llvm/test/Transforms/InstCombine/fsh.ll | 75 +++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 6ad493772d170..bcbe28a1080c4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2405,6 +2405,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true, /*MatchBitReversals*/ true)) return BitOp; + + // R = fshl(X, X, C2) + // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize) + Value *InnerOp0; + Value *InnerOp1; + Constant *ShAmtInnerC; + if (match(Op0, m_FShl(m_Value(InnerOp0), m_Value(InnerOp1), + m_ImmConstant(ShAmtInnerC))) && + Op0 == Op1 && InnerOp0 == InnerOp1) { + APInt Sum = + ShAmtC->getUniqueInteger() + ShAmtInnerC->getUniqueInteger(); + APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth)); + if (Modulo.isZero()) + return replaceInstUsesWith(*II, InnerOp0); + Constant *ModuloC = ConstantInt::get(Ty, Modulo); + return CallInst::Create(cast(Op0)->getCalledFunction(), + {InnerOp0, InnerOp1, ModuloC}); + } } // fshl(X, X, Neg(Y)) --> fshr(X, X, Y) diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll index 0325c60997dfd..334c613c9e8d1 100644 --- a/llvm/test/Transforms/InstCombine/fsh.ll +++ b/llvm/test/Transforms/InstCombine/fsh.ll @@ -1214,3 +1214,78 @@ define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) { %r = call i31 @llvm.fshr.i31(i31 %x, i31 %x, i31 %n) ret i31 %r } + +define i32 @rot_const_consecutive(i32 %x) { +; CHECK-LABEL: @rot_const_consecutive( +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 13) 
+; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 27) +; CHECK-NEXT: ret i32 [[R2]] +; + %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 13) + %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 27) + ret i32 %r2 +} + +define i32 @rot_const_consecutive_multi_use(i32 %x) { +; CHECK-LABEL: @rot_const_consecutive_multi_use( +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7) +; CHECK-NEXT: [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 4) +; CHECK-NEXT: [[R2:%.*]] = and i32 [[R]], [[R3]] +; CHECK-NEXT: ret i32 [[R2]] +; + %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7) + %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 4) + %and = and i32 %r, %r2 + ret i32 %and +} + +define i32 @rot_const_consecutive_cancel_out(i32 %x) { +; CHECK-LABEL: @rot_const_consecutive_cancel_out( +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X1:%.*]], i32 [[X1]], i32 7) +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 25) +; CHECK-NEXT: ret i32 [[X]] +; + %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7) + %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 25) + ret i32 %r2 +} + +;; negative test, consecutive rotates only fold if shift amounts are const + +define i32 @rot_nonconst_shift(i32 %x, i32 %amt) { +; CHECK-LABEL: @rot_nonconst_shift( +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7) +; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 [[AMT:%.*]]) +; CHECK-NEXT: ret i32 [[R2]] +; + %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7) + %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 %amt) + ret i32 %r2 +} + +;; negative test, 1st funnel shift isn't a rotate. 
+ +define i32 @fsh_rot(i32 %x, i32 %y) { +; CHECK-LABEL: @fsh_rot( +; CHECK-NEXT: [[FSH:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[FSH]], i32 [[FSH]], i32 4) +; CHECK-NEXT: ret i32 [[R]] +; + %fsh = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7) + %r = call i32 @llvm.fshl.i32(i32 %fsh, i32 %fsh, i32 4) + ret i32 %r +} + +;; negative test, 2nd funnel shift isn't a rotate. + +define i32 @rot_fsh(i32 %x, i32 %y) { +; CHECK-LABEL: @rot_fsh( +; CHECK-NEXT: [[Y:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7) +; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[Y]], i32 [[R:%.*]], i32 4) +; CHECK-NEXT: ret i32 [[R2]] +; + %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7) + %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %y, i32 4) + ret i32 %r2 +} + From 021e1b6498bd5c444a763a24a9018978ff903253 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Wed, 24 Sep 2025 20:03:29 -0600 Subject: [PATCH 2/6] The rotate transformation from https://github.com/llvm/llvm-project/blob/72c04bb882ad70230bce309c3013d9cc2c99e9a7/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L10312-L10337 has no middle-end equivalent in InstCombine. The following is a port of that transformation to InstCombine. 
--- llvm/test/Transforms/InstCombine/fsh.ll | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll index 334c613c9e8d1..28c541e1a9eb2 100644 --- a/llvm/test/Transforms/InstCombine/fsh.ll +++ b/llvm/test/Transforms/InstCombine/fsh.ll @@ -1217,8 +1217,7 @@ define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) { define i32 @rot_const_consecutive(i32 %x) { ; CHECK-LABEL: @rot_const_consecutive( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 13) -; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 27) +; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8) ; CHECK-NEXT: ret i32 [[R2]] ; %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 13) @@ -1229,7 +1228,7 @@ define i32 @rot_const_consecutive(i32 %x) { define i32 @rot_const_consecutive_multi_use(i32 %x) { ; CHECK-LABEL: @rot_const_consecutive_multi_use( ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7) -; CHECK-NEXT: [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 4) +; CHECK-NEXT: [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 11) ; CHECK-NEXT: [[R2:%.*]] = and i32 [[R]], [[R3]] ; CHECK-NEXT: ret i32 [[R2]] ; @@ -1241,9 +1240,7 @@ define i32 @rot_const_consecutive_multi_use(i32 %x) { define i32 @rot_const_consecutive_cancel_out(i32 %x) { ; CHECK-LABEL: @rot_const_consecutive_cancel_out( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X1:%.*]], i32 [[X1]], i32 7) -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 25) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 [[X:%.*]] ; %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7) %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 25) From 1b3e617369f4d44333b76124d76a3ac184f31c79 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Wed, 24 Sep 2025 21:31:43 -0600 Subject: [PATCH 3/6] 
added m_Deferred --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bcbe28a1080c4..e4e356bfd2d72 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2408,20 +2408,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // R = fshl(X, X, C2) // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize) - Value *InnerOp0; - Value *InnerOp1; + Value *InnerOp; Constant *ShAmtInnerC; - if (match(Op0, m_FShl(m_Value(InnerOp0), m_Value(InnerOp1), + if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp), m_ImmConstant(ShAmtInnerC))) && - Op0 == Op1 && InnerOp0 == InnerOp1) { + Op0 == Op1) { APInt Sum = ShAmtC->getUniqueInteger() + ShAmtInnerC->getUniqueInteger(); APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth)); if (Modulo.isZero()) - return replaceInstUsesWith(*II, InnerOp0); + return replaceInstUsesWith(*II, InnerOp); Constant *ModuloC = ConstantInt::get(Ty, Modulo); return CallInst::Create(cast(Op0)->getCalledFunction(), - {InnerOp0, InnerOp1, ModuloC}); + {InnerOp, InnerOp, ModuloC}); } } From 752eef061064eb64a1be1ce93160c275f4634362 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Thu, 25 Sep 2025 13:06:27 -0600 Subject: [PATCH 4/6] Allow non-splat constant vectors Co-authored-by: Yingwei Zheng --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e4e356bfd2d72..0ee15a8cdf905 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2414,7 +2414,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { 
m_ImmConstant(ShAmtInnerC))) && Op0 == Op1) { APInt Sum = - ShAmtC->getUniqueInteger() + ShAmtInnerC->getUniqueInteger(); + *ShAmtOuterC + *ShAmtInnerC; APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth)); if (Modulo.isZero()) return replaceInstUsesWith(*II, InnerOp); From 2d53365523a5aa858440db82be0e0ab177075c84 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Thu, 25 Sep 2025 13:10:20 -0600 Subject: [PATCH 5/6] Switch to matching on APInt Co-authored-by: Yingwei Zheng --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0ee15a8cdf905..d4c3a6c996491 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2409,9 +2409,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // R = fshl(X, X, C2) // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize) Value *InnerOp; - Constant *ShAmtInnerC; + const APInt *ShAmtInnerC, *ShAmtOuterC; if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp), - m_ImmConstant(ShAmtInnerC))) && + m_APInt(ShAmtInnerC))) && match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) { APInt Sum = *ShAmtOuterC + *ShAmtInnerC; From 52f3d1302fd761a2362284c790edae2c2cd02700 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Thu, 25 Sep 2025 13:18:10 -0600 Subject: [PATCH 6/6] clang format --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index d4c3a6c996491..263927f451c82 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2411,10 +2411,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Value *InnerOp; 
const APInt *ShAmtInnerC, *ShAmtOuterC; if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp), - m_APInt(ShAmtInnerC))) && match(ShAmtC, m_APInt(ShAmtOuterC)) && - Op0 == Op1) { - APInt Sum = - *ShAmtOuterC + *ShAmtInnerC; + m_APInt(ShAmtInnerC))) && + match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) { + APInt Sum = *ShAmtOuterC + *ShAmtInnerC; APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth)); if (Modulo.isZero()) return replaceInstUsesWith(*II, InnerOp);