Skip to content

Conversation

axelcool1234
Copy link
Contributor

The rotate transformation from https://github.com/llvm/llvm-project/blob/72c04bb882ad70230bce309c3013d9cc2c99e9a7/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L10312-L10337 has no middle-end equivalent in InstCombine. The following is a port of that transformation to InstCombine.

@axelcool1234 axelcool1234 requested a review from nikic as a code owner September 25, 2025 02:07
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Sep 25, 2025
@llvmbot
Copy link
Member

llvmbot commented Sep 25, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Axel Sorenson (axelcool1234)

Changes

The rotate transformation from https://github.com/llvm/llvm-project/blob/72c04bb882ad70230bce309c3013d9cc2c99e9a7/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L10312-L10337 has no middle-end equivalent in InstCombine. The following is a port of that transformation to InstCombine.


Full diff: https://github.com/llvm/llvm-project/pull/160628.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+18)
  • (modified) llvm/test/Transforms/InstCombine/fsh.ll (+72)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6ad493772d170..bcbe28a1080c4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2405,6 +2405,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
               matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
                                      /*MatchBitReversals*/ true))
         return BitOp;
+
+      // R = fshl(X, X, C2)
+      // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
+      Value *InnerOp0;
+      Value *InnerOp1;
+      Constant *ShAmtInnerC;
+      if (match(Op0, m_FShl(m_Value(InnerOp0), m_Value(InnerOp1),
+                            m_ImmConstant(ShAmtInnerC))) &&
+          Op0 == Op1 && InnerOp0 == InnerOp1) {
+        APInt Sum =
+            ShAmtC->getUniqueInteger() + ShAmtInnerC->getUniqueInteger();
+        APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
+        if (Modulo.isZero())
+          return replaceInstUsesWith(*II, InnerOp0);
+        Constant *ModuloC = ConstantInt::get(Ty, Modulo);
+        return CallInst::Create(cast<IntrinsicInst>(Op0)->getCalledFunction(),
+                                {InnerOp0, InnerOp1, ModuloC});
+      }
     }
 
     // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 0325c60997dfd..28c541e1a9eb2 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -1214,3 +1214,75 @@ define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) {
   %r = call i31 @llvm.fshr.i31(i31 %x, i31 %x, i31 %n)
   ret i31 %r
 }
+
+define i32 @rot_const_consecutive(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive(
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 13)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 27)
+  ret i32 %r2
+}
+
+define i32 @rot_const_consecutive_multi_use(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_multi_use(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 11)
+; CHECK-NEXT:    [[R2:%.*]] = and i32 [[R]], [[R3]]
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 4)
+  %and = and i32 %r, %r2
+  ret i32 %and
+}
+
+define i32 @rot_const_consecutive_cancel_out(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_cancel_out(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 25)
+  ret i32 %r2
+}
+
+;; negative test, consecutive rotates only fold if shift amounts are const
+
+define i32 @rot_nonconst_shift(i32 %x, i32 %amt) {
+; CHECK-LABEL: @rot_nonconst_shift(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 [[AMT:%.*]])
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 %amt)
+  ret i32 %r2
+}
+
+;; negative test, 1st funnel shift isn't a rotate.
+
+define i32 @fsh_rot(i32 %x, i32 %y) {
+; CHECK-LABEL: @fsh_rot(
+; CHECK-NEXT:    [[FSH:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[FSH]], i32 [[FSH]], i32 4)
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %fsh = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
+  %r = call i32 @llvm.fshl.i32(i32 %fsh, i32 %fsh, i32 4)
+  ret i32 %r
+}
+
+;; negative test, 2nd funnel shift isn't a rotate.
+
+define i32 @rot_fsh(i32 %x, i32 %y) {
+; CHECK-LABEL: @rot_fsh(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[Y]], i32 [[R:%.*]], i32 4)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %y, i32 4)
+  ret i32 %r2
+}
+

@dtcxzyw
Copy link
Member

dtcxzyw commented Sep 25, 2025

@zyw-bot mfuzz

@zyw-bot
Copy link

zyw-bot commented Sep 25, 2025

[WARN] GITHUB_MEMBERSHIP_TOKEN not set, skip membership check.

@dtcxzyw
Copy link
Member

dtcxzyw commented Sep 25, 2025

@zyw-bot mfuzz

Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
Copy link

github-actions bot commented Sep 25, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

axelcool1234 and others added 2 commits September 25, 2025 13:10
Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@mshockwave mshockwave merged commit dee28f9 into llvm:main Sep 27, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 27, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia-gcc7 running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/18949

Here is the relevant piece of the build log for the reference
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x5638116ef870 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x5638116ef870 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 27, 2025

LLVM Buildbot has detected a new failure on builder clang-ppc64le-linux-test-suite running on ppc64le-clang-test-suite while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/95/builds/18447

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LeakSanitizer-AddressSanitizer-powerpc64le :: TestCases/create_thread_leak.cpp' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
/home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/./bin/clang  --driver-mode=g++ -O0  -m64 -fno-function-sections  -gline-tables-only -fsanitize=address -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/../ -pthread /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp # RUN: at line 3
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/./bin/clang --driver-mode=g++ -O0 -m64 -fno-function-sections -gline-tables-only -fsanitize=address -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/../ -pthread /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp
not  /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp 10 1 0 0 2>&1 | FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp --check-prefixes=LEAK,LEAK123 # RUN: at line 4
+ not /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp 10 1 0 0
+ FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp --check-prefixes=LEAK,LEAK123
not  /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp 10 0 1 0 2>&1 | FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp --check-prefixes=LEAK,LEAK234 # RUN: at line 5
+ not /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp 10 0 1 0
+ FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp --check-prefixes=LEAK,LEAK234
not  /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp 10 0 0 1 2>&1 | FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp --check-prefixes=LEAK,LEAK234 # RUN: at line 6
+ not /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/build/runtimes/runtimes-bins/compiler-rt/test/lsan/POWERPC64LEAsanConfig/TestCases/Output/create_thread_leak.cpp.tmp 10 0 0 1
+ FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp --check-prefixes=LEAK,LEAK234
FileCheck error: '<stdin>' is empty.
FileCheck command line:  FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-clang-test-suite/clang-ppc64le-test-suite/llvm-project/compiler-rt/test/lsan/TestCases/create_thread_leak.cpp --check-prefixes=LEAK,LEAK234

--

********************


mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
…bine (llvm#160628)

The rotate transformation from
https://github.com/llvm/llvm-project/blob/72c04bb882ad70230bce309c3013d9cc2c99e9a7/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L10312-L10337
has no middle-end equivalent in InstCombine. The following is a port of
that transformation to InstCombine.

---------

Co-authored-by: Yingwei Zheng <dtcxzyw@qq.com>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants