-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[DAG] MatchLoadCombine - match swapped loads #167416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
GrumpyPigSkin
wants to merge
7
commits into
llvm:main
Choose a base branch
from
GrumpyPigSkin:dag-matchloadcombine-match-swapped-loads
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
[DAG] MatchLoadCombine - match swapped loads #167416
GrumpyPigSkin
wants to merge
7
commits into
llvm:main
from
GrumpyPigSkin:dag-matchloadcombine-match-swapped-loads
+174
−48
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (GrumpyPigSkin) Changes: Added a check to MatchLoadCombine to combine two i32 loads into an i64 load + rotate. Closes #167314. Full diff: https://github.com/llvm/llvm-project/pull/167416.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4f2eb1e64dbe0..4ae239bc58ef4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9772,12 +9772,40 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
MemVT))
return SDValue();
+ auto IsRotateLoaded = [](ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset,
+ unsigned BitWidth) {
+ // Ensure that we have the correct width type: we want to combine two
+ // 32-bit loads into a single 64-bit load.
+ if (BitWidth != 64 || ByteOffsets.size() != 8)
+ return false;
+
+ constexpr unsigned FourBytes = 4;
+
+ for (unsigned i = 0; i < FourBytes; ++i) {
+ // Check the lower 4 bytes come from the higher memory address.
+ if (ByteOffsets[i] != FirstOffset + i + FourBytes)
+ return false;
+ // Check the higher 4 bytes come from the lower memory address.
+ if (ByteOffsets[i + FourBytes] != FirstOffset + i)
+ return false;
+ }
+ return true;
+ };
+
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
std::optional<bool> IsBigEndian = isBigEndian(
ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
- if (!IsBigEndian)
- return SDValue();
+
+ bool IsRotated = false;
+ if (!IsBigEndian) {
+ IsRotated =
+ IsRotateLoaded(ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes),
+ FirstOffset, VT.getSizeInBits());
+
+ if (!IsRotated)
+ return SDValue();
+ }
assert(FirstByteProvider && "must be set");
@@ -9791,8 +9819,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// replace it with a single (possibly zero-extended) load and bswap + shift if
// needed.
- // If the load needs byte swap check if the target supports it
- bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
+ // If the load needs a byte swap, check whether the target supports it. The
+ // rotated pattern is handled separately and never needs a byte swap.
+ bool NeedsBswap = !IsRotated && (IsBigEndianTarget != *IsBigEndian);
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
@@ -9803,8 +9832,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
- // If we need to bswap and zero extend, we have to insert a shift. Check that
- // it is legal.
+ // If we need to rotate make sure that is legal.
+ if (IsRotated && LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT))
+ return SDValue();
+
+ // If we need to bswap and zero extend, we have to insert a shift. Check
+ // that it is legal.
if (NeedsBswap && NeedsZext && LegalOperations &&
!TLI.isOperationLegal(ISD::SHL, VT))
return SDValue();
@@ -9826,15 +9859,33 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
for (LoadSDNode *L : Loads)
DAG.makeEquivalentMemoryOrdering(L, NewLoad);
- if (!NeedsBswap)
+ // If no transform is needed then return the new load.
+ if (!NeedsBswap && !IsRotated)
return NewLoad;
- SDValue ShiftedLoad =
- NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
- DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
- VT, SDLoc(N)))
- : NewLoad;
- return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
+ // If we detect the need to BSWAP build the new node and return it.
+ if (NeedsBswap) {
+ SDValue ShiftedLoad =
+ NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
+ DAG.getShiftAmountConstant(
+ ZeroExtendedBytes * 8, VT, SDLoc(N)))
+ : NewLoad;
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
+ }
+
+ // If we detect we need to rotate build the new ROTR node.
+ if (IsRotated) {
+ // The amount to rotate is half that of the size, i.e. 32 bits for an i64.
+ unsigned RotateAmount = VT.getSizeInBits() / 2;
+
+ EVT ShiftAmountTy =
+ TLI.getShiftAmountTy(NewLoad.getValueType(), DAG.getDataLayout());
+
+ return DAG.getNode(ISD::ROTR, SDLoc(N), VT, NewLoad,
+ DAG.getConstant(RotateAmount, SDLoc(N), ShiftAmountTy));
+ }
+
+ llvm_unreachable("Should have returned a transformed load value");
}
// If the target has andn, bsl, or a similar bit-select instruction,
diff --git a/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll b/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll
new file mode 100644
index 0000000000000..b08f2fbbc56e4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; This test checks that a pattern of two 32-bit loads, which are combined
+; to form a 64-bit value with swapped words, is optimized into a single
+; 64-bit load followed by a rotate by 32 bits.
+
+define i64 @test_load_bswap_to_rotate(ptr %p) {
+; CHECK-LABEL: test_load_bswap_to_rotate:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: rorq $32, %rax
+; CHECK-NEXT: retq
+
+ %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4
+ %lo = load i32, ptr %p
+ %hi = load i32, ptr %p.hi
+ %conv = zext i32 %lo to i64
+ %shl = shl nuw i64 %conv, 32
+ %conv2 = zext i32 %hi to i64
+ %or = or disjoint i64 %shl, %conv2
+ ret i64 %or
+}
|
Contributor
Author
|
@RKSimon please review :) |
arsenm
reviewed
Nov 10, 2025
RKSimon
requested changes
Nov 11, 2025
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
RKSimon
reviewed
Nov 11, 2025
bb99659 to
55f06ae
Compare
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Added a check to MatchLoadCombine to combine two i32 loads into an i64 load + rotate.
Closes #167314