diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4f2eb1e64dbe0..7d324c031528d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9398,6 +9398,35 @@ static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
   return BigEndian;
 }
 
+// Determines if multiple bytes loaded into a register correspond to loading
+// a single, contiguous block of bytes from memory and then performing a
+// bitwise right rotation. Returns the rotation amount in bits or std::nullopt
+// if we can't match the pattern.
+static std::optional<int64_t> getRotationAmount(ArrayRef<int64_t> ByteOffsets,
+                                                int64_t FirstOffset) {
+  unsigned ByteWidth = ByteOffsets.size();
+  if (ByteWidth == 0)
+    return std::nullopt;
+
+  int64_t FirstByteActualOffset = ByteOffsets[0];
+  int64_t RotateAmtInBytes = FirstByteActualOffset - FirstOffset;
+
+  // Check that the rotation amount is valid.
+  if (RotateAmtInBytes < 0 || RotateAmtInBytes >= ByteWidth)
+    return std::nullopt;
+
+  // Make sure each of the loaded bytes follows the same rotational pattern.
+  for (unsigned I = 0; I < ByteWidth; ++I) {
+    int64_t ExpectedOffset = FirstOffset + ((I + RotateAmtInBytes) % ByteWidth);
+    if (ByteOffsets[I] != ExpectedOffset) {
+      return std::nullopt;
+    }
+  }
+
+  // Return the rotation amount in bits.
+  return RotateAmtInBytes * 8;
+}
+
 // Look through one layer of truncate or extend.
 static SDValue stripTruncAndExt(SDValue Value) {
   switch (Value.getOpcode()) {
@@ -9776,65 +9805,99 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
   // little endian value load
   std::optional<bool> IsBigEndian = isBigEndian(
       ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
-  if (!IsBigEndian)
-    return SDValue();
-
-  assert(FirstByteProvider && "must be set");
+  // Handle the standard load combine.
+  if (IsBigEndian) {
+    bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
+
+    // Before legalize we can introduce illegal bswaps which will be later
+    // converted to an explicit bswap sequence. This way we end up with a single
+    // load and byte shuffling instead of several loads and byte shuffling.
+    // We do not introduce illegal bswaps when zero-extending as this tends to
+    // introduce too many arithmetic instructions.
+    if (NeedsBswap && (LegalOperations || NeedsZext) &&
+        !TLI.isOperationLegal(ISD::BSWAP, VT))
+      return SDValue();
 
-  // Ensure that the first byte is loaded from zero offset of the first load.
-  // So the combined value can be loaded from the first load address.
-  if (MemoryByteOffset(*FirstByteProvider) != 0)
-    return SDValue();
-
-  auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
+    // If we need to bswap and zero extend, we have to insert a shift. Check
+    // that it is legal.
+    if (NeedsBswap && NeedsZext && LegalOperations &&
+        !TLI.isOperationLegal(ISD::SHL, VT))
+      return SDValue();
 
-  // The node we are looking at matches with the pattern, check if we can
-  // replace it with a single (possibly zero-extended) load and bswap + shift if
-  // needed.
+    auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
+    if (MemoryByteOffset(*FirstByteProvider) != 0)
+      return SDValue();
 
-  // If the load needs byte swap check if the target supports it
-  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
+    // Check that a load of the wide type is both allowed and fast on the target
+    unsigned Fast = 0;
+    if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+                                *FirstLoad->getMemOperand(), &Fast) ||
+        !Fast)
+      return SDValue();
 
-  // Before legalize we can introduce illegal bswaps which will be later
-  // converted to an explicit bswap sequence. This way we end up with a single
-  // load and byte shuffling instead of several loads and byte shuffling.
-  // We do not introduce illegal bswaps when zero-extending as this tends to
-  // introduce too many arithmetic instructions.
-  if (NeedsBswap && (LegalOperations || NeedsZext) &&
-      !TLI.isOperationLegal(ISD::BSWAP, VT))
-    return SDValue();
+    SDValue NewLoad = DAG.getExtLoad(
+        NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT, Chain,
+        FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), MemVT,
+        FirstLoad->getAlign());
 
-  // If we need to bswap and zero extend, we have to insert a shift. Check that
-  // it is legal.
-  if (NeedsBswap && NeedsZext && LegalOperations &&
-      !TLI.isOperationLegal(ISD::SHL, VT))
-    return SDValue();
+    for (LoadSDNode *L : Loads)
+      DAG.makeEquivalentMemoryOrdering(L, NewLoad);
 
-  // Check that a load of the wide type is both allowed and fast on the target
-  unsigned Fast = 0;
-  bool Allowed =
-      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                             *FirstLoad->getMemOperand(), &Fast);
-  if (!Allowed || !Fast)
-    return SDValue();
+    // It is a simple combine.
+    if (!NeedsBswap)
+      return NewLoad;
 
-  SDValue NewLoad =
-      DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
-                     Chain, FirstLoad->getBasePtr(),
-                     FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
+    // It is a BSWAP combine.
+    SDValue ShiftedLoad =
+        NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
+                                DAG.getShiftAmountConstant(
+                                    ZeroExtendedBytes * 8, VT, SDLoc(N)))
+                  : NewLoad;
+    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
+  }
 
-  // Transfer chain users from old loads to the new load.
-  for (LoadSDNode *L : Loads)
-    DAG.makeEquivalentMemoryOrdering(L, NewLoad);
+  // Handle the rotated load combine.
+  if (auto RotateAmt = getRotationAmount(
+          ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset)) {
 
-  if (!NeedsBswap)
-    return NewLoad;
+    // Make sure we can rotate.
+    if (LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT))
+      return SDValue();
 
-  SDValue ShiftedLoad =
-      NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
-                              DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
-                                                         VT, SDLoc(N)))
-                : NewLoad;
-  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
+    auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
+    if (MemoryByteOffset(*FirstByteProvider) != 0)
+      return SDValue();
+
+    // Make sure the operation is legal and fast.
+    unsigned Fast = 0;
+    if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+                                *FirstLoad->getMemOperand(), &Fast) ||
+        !Fast)
+      return SDValue();
+
+    // Create the new load, rotate and then zero extend after if we need to.
+    SDValue NewLoad =
+        DAG.getLoad(MemVT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
+                    FirstLoad->getPointerInfo());
+
+    for (LoadSDNode *L : Loads)
+      DAG.makeEquivalentMemoryOrdering(L, NewLoad);
+
+    EVT ShiftAmountTy =
+        TLI.getShiftAmountTy(NewLoad.getValueType(), DAG.getDataLayout());
+    SDValue Rotated =
+        DAG.getNode(ISD::ROTR, SDLoc(N), MemVT, NewLoad,
+                    DAG.getConstant(*RotateAmt, SDLoc(N), ShiftAmountTy));
+
+    if (NeedsZext)
+      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Rotated);
+
+    return Rotated;
+  }
+
+  // No pattern matched.
+  return SDValue();
 }
 
 // If the target has andn, bsl, or a similar bit-select instruction,
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index f21c07599d6f1..3fb8cfe3c81da 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -1314,3 +1314,66 @@ define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
   %res = or i32 %e1.ext.shift, %e0.ext
   ret i32 %res
 }
+
+define i64 @test_load_bswap_to_rotate(ptr %p) {
+; CHECK-LABEL: test_load_bswap_to_rotate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl (%eax), %edx
+; CHECK-NEXT:    movl 4(%eax), %eax
+; CHECK-NEXT:    retl
+;
+; CHECK64-LABEL: test_load_bswap_to_rotate:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    movq (%rdi), %rax
+; CHECK64-NEXT:    rorq $32, %rax
+; CHECK64-NEXT:    retq
+
+  %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4
+  %lo = load i32, ptr %p
+  %hi = load i32, ptr %p.hi
+  %conv = zext i32 %lo to i64
+  %shl = shl nuw i64 %conv, 32
+  %conv2 = zext i32 %hi to i64
+  %or = or disjoint i64 %shl, %conv2
+  ret i64 %or
+}
+
+define i64 @test_load_rotate_zext(ptr %p) {
+; CHECK-LABEL: test_load_rotate_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl (%eax), %eax
+; CHECK-NEXT:    rorl $8, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    retl
+;
+; CHECK64-LABEL: test_load_rotate_zext:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    movl (%rdi), %eax
+; CHECK64-NEXT:    rorl $8, %eax
+; CHECK64-NEXT:    retq
+  %p1 = getelementptr inbounds i8, ptr %p, i64 1
+  %l1 = load i8, ptr %p1, align 1
+  %e1 = zext i8 %l1 to i64
+
+  %p2 = getelementptr inbounds i8, ptr %p, i64 2
+  %l2 = load i8, ptr %p2, align 1
+  %e2 = zext i8 %l2 to i64
+  %s2 = shl i64 %e2, 8
+
+  %p3 = getelementptr inbounds i8, ptr %p, i64 3
+  %l3 = load i8, ptr %p3, align 1
+  %e3 = zext i8 %l3 to i64
+  %s3 = shl i64 %e3, 16
+
+  %p0 = getelementptr inbounds i8, ptr %p, i64 0
+  %l0 = load i8, ptr %p0, align 1
+  %e0 = zext i8 %l0 to i64
+  %s0 = shl i64 %e0, 24
+
+  %or1 = or i64 %e1, %s2
+  %or2 = or i64 %or1, %s3
+  %or3 = or i64 %or2, %s0
+  ret i64 %or3
+}
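
Note for reviewers: below is a standalone sketch, outside of LLVM and not part
of the patch, of the byte-offset check that getRotationAmount performs.
rotationAmountBits and the driver are hypothetical names, and std::vector
stands in for ArrayRef; it assumes the combiner's little-endian numbering,
where ByteOffsets[I] is the memory offset that byte I (byte 0 = least
significant) of the combined value is loaded from. The worked example mirrors
test_load_bswap_to_rotate above.

// rotate_check.cpp -- build with: c++ -std=c++17 rotate_check.cpp
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

static std::optional<int64_t>
rotationAmountBits(const std::vector<int64_t> &ByteOffsets,
                   int64_t FirstOffset) {
  size_t ByteWidth = ByteOffsets.size();
  if (ByteWidth == 0)
    return std::nullopt;

  // Byte 0 of the value tells us how far the block was rotated right.
  int64_t RotateAmtInBytes = ByteOffsets[0] - FirstOffset;
  if (RotateAmtInBytes < 0 || RotateAmtInBytes >= (int64_t)ByteWidth)
    return std::nullopt;

  // Every byte must continue the same cyclic pattern.
  for (size_t I = 0; I < ByteWidth; ++I)
    if (ByteOffsets[I] !=
        FirstOffset + (int64_t)((I + RotateAmtInBytes) % ByteWidth))
      return std::nullopt;

  return RotateAmtInBytes * 8;
}

int main() {
  // Mirrors test_load_bswap_to_rotate: (zext(lo32) << 32) | zext(hi32) puts
  // memory bytes 4..7 into value bytes 0..3 and memory bytes 0..3 into value
  // bytes 4..7, i.e. the 8-byte block rotated right by 4 bytes.
  std::vector<int64_t> Offsets = {4, 5, 6, 7, 0, 1, 2, 3};
  if (auto Amt = rotationAmountBits(Offsets, 0))
    std::printf("rotr amount: %lld bits\n", (long long)*Amt); // prints 32
  return 0;
}

The first byte pins the candidate amount because rotr by 8*r makes value byte
I come from memory offset FirstOffset + ((I + r) % ByteWidth); with I = 0 that
is FirstOffset + r, so r = ByteOffsets[0] - FirstOffset, and the loop then
only has to verify the remaining bytes against that single candidate.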