- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[DAGCombine] Improve bswap lowering for machines that support bit rotates #164848
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
| @llvm/pr-subscribers-llvm-selectiondag Author: AZero13 (AZero13) ChangesSource: Hacker's delight. Full diff: https://github.com/llvm/llvm-project/pull/164848.diff 3 Files Affected: 
 diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 920dff935daed..94eb45e2de0bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9899,6 +9899,14 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
     // Use a rotate by 8. This can be further expanded if necessary.
     return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
   case MVT::i32:
+    if (isOperationLegal(ISD::ROTR, VT)) {
+      // (x & 0x00FF00FF) rotl 8 | (x rotr 8) & 0x00FF00FF
+      SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, DAG.getConstant(0x00FF00FF, dl, VT));
+      SDValue Rotl = DAG.getNode(ISD::ROTL, dl, VT, And, DAG.getConstant(8, dl, SHVT));
+      SDValue Rotr = DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+      SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotr, DAG.getConstant(0x00FF00FF, dl, VT));
+      return DAG.getNode(ISD::OR, dl, VT, Rotl, And2);
+    }
     Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
     Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                        DAG.getConstant(0xFF00, dl, VT));
diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
index 4b6d14efd0ecb..4f933b1ed780c 100644
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -53,14 +53,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
 ; BSWAP is not supported by 32 bit target
 ; CHECK-LABEL: load_i32_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -223,21 +221,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
 define i64 @load_i64_by_i8_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r2, #255
 ; CHECK-NEXT:    ldr r1, [r0]
-; CHECK-NEXT:    mov r12, #65280
 ; CHECK-NEXT:    ldr r0, [r0, #4]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r3, r12, r0, lsr #8
-; CHECK-NEXT:    orr r3, r3, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    and r2, r12, r1, lsr #8
-; CHECK-NEXT:    orr r0, r0, r3
-; CHECK-NEXT:    and r3, r1, #65280
-; CHECK-NEXT:    orr r2, r2, r1, lsr #24
-; CHECK-NEXT:    lsl r1, r1, #24
-; CHECK-NEXT:    orr r1, r1, r3, lsl #8
-; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    orr r2, r2, #16711680
+; CHECK-NEXT:    and r3, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r3, ror #24
+; CHECK-NEXT:    and r3, r1, r2
+; CHECK-NEXT:    and r1, r2, r1, lsr #8
+; CHECK-NEXT:    orr r1, r1, r3, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -377,14 +370,12 @@ define i64 @load_i64_by_i8(ptr %arg) {
 define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #1]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
@@ -434,14 +425,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
 define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #-4]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
@@ -587,14 +576,12 @@ declare i16 @llvm.bswap.i16(i16)
 define i32 @load_i32_by_bswap_i16(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_bswap_i16:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
@@ -667,14 +654,12 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    add r0, r0, r1
-; CHECK-NEXT:    mov r1, #65280
+; CHECK-NEXT:    mov r1, #255
+; CHECK-NEXT:    orr r1, r1, #16711680
 ; CHECK-NEXT:    ldr r0, [r0, #12]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
@@ -733,14 +718,12 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    add r0, r1, r0
-; CHECK-NEXT:    mov r1, #65280
+; CHECK-NEXT:    mov r1, #255
+; CHECK-NEXT:    orr r1, r1, #16711680
 ; CHECK-NEXT:    ldr r0, [r0, #13]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
diff --git a/llvm/test/CodeGen/ARM/load-combine.ll b/llvm/test/CodeGen/ARM/load-combine.ll
index 0f6ec8aa47386..7dcf59a2af18b 100644
--- a/llvm/test/CodeGen/ARM/load-combine.ll
+++ b/llvm/test/CodeGen/ARM/load-combine.ll
@@ -117,14 +117,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
 ; BSWAP is not supported by 32 bit target
 ; CHECK-LABEL: load_i32_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -237,21 +235,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
 define i64 @load_i64_by_i8_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r2, #255
 ; CHECK-NEXT:    ldr r1, [r0]
-; CHECK-NEXT:    mov r12, #65280
 ; CHECK-NEXT:    ldr r0, [r0, #4]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r3, r12, r0, lsr #8
-; CHECK-NEXT:    orr r3, r3, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    and r2, r12, r1, lsr #8
-; CHECK-NEXT:    orr r0, r0, r3
-; CHECK-NEXT:    and r3, r1, #65280
-; CHECK-NEXT:    orr r2, r2, r1, lsr #24
-; CHECK-NEXT:    lsl r1, r1, #24
-; CHECK-NEXT:    orr r1, r1, r3, lsl #8
-; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    orr r2, r2, #16711680
+; CHECK-NEXT:    and r3, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r3, ror #24
+; CHECK-NEXT:    and r3, r1, r2
+; CHECK-NEXT:    and r1, r2, r1, lsr #8
+; CHECK-NEXT:    orr r1, r1, r3, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -413,14 +406,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
 define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #1]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
@@ -469,14 +460,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
 define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #-4]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
@@ -527,14 +516,12 @@ declare i16 @llvm.bswap.i16(i16)
 define i32 @load_i32_by_bswap_i16(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_bswap_i16:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
 | 
| @llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) ChangesSource: Hacker's delight. Full diff: https://github.com/llvm/llvm-project/pull/164848.diff 3 Files Affected: 
 diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 920dff935daed..94eb45e2de0bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9899,6 +9899,14 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
     // Use a rotate by 8. This can be further expanded if necessary.
     return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
   case MVT::i32:
+    if (isOperationLegal(ISD::ROTR, VT)) {
+      // (x & 0x00FF00FF) rotl 8 | (x rotr 8) & 0x00FF00FF
+      SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, DAG.getConstant(0x00FF00FF, dl, VT));
+      SDValue Rotl = DAG.getNode(ISD::ROTL, dl, VT, And, DAG.getConstant(8, dl, SHVT));
+      SDValue Rotr = DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+      SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotr, DAG.getConstant(0x00FF00FF, dl, VT));
+      return DAG.getNode(ISD::OR, dl, VT, Rotl, And2);
+    }
     Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
     Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                        DAG.getConstant(0xFF00, dl, VT));
diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
index 4b6d14efd0ecb..4f933b1ed780c 100644
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -53,14 +53,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
 ; BSWAP is not supported by 32 bit target
 ; CHECK-LABEL: load_i32_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -223,21 +221,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
 define i64 @load_i64_by_i8_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r2, #255
 ; CHECK-NEXT:    ldr r1, [r0]
-; CHECK-NEXT:    mov r12, #65280
 ; CHECK-NEXT:    ldr r0, [r0, #4]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r3, r12, r0, lsr #8
-; CHECK-NEXT:    orr r3, r3, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    and r2, r12, r1, lsr #8
-; CHECK-NEXT:    orr r0, r0, r3
-; CHECK-NEXT:    and r3, r1, #65280
-; CHECK-NEXT:    orr r2, r2, r1, lsr #24
-; CHECK-NEXT:    lsl r1, r1, #24
-; CHECK-NEXT:    orr r1, r1, r3, lsl #8
-; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    orr r2, r2, #16711680
+; CHECK-NEXT:    and r3, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r3, ror #24
+; CHECK-NEXT:    and r3, r1, r2
+; CHECK-NEXT:    and r1, r2, r1, lsr #8
+; CHECK-NEXT:    orr r1, r1, r3, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -377,14 +370,12 @@ define i64 @load_i64_by_i8(ptr %arg) {
 define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #1]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
@@ -434,14 +425,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
 define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #-4]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
@@ -587,14 +576,12 @@ declare i16 @llvm.bswap.i16(i16)
 define i32 @load_i32_by_bswap_i16(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_bswap_i16:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
@@ -667,14 +654,12 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    add r0, r0, r1
-; CHECK-NEXT:    mov r1, #65280
+; CHECK-NEXT:    mov r1, #255
+; CHECK-NEXT:    orr r1, r1, #16711680
 ; CHECK-NEXT:    ldr r0, [r0, #12]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
@@ -733,14 +718,12 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    add r0, r1, r0
-; CHECK-NEXT:    mov r1, #65280
+; CHECK-NEXT:    mov r1, #255
+; CHECK-NEXT:    orr r1, r1, #16711680
 ; CHECK-NEXT:    ldr r0, [r0, #13]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
diff --git a/llvm/test/CodeGen/ARM/load-combine.ll b/llvm/test/CodeGen/ARM/load-combine.ll
index 0f6ec8aa47386..7dcf59a2af18b 100644
--- a/llvm/test/CodeGen/ARM/load-combine.ll
+++ b/llvm/test/CodeGen/ARM/load-combine.ll
@@ -117,14 +117,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
 ; BSWAP is not supported by 32 bit target
 ; CHECK-LABEL: load_i32_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -237,21 +235,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
 define i64 @load_i64_by_i8_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r2, #255
 ; CHECK-NEXT:    ldr r1, [r0]
-; CHECK-NEXT:    mov r12, #65280
 ; CHECK-NEXT:    ldr r0, [r0, #4]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r3, r12, r0, lsr #8
-; CHECK-NEXT:    orr r3, r3, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    and r2, r12, r1, lsr #8
-; CHECK-NEXT:    orr r0, r0, r3
-; CHECK-NEXT:    and r3, r1, #65280
-; CHECK-NEXT:    orr r2, r2, r1, lsr #24
-; CHECK-NEXT:    lsl r1, r1, #24
-; CHECK-NEXT:    orr r1, r1, r3, lsl #8
-; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    orr r2, r2, #16711680
+; CHECK-NEXT:    and r3, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r3, ror #24
+; CHECK-NEXT:    and r3, r1, r2
+; CHECK-NEXT:    and r1, r2, r1, lsr #8
+; CHECK-NEXT:    orr r1, r1, r3, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -413,14 +406,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
 define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #1]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
@@ -469,14 +460,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
 define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #-4]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
@@ -527,14 +516,12 @@ declare i16 @llvm.bswap.i16(i16)
 define i32 @load_i32_by_bswap_i16(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_bswap_i16:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
 | 
| ✅ With the latest revision this PR passed the C/C++ code formatter. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
alive2 proof?
| case MVT::i32: | ||
| if (isOperationLegal(ISD::ROTR, VT)) { | ||
| // (x & 0x00FF00FF) rotl 8 | (x rotr 8) & 0x00FF00FF | ||
| SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull out repeated and mask constant
| 
 I'm pretty sure you need a final rotate by 16 on the result | 
| 
 I'm pretty sure you don't! | 
| Still on mobile - but the codegen still doesn't look right - are the rotates the right way around? | 
| 
 Well spotted - no they're not. | 
| // Use a rotate by 8. This can be further expanded if necessary. | ||
| return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); | ||
| case MVT::i32: | ||
| if (isOperationLegal(ISD::ROTR, VT)) { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
legal or custom?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was thinking legal because it only works well on ARM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most everywhere that checks legality for profitability concerns should be using isLegalOrCustom
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most everywhere that checks legality for profitability concerns should be using isLegalOrCustom
Fixed!
…ates Source: Hacker's delight.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| Thank you! | 
| I do not have merge permissions by the way | 
| @topperc can we please merge | 
…ates (llvm#164848) Source: Hacker's delight.
…ates (llvm#164848) Source: Hacker's delight.
…ates (llvm#164848) Source: Hacker's delight.

Source: Hacker's delight.