Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9899,6 +9899,18 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
// Use a rotate by 8. This can be further expanded if necessary.
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
// This is meant for ARM specifically, which has ROTR but no ROTL.
if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
// ((x & 0x00FF00FF) rotr 8) | ((x rotl 8) & 0x00FF00FF), where rotl 8 is emitted as rotr 24.
SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
SDValue Rotr =
DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
SDValue Rotl =
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
}
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
DAG.getConstant(0xFF00, dl, VT));
Expand Down
93 changes: 38 additions & 55 deletions llvm/test/CodeGen/ARM/load-combine-big-endian.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
; BSWAP is not supported by 32 bit target
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
Expand Down Expand Up @@ -223,21 +221,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r2, #255
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: mov r12, #65280
; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r3, r12, r0, lsr #8
; CHECK-NEXT: orr r3, r3, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: and r2, r12, r1, lsr #8
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: and r3, r1, #65280
; CHECK-NEXT: orr r2, r2, r1, lsr #24
; CHECK-NEXT: lsl r1, r1, #24
; CHECK-NEXT: orr r1, r1, r3, lsl #8
; CHECK-NEXT: orr r1, r1, r2
; CHECK-NEXT: orr r2, r2, #16711680
; CHECK-NEXT: and r3, r0, r2
; CHECK-NEXT: and r0, r2, r0, ror #24
; CHECK-NEXT: orr r0, r0, r3, ror #8
; CHECK-NEXT: and r3, r1, r2
; CHECK-NEXT: and r1, r2, r1, ror #24
; CHECK-NEXT: orr r1, r1, r3, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
Expand Down Expand Up @@ -377,14 +370,12 @@ define i64 @load_i64_by_i8(ptr %arg) {
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #1]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
Expand Down Expand Up @@ -434,14 +425,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #-4]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
Expand Down Expand Up @@ -587,14 +576,12 @@ declare i16 @llvm.bswap.i16(i16)
define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
Expand Down Expand Up @@ -667,14 +654,12 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r0, r1
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: ldr r0, [r0, #12]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
Expand Down Expand Up @@ -733,14 +718,12 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r1, r0
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: ldr r0, [r0, #13]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
Expand Down
69 changes: 28 additions & 41 deletions llvm/test/CodeGen/ARM/load-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
; BSWAP is not supported by 32 bit target
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
Expand Down Expand Up @@ -237,21 +235,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r2, #255
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: mov r12, #65280
; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r3, r12, r0, lsr #8
; CHECK-NEXT: orr r3, r3, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: and r2, r12, r1, lsr #8
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: and r3, r1, #65280
; CHECK-NEXT: orr r2, r2, r1, lsr #24
; CHECK-NEXT: lsl r1, r1, #24
; CHECK-NEXT: orr r1, r1, r3, lsl #8
; CHECK-NEXT: orr r1, r1, r2
; CHECK-NEXT: orr r2, r2, #16711680
; CHECK-NEXT: and r3, r0, r2
; CHECK-NEXT: and r0, r2, r0, ror #24
; CHECK-NEXT: orr r0, r0, r3, ror #8
; CHECK-NEXT: and r3, r1, r2
; CHECK-NEXT: and r1, r2, r1, ror #24
; CHECK-NEXT: orr r1, r1, r3, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
Expand Down Expand Up @@ -413,14 +406,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #1]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
Expand Down Expand Up @@ -469,14 +460,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0, #-4]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
Expand Down Expand Up @@ -527,14 +516,12 @@ declare i16 @llvm.bswap.i16(i16)
define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: mov r1, #255
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: orr r1, r1, #16711680
; CHECK-NEXT: and r2, r0, r1
; CHECK-NEXT: and r0, r1, r0, ror #24
; CHECK-NEXT: orr r0, r0, r2, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
Expand Down