Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,

// Only ARMv6 and later have a native BSWAP (REV); older cores need lowering.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);

bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
Expand Down Expand Up @@ -9508,6 +9508,42 @@ static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
return false;
}

/// Custom-lower a 32-bit ISD::BSWAP into a rotate/xor/mask/shift sequence.
///
/// The DAG built here mirrors the following four-instruction ARM idiom:
///
///   eor r1, r0, r0, ror #16
///   bic r1, r1, #0xff0000
///   mov r1, r1, lsr #8
///   eor r0, r1, r0, ror #8
///
/// \param Op  The BSWAP node; operand 0 is the value to byte-reverse. All
///            nodes are created as MVT::i32.
/// \param DAG The SelectionDAG in which the replacement nodes are created.
/// \returns   The final XOR node producing the byte-swapped value.
///
/// NOTE(review): a reviewer of this change asked whether this is a generic
/// expansion that belongs outside target code and whether an equivalent
/// already exists -- confirm before relying on this helper.
/// NOTE(review): the regenerated CHECK lines in this change apply the mask
/// with mov/orr + and rather than a single bic -- confirm that is acceptable.
static SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  const MVT VT = MVT::i32;
  SDValue Input = Op.getOperand(0);

  // Small helper so every immediate is built with the same location and type.
  auto Imm = [&](uint64_t Value) { return DAG.getConstant(Value, dl, VT); };

  // Mixed = Input ^ (Input ror 16)
  SDValue Mixed = DAG.getNode(ISD::XOR, dl, VT, Input,
                              DAG.getNode(ISD::ROTR, dl, VT, Input, Imm(16)));

  // "bic #0x00ff0000" expressed as an AND with the complemented immediate:
  // ~0x00FF0000 == 0xFF00FFFF, i.e. bits 16-23 are cleared.
  SDValue Cleared = DAG.getNode(ISD::AND, dl, VT, Mixed, Imm(0xFF00FFFFu));

  // Shifted = Cleared lsr 8
  SDValue Shifted = DAG.getNode(ISD::SRL, dl, VT, Cleared, Imm(8));

  // Rotated = Input ror 8
  SDValue Rotated = DAG.getNode(ISD::ROTR, dl, VT, Input, Imm(8));

  // Result = Shifted ^ Rotated
  return DAG.getNode(ISD::XOR, dl, VT, Shifted, Rotated);
}

static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
Expand Down Expand Up @@ -10708,6 +10744,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UCMP:
case ISD::SCMP:
return LowerCMP(Op, DAG);
case ISD::BSWAP:
return LowerBSWAP(Op, DAG);
}
}

Expand Down
26 changes: 0 additions & 26 deletions llvm/lib/Target/ARM/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -606,32 +606,6 @@ constant which was already loaded). Not sure what's necessary to do that.

//===---------------------------------------------------------------------===//

The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:

int a(int x) { return __builtin_bswap32(x); }

a:
mov r1, #255, 24
mov r2, #255, 16
and r1, r1, r0, lsr #8
and r2, r2, r0, lsl #8
orr r1, r1, r0, lsr #24
orr r0, r2, r0, lsl #24
orr r0, r0, r1
bx lr

Something like the following would be better (fewer instructions/registers):
eor r1, r0, r0, ror #16
bic r1, r1, #0xff0000
mov r1, r1, lsr #8
eor r0, r1, r0, ror #8
bx lr

A custom Thumb version would also be a slight improvement over the generic
version.

//===---------------------------------------------------------------------===//

Consider the following simple C code:

void foo(unsigned char *a, unsigned char *b, int *c) {
Expand Down
101 changes: 46 additions & 55 deletions llvm/test/CodeGen/ARM/load-combine-big-endian.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
Expand Down Expand Up @@ -224,20 +223,17 @@ define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: mov r12, #65280
; CHECK-NEXT: mov r3, #65280
; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r3, r12, r0, lsr #8
; CHECK-NEXT: orr r3, r3, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: and r2, r12, r1, lsr #8
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: and r3, r1, #65280
; CHECK-NEXT: orr r2, r2, r1, lsr #24
; CHECK-NEXT: lsl r1, r1, #24
; CHECK-NEXT: orr r1, r1, r3, lsl #8
; CHECK-NEXT: orr r1, r1, r2
; CHECK-NEXT: orr r3, r3, #-16777216
; CHECK-NEXT: eor r2, r0, r0, ror #16
; CHECK-NEXT: and r2, r2, r3
; CHECK-NEXT: lsr r2, r2, #8
; CHECK-NEXT: eor r0, r2, r0, ror #8
; CHECK-NEXT: eor r2, r1, r1, ror #16
; CHECK-NEXT: and r2, r2, r3
; CHECK-NEXT: lsr r2, r2, #8
; CHECK-NEXT: eor r1, r2, r1, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
Expand Down Expand Up @@ -378,13 +374,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0, #1]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
Expand Down Expand Up @@ -435,13 +430,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0, #-4]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
Expand Down Expand Up @@ -588,13 +582,12 @@ define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
Expand Down Expand Up @@ -667,14 +660,13 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r0, r1
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: ldr r0, [r0, #12]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
Expand Down Expand Up @@ -733,14 +725,13 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r1, r0
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: ldr r0, [r0, #13]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
Expand Down
76 changes: 34 additions & 42 deletions llvm/test/CodeGen/ARM/load-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,15 @@ define i32 @load_i32_by_i8_aligned(ptr %arg) {
; ptr p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(ptr %arg) {
; BSWAP is not supported by 32 bit target
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
Expand Down Expand Up @@ -238,20 +236,17 @@ define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: mov r12, #65280
; CHECK-NEXT: mov r3, #65280
; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r3, r12, r0, lsr #8
; CHECK-NEXT: orr r3, r3, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: and r2, r12, r1, lsr #8
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: and r3, r1, #65280
; CHECK-NEXT: orr r2, r2, r1, lsr #24
; CHECK-NEXT: lsl r1, r1, #24
; CHECK-NEXT: orr r1, r1, r3, lsl #8
; CHECK-NEXT: orr r1, r1, r2
; CHECK-NEXT: orr r3, r3, #-16777216
; CHECK-NEXT: eor r2, r0, r0, ror #16
; CHECK-NEXT: and r2, r2, r3
; CHECK-NEXT: lsr r2, r2, #8
; CHECK-NEXT: eor r0, r2, r0, ror #8
; CHECK-NEXT: eor r2, r1, r1, ror #16
; CHECK-NEXT: and r2, r2, r3
; CHECK-NEXT: lsr r2, r2, #8
; CHECK-NEXT: eor r1, r2, r1, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
Expand Down Expand Up @@ -414,13 +409,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0, #1]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
Expand Down Expand Up @@ -470,13 +464,12 @@ define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0, #-4]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
Expand Down Expand Up @@ -528,13 +521,12 @@ define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: and r2, r0, #65280
; CHECK-NEXT: and r1, r1, r0, lsr #8
; CHECK-NEXT: orr r1, r1, r0, lsr #24
; CHECK-NEXT: lsl r0, r0, #24
; CHECK-NEXT: orr r0, r0, r2, lsl #8
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov r2, #65280
; CHECK-NEXT: orr r2, r2, #-16777216
; CHECK-NEXT: eor r1, r0, r0, ror #16
; CHECK-NEXT: and r1, r1, r2
; CHECK-NEXT: lsr r1, r1, #8
; CHECK-NEXT: eor r0, r1, r0, ror #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
Expand Down
Loading