Skip to content

Commit 4bfbcb5

Browse files
committed
[SelectionDAG] Optimize BSWAP expansion once more
1 parent fc95ffb commit 4bfbcb5

File tree

4 files changed

+78
-101
lines changed

4 files changed

+78
-101
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9901,15 +9901,28 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
99019901
case MVT::i32:
99029902
// This is meant for ARM specifically, which has ROTR but no ROTL.
99039903
if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
9904-
SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
9905-
// (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
9906-
SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
9907-
SDValue Rotr =
9908-
DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
9909-
SDValue Rotl =
9910-
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9911-
SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
9912-
return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
9904+
// ror rtmp, r0, #16
9905+
SDValue Ror16 =
9906+
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
9907+
// eor r1, r0, rtmp ; r1 = r0 ^ (r0 ror 16)
9908+
SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, Op, Ror16);
9909+
9910+
// bic r1, r1, #0xff0000 (clear bits 16-23)
9911+
// BIC r1, r1, #0xff0000 becomes AND r1, r1, ~0x00ff0000
9912+
// So we need the negated value: ~0x00FF0000 = 0xFF00FFFF
9913+
SDValue Mask = DAG.getConstant(0xFF00FFFFu, dl, VT);
9914+
SDValue BicResult = DAG.getNode(ISD::AND, dl, VT, Xor1, Mask);
9915+
9916+
// mov r1, r1, lsr #8
9917+
SDValue Lsr8 = DAG.getNode(ISD::SRL, dl, VT, BicResult,
9918+
DAG.getConstant(8, dl, SHVT));
9919+
9920+
// ror r0, r0, #8
9921+
SDValue Ror8 =
9922+
DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9923+
9924+
// eor r0, Lsr8, Ror8
9925+
return DAG.getNode(ISD::XOR, dl, VT, Lsr8, Ror8);
99139926
}
99149927
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
99159928
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,

llvm/lib/Target/ARM/README.txt

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -606,32 +606,6 @@ constant which was already loaded). Not sure what's necessary to do that.
606606

607607
//===---------------------------------------------------------------------===//
608608

609-
The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
610-
611-
int a(int x) { return __builtin_bswap32(x); }
612-
613-
a:
614-
mov r1, #255, 24
615-
mov r2, #255, 16
616-
and r1, r1, r0, lsr #8
617-
and r2, r2, r0, lsl #8
618-
orr r1, r1, r0, lsr #24
619-
orr r0, r2, r0, lsl #24
620-
orr r0, r0, r1
621-
bx lr
622-
623-
Something like the following would be better (fewer instructions/registers):
624-
eor r1, r0, r0, ror #16
625-
bic r1, r1, #0xff0000
626-
mov r1, r1, lsr #8
627-
eor r0, r1, r0, ror #8
628-
bx lr
629-
630-
A custom Thumb version would also be a slight improvement over the generic
631-
version.
632-
633-
//===---------------------------------------------------------------------===//
634-
635609
Consider the following simple C code:
636610

637611
void foo(unsigned char *a, unsigned char *b, int *c) {

llvm/test/CodeGen/ARM/load-combine-big-endian.ll

Lines changed: 32 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,11 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
5353
; BSWAP is not supported by 32 bit target
5454
; CHECK-LABEL: load_i32_by_i8_bswap:
5555
; CHECK: @ %bb.0:
56-
; CHECK-NEXT: mov r1, #255
5756
; CHECK-NEXT: ldr r0, [r0]
58-
; CHECK-NEXT: orr r1, r1, #16711680
59-
; CHECK-NEXT: and r2, r0, r1
60-
; CHECK-NEXT: and r0, r1, r0, ror #24
61-
; CHECK-NEXT: orr r0, r0, r2, ror #8
57+
; CHECK-NEXT: eor r1, r0, r0, ror #16
58+
; CHECK-NEXT: bic r1, r1, #16711680
59+
; CHECK-NEXT: lsr r1, r1, #8
60+
; CHECK-NEXT: eor r0, r1, r0, ror #8
6261
; CHECK-NEXT: mov pc, lr
6362
;
6463
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -221,16 +220,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
221220
define i64 @load_i64_by_i8_bswap(ptr %arg) {
222221
; CHECK-LABEL: load_i64_by_i8_bswap:
223222
; CHECK: @ %bb.0:
224-
; CHECK-NEXT: mov r2, #255
225223
; CHECK-NEXT: ldr r1, [r0]
226224
; CHECK-NEXT: ldr r0, [r0, #4]
227-
; CHECK-NEXT: orr r2, r2, #16711680
228-
; CHECK-NEXT: and r3, r0, r2
229-
; CHECK-NEXT: and r0, r2, r0, ror #24
230-
; CHECK-NEXT: orr r0, r0, r3, ror #8
231-
; CHECK-NEXT: and r3, r1, r2
232-
; CHECK-NEXT: and r1, r2, r1, ror #24
233-
; CHECK-NEXT: orr r1, r1, r3, ror #8
225+
; CHECK-NEXT: eor r2, r0, r0, ror #16
226+
; CHECK-NEXT: bic r2, r2, #16711680
227+
; CHECK-NEXT: lsr r2, r2, #8
228+
; CHECK-NEXT: eor r0, r2, r0, ror #8
229+
; CHECK-NEXT: eor r2, r1, r1, ror #16
230+
; CHECK-NEXT: bic r2, r2, #16711680
231+
; CHECK-NEXT: lsr r2, r2, #8
232+
; CHECK-NEXT: eor r1, r2, r1, ror #8
234233
; CHECK-NEXT: mov pc, lr
235234
;
236235
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -370,12 +369,11 @@ define i64 @load_i64_by_i8(ptr %arg) {
370369
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
371370
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
372371
; CHECK: @ %bb.0:
373-
; CHECK-NEXT: mov r1, #255
374372
; CHECK-NEXT: ldr r0, [r0, #1]
375-
; CHECK-NEXT: orr r1, r1, #16711680
376-
; CHECK-NEXT: and r2, r0, r1
377-
; CHECK-NEXT: and r0, r1, r0, ror #24
378-
; CHECK-NEXT: orr r0, r0, r2, ror #8
373+
; CHECK-NEXT: eor r1, r0, r0, ror #16
374+
; CHECK-NEXT: bic r1, r1, #16711680
375+
; CHECK-NEXT: lsr r1, r1, #8
376+
; CHECK-NEXT: eor r0, r1, r0, ror #8
379377
; CHECK-NEXT: mov pc, lr
380378
;
381379
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
@@ -425,12 +423,11 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
425423
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
426424
; CHECK-LABEL: load_i32_by_i8_neg_offset:
427425
; CHECK: @ %bb.0:
428-
; CHECK-NEXT: mov r1, #255
429426
; CHECK-NEXT: ldr r0, [r0, #-4]
430-
; CHECK-NEXT: orr r1, r1, #16711680
431-
; CHECK-NEXT: and r2, r0, r1
432-
; CHECK-NEXT: and r0, r1, r0, ror #24
433-
; CHECK-NEXT: orr r0, r0, r2, ror #8
427+
; CHECK-NEXT: eor r1, r0, r0, ror #16
428+
; CHECK-NEXT: bic r1, r1, #16711680
429+
; CHECK-NEXT: lsr r1, r1, #8
430+
; CHECK-NEXT: eor r0, r1, r0, ror #8
434431
; CHECK-NEXT: mov pc, lr
435432
;
436433
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
@@ -576,12 +573,11 @@ declare i16 @llvm.bswap.i16(i16)
576573
define i32 @load_i32_by_bswap_i16(ptr %arg) {
577574
; CHECK-LABEL: load_i32_by_bswap_i16:
578575
; CHECK: @ %bb.0:
579-
; CHECK-NEXT: mov r1, #255
580576
; CHECK-NEXT: ldr r0, [r0]
581-
; CHECK-NEXT: orr r1, r1, #16711680
582-
; CHECK-NEXT: and r2, r0, r1
583-
; CHECK-NEXT: and r0, r1, r0, ror #24
584-
; CHECK-NEXT: orr r0, r0, r2, ror #8
577+
; CHECK-NEXT: eor r1, r0, r0, ror #16
578+
; CHECK-NEXT: bic r1, r1, #16711680
579+
; CHECK-NEXT: lsr r1, r1, #8
580+
; CHECK-NEXT: eor r0, r1, r0, ror #8
585581
; CHECK-NEXT: mov pc, lr
586582
;
587583
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
@@ -654,12 +650,11 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
654650
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
655651
; CHECK: @ %bb.0:
656652
; CHECK-NEXT: add r0, r0, r1
657-
; CHECK-NEXT: mov r1, #255
658-
; CHECK-NEXT: orr r1, r1, #16711680
659653
; CHECK-NEXT: ldr r0, [r0, #12]
660-
; CHECK-NEXT: and r2, r0, r1
661-
; CHECK-NEXT: and r0, r1, r0, ror #24
662-
; CHECK-NEXT: orr r0, r0, r2, ror #8
654+
; CHECK-NEXT: eor r1, r0, r0, ror #16
655+
; CHECK-NEXT: bic r1, r1, #16711680
656+
; CHECK-NEXT: lsr r1, r1, #8
657+
; CHECK-NEXT: eor r0, r1, r0, ror #8
663658
; CHECK-NEXT: mov pc, lr
664659
;
665660
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
@@ -718,12 +713,11 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
718713
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
719714
; CHECK: @ %bb.0:
720715
; CHECK-NEXT: add r0, r1, r0
721-
; CHECK-NEXT: mov r1, #255
722-
; CHECK-NEXT: orr r1, r1, #16711680
723716
; CHECK-NEXT: ldr r0, [r0, #13]
724-
; CHECK-NEXT: and r2, r0, r1
725-
; CHECK-NEXT: and r0, r1, r0, ror #24
726-
; CHECK-NEXT: orr r0, r0, r2, ror #8
717+
; CHECK-NEXT: eor r1, r0, r0, ror #16
718+
; CHECK-NEXT: bic r1, r1, #16711680
719+
; CHECK-NEXT: lsr r1, r1, #8
720+
; CHECK-NEXT: eor r0, r1, r0, ror #8
727721
; CHECK-NEXT: mov pc, lr
728722
;
729723
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:

llvm/test/CodeGen/ARM/load-combine.ll

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,11 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
117117
; BSWAP is not supported by 32 bit target
118118
; CHECK-LABEL: load_i32_by_i8_bswap:
119119
; CHECK: @ %bb.0:
120-
; CHECK-NEXT: mov r1, #255
121120
; CHECK-NEXT: ldr r0, [r0]
122-
; CHECK-NEXT: orr r1, r1, #16711680
123-
; CHECK-NEXT: and r2, r0, r1
124-
; CHECK-NEXT: and r0, r1, r0, ror #24
125-
; CHECK-NEXT: orr r0, r0, r2, ror #8
121+
; CHECK-NEXT: eor r1, r0, r0, ror #16
122+
; CHECK-NEXT: bic r1, r1, #16711680
123+
; CHECK-NEXT: lsr r1, r1, #8
124+
; CHECK-NEXT: eor r0, r1, r0, ror #8
126125
; CHECK-NEXT: mov pc, lr
127126
;
128127
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -235,16 +234,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
235234
define i64 @load_i64_by_i8_bswap(ptr %arg) {
236235
; CHECK-LABEL: load_i64_by_i8_bswap:
237236
; CHECK: @ %bb.0:
238-
; CHECK-NEXT: mov r2, #255
239237
; CHECK-NEXT: ldr r1, [r0]
240238
; CHECK-NEXT: ldr r0, [r0, #4]
241-
; CHECK-NEXT: orr r2, r2, #16711680
242-
; CHECK-NEXT: and r3, r0, r2
243-
; CHECK-NEXT: and r0, r2, r0, ror #24
244-
; CHECK-NEXT: orr r0, r0, r3, ror #8
245-
; CHECK-NEXT: and r3, r1, r2
246-
; CHECK-NEXT: and r1, r2, r1, ror #24
247-
; CHECK-NEXT: orr r1, r1, r3, ror #8
239+
; CHECK-NEXT: eor r2, r0, r0, ror #16
240+
; CHECK-NEXT: bic r2, r2, #16711680
241+
; CHECK-NEXT: lsr r2, r2, #8
242+
; CHECK-NEXT: eor r0, r2, r0, ror #8
243+
; CHECK-NEXT: eor r2, r1, r1, ror #16
244+
; CHECK-NEXT: bic r2, r2, #16711680
245+
; CHECK-NEXT: lsr r2, r2, #8
246+
; CHECK-NEXT: eor r1, r2, r1, ror #8
248247
; CHECK-NEXT: mov pc, lr
249248
;
250249
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -406,12 +405,11 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
406405
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
407406
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
408407
; CHECK: @ %bb.0:
409-
; CHECK-NEXT: mov r1, #255
410408
; CHECK-NEXT: ldr r0, [r0, #1]
411-
; CHECK-NEXT: orr r1, r1, #16711680
412-
; CHECK-NEXT: and r2, r0, r1
413-
; CHECK-NEXT: and r0, r1, r0, ror #24
414-
; CHECK-NEXT: orr r0, r0, r2, ror #8
409+
; CHECK-NEXT: eor r1, r0, r0, ror #16
410+
; CHECK-NEXT: bic r1, r1, #16711680
411+
; CHECK-NEXT: lsr r1, r1, #8
412+
; CHECK-NEXT: eor r0, r1, r0, ror #8
415413
; CHECK-NEXT: mov pc, lr
416414
;
417415
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
@@ -460,12 +458,11 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
460458
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
461459
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
462460
; CHECK: @ %bb.0:
463-
; CHECK-NEXT: mov r1, #255
464461
; CHECK-NEXT: ldr r0, [r0, #-4]
465-
; CHECK-NEXT: orr r1, r1, #16711680
466-
; CHECK-NEXT: and r2, r0, r1
467-
; CHECK-NEXT: and r0, r1, r0, ror #24
468-
; CHECK-NEXT: orr r0, r0, r2, ror #8
462+
; CHECK-NEXT: eor r1, r0, r0, ror #16
463+
; CHECK-NEXT: bic r1, r1, #16711680
464+
; CHECK-NEXT: lsr r1, r1, #8
465+
; CHECK-NEXT: eor r0, r1, r0, ror #8
469466
; CHECK-NEXT: mov pc, lr
470467
;
471468
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
@@ -516,12 +513,11 @@ declare i16 @llvm.bswap.i16(i16)
516513
define i32 @load_i32_by_bswap_i16(ptr %arg) {
517514
; CHECK-LABEL: load_i32_by_bswap_i16:
518515
; CHECK: @ %bb.0:
519-
; CHECK-NEXT: mov r1, #255
520516
; CHECK-NEXT: ldr r0, [r0]
521-
; CHECK-NEXT: orr r1, r1, #16711680
522-
; CHECK-NEXT: and r2, r0, r1
523-
; CHECK-NEXT: and r0, r1, r0, ror #24
524-
; CHECK-NEXT: orr r0, r0, r2, ror #8
517+
; CHECK-NEXT: eor r1, r0, r0, ror #16
518+
; CHECK-NEXT: bic r1, r1, #16711680
519+
; CHECK-NEXT: lsr r1, r1, #8
520+
; CHECK-NEXT: eor r0, r1, r0, ror #8
525521
; CHECK-NEXT: mov pc, lr
526522
;
527523
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:

0 commit comments

Comments
 (0)