Skip to content

Commit 2cee8e2

Browse files
committed
[DAGCombine] Improve bswap lowering for machines that support bit rotates
Source: Hacker's Delight.
1 parent 2b42c6c commit 2cee8e2

File tree

3 files changed

+78
-96
lines changed

3 files changed

+78
-96
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9899,6 +9899,18 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
98999899
// Use a rotate by 8. This can be further expanded if necessary.
99009900
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
99019901
case MVT::i32:
9902+
// Fast path for targets with a legal 32-bit rotate-right. Uses the byte
// reversal identity from Hacker's Delight:
//   bswap(x) == ((x & 0x00FF00FF) rotr 8) | ((x rotl 8) & 0x00FF00FF)
// e.g. x = 0xAABBCCDD: 0xDD00BB00 | 0x00CC00AA == 0xDDCCBBAA.
// The rotate directions matter: rotating the masked value left and the
// unmasked value right only swaps the bytes inside each halfword
// (0xBBAADDCC for the example above), which is not a byte swap.
if (isOperationLegal(ISD::ROTR, VT)) {
  SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
  // Bytes 0 and 2 move to positions 3 and 1.
  SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
  SDValue Rotr =
      DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
  // Bytes 3 and 1 move to positions 0 and 2. A rotate-left by 8 is written
  // as a rotate-right by 24 so that only the operation checked by the guard
  // above is emitted.
  SDValue Rotl =
      DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
  SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
  return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
}
99029914
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
99039915
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
99049916
DAG.getConstant(0xFF00, dl, VT));

llvm/test/CodeGen/ARM/load-combine-big-endian.ll

Lines changed: 38 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
5353
; BSWAP is not supported by 32 bit target
5454
; CHECK-LABEL: load_i32_by_i8_bswap:
5555
; CHECK: @ %bb.0:
56+
; CHECK-NEXT: mov r1, #255
5657
; CHECK-NEXT: ldr r0, [r0]
57-
; CHECK-NEXT: mov r1, #65280
58-
; CHECK-NEXT: and r2, r0, #65280
59-
; CHECK-NEXT: and r1, r1, r0, lsr #8
60-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
61-
; CHECK-NEXT: lsl r0, r0, #24
62-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
63-
; CHECK-NEXT: orr r0, r0, r1
58+
; CHECK-NEXT: orr r1, r1, #16711680
59+
; CHECK-NEXT: and r2, r0, r1
60+
; CHECK-NEXT: and r0, r1, r0, lsr #8
61+
; CHECK-NEXT: orr r0, r0, r2, ror #24
6462
; CHECK-NEXT: mov pc, lr
6563
;
6664
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -223,21 +221,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
223221
define i64 @load_i64_by_i8_bswap(ptr %arg) {
224222
; CHECK-LABEL: load_i64_by_i8_bswap:
225223
; CHECK: @ %bb.0:
224+
; CHECK-NEXT: mov r2, #255
226225
; CHECK-NEXT: ldr r1, [r0]
227-
; CHECK-NEXT: mov r12, #65280
228226
; CHECK-NEXT: ldr r0, [r0, #4]
229-
; CHECK-NEXT: and r2, r0, #65280
230-
; CHECK-NEXT: and r3, r12, r0, lsr #8
231-
; CHECK-NEXT: orr r3, r3, r0, lsr #24
232-
; CHECK-NEXT: lsl r0, r0, #24
233-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
234-
; CHECK-NEXT: and r2, r12, r1, lsr #8
235-
; CHECK-NEXT: orr r0, r0, r3
236-
; CHECK-NEXT: and r3, r1, #65280
237-
; CHECK-NEXT: orr r2, r2, r1, lsr #24
238-
; CHECK-NEXT: lsl r1, r1, #24
239-
; CHECK-NEXT: orr r1, r1, r3, lsl #8
240-
; CHECK-NEXT: orr r1, r1, r2
227+
; CHECK-NEXT: orr r2, r2, #16711680
228+
; CHECK-NEXT: and r3, r0, r2
229+
; CHECK-NEXT: and r0, r2, r0, lsr #8
230+
; CHECK-NEXT: orr r0, r0, r3, ror #24
231+
; CHECK-NEXT: and r3, r1, r2
232+
; CHECK-NEXT: and r1, r2, r1, lsr #8
233+
; CHECK-NEXT: orr r1, r1, r3, ror #24
241234
; CHECK-NEXT: mov pc, lr
242235
;
243236
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -377,14 +370,12 @@ define i64 @load_i64_by_i8(ptr %arg) {
377370
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
378371
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
379372
; CHECK: @ %bb.0:
373+
; CHECK-NEXT: mov r1, #255
380374
; CHECK-NEXT: ldr r0, [r0, #1]
381-
; CHECK-NEXT: mov r1, #65280
382-
; CHECK-NEXT: and r2, r0, #65280
383-
; CHECK-NEXT: and r1, r1, r0, lsr #8
384-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
385-
; CHECK-NEXT: lsl r0, r0, #24
386-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
387-
; CHECK-NEXT: orr r0, r0, r1
375+
; CHECK-NEXT: orr r1, r1, #16711680
376+
; CHECK-NEXT: and r2, r0, r1
377+
; CHECK-NEXT: and r0, r1, r0, lsr #8
378+
; CHECK-NEXT: orr r0, r0, r2, ror #24
388379
; CHECK-NEXT: mov pc, lr
389380
;
390381
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
@@ -434,14 +425,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
434425
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
435426
; CHECK-LABEL: load_i32_by_i8_neg_offset:
436427
; CHECK: @ %bb.0:
428+
; CHECK-NEXT: mov r1, #255
437429
; CHECK-NEXT: ldr r0, [r0, #-4]
438-
; CHECK-NEXT: mov r1, #65280
439-
; CHECK-NEXT: and r2, r0, #65280
440-
; CHECK-NEXT: and r1, r1, r0, lsr #8
441-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
442-
; CHECK-NEXT: lsl r0, r0, #24
443-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
444-
; CHECK-NEXT: orr r0, r0, r1
430+
; CHECK-NEXT: orr r1, r1, #16711680
431+
; CHECK-NEXT: and r2, r0, r1
432+
; CHECK-NEXT: and r0, r1, r0, lsr #8
433+
; CHECK-NEXT: orr r0, r0, r2, ror #24
445434
; CHECK-NEXT: mov pc, lr
446435
;
447436
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
@@ -587,14 +576,12 @@ declare i16 @llvm.bswap.i16(i16)
587576
define i32 @load_i32_by_bswap_i16(ptr %arg) {
588577
; CHECK-LABEL: load_i32_by_bswap_i16:
589578
; CHECK: @ %bb.0:
579+
; CHECK-NEXT: mov r1, #255
590580
; CHECK-NEXT: ldr r0, [r0]
591-
; CHECK-NEXT: mov r1, #65280
592-
; CHECK-NEXT: and r2, r0, #65280
593-
; CHECK-NEXT: and r1, r1, r0, lsr #8
594-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
595-
; CHECK-NEXT: lsl r0, r0, #24
596-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
597-
; CHECK-NEXT: orr r0, r0, r1
581+
; CHECK-NEXT: orr r1, r1, #16711680
582+
; CHECK-NEXT: and r2, r0, r1
583+
; CHECK-NEXT: and r0, r1, r0, lsr #8
584+
; CHECK-NEXT: orr r0, r0, r2, ror #24
598585
; CHECK-NEXT: mov pc, lr
599586
;
600587
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
@@ -667,14 +654,12 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
667654
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
668655
; CHECK: @ %bb.0:
669656
; CHECK-NEXT: add r0, r0, r1
670-
; CHECK-NEXT: mov r1, #65280
657+
; CHECK-NEXT: mov r1, #255
658+
; CHECK-NEXT: orr r1, r1, #16711680
671659
; CHECK-NEXT: ldr r0, [r0, #12]
672-
; CHECK-NEXT: and r2, r0, #65280
673-
; CHECK-NEXT: and r1, r1, r0, lsr #8
674-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
675-
; CHECK-NEXT: lsl r0, r0, #24
676-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
677-
; CHECK-NEXT: orr r0, r0, r1
660+
; CHECK-NEXT: and r2, r0, r1
661+
; CHECK-NEXT: and r0, r1, r0, lsr #8
662+
; CHECK-NEXT: orr r0, r0, r2, ror #24
678663
; CHECK-NEXT: mov pc, lr
679664
;
680665
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
@@ -733,14 +718,12 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
733718
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
734719
; CHECK: @ %bb.0:
735720
; CHECK-NEXT: add r0, r1, r0
736-
; CHECK-NEXT: mov r1, #65280
721+
; CHECK-NEXT: mov r1, #255
722+
; CHECK-NEXT: orr r1, r1, #16711680
737723
; CHECK-NEXT: ldr r0, [r0, #13]
738-
; CHECK-NEXT: and r2, r0, #65280
739-
; CHECK-NEXT: and r1, r1, r0, lsr #8
740-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
741-
; CHECK-NEXT: lsl r0, r0, #24
742-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
743-
; CHECK-NEXT: orr r0, r0, r1
724+
; CHECK-NEXT: and r2, r0, r1
725+
; CHECK-NEXT: and r0, r1, r0, lsr #8
726+
; CHECK-NEXT: orr r0, r0, r2, ror #24
744727
; CHECK-NEXT: mov pc, lr
745728
;
746729
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:

llvm/test/CodeGen/ARM/load-combine.ll

Lines changed: 28 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
117117
; BSWAP is not supported by 32 bit target
118118
; CHECK-LABEL: load_i32_by_i8_bswap:
119119
; CHECK: @ %bb.0:
120+
; CHECK-NEXT: mov r1, #255
120121
; CHECK-NEXT: ldr r0, [r0]
121-
; CHECK-NEXT: mov r1, #65280
122-
; CHECK-NEXT: and r2, r0, #65280
123-
; CHECK-NEXT: and r1, r1, r0, lsr #8
124-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
125-
; CHECK-NEXT: lsl r0, r0, #24
126-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
127-
; CHECK-NEXT: orr r0, r0, r1
122+
; CHECK-NEXT: orr r1, r1, #16711680
123+
; CHECK-NEXT: and r2, r0, r1
124+
; CHECK-NEXT: and r0, r1, r0, lsr #8
125+
; CHECK-NEXT: orr r0, r0, r2, ror #24
128126
; CHECK-NEXT: mov pc, lr
129127
;
130128
; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -237,21 +235,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
237235
define i64 @load_i64_by_i8_bswap(ptr %arg) {
238236
; CHECK-LABEL: load_i64_by_i8_bswap:
239237
; CHECK: @ %bb.0:
238+
; CHECK-NEXT: mov r2, #255
240239
; CHECK-NEXT: ldr r1, [r0]
241-
; CHECK-NEXT: mov r12, #65280
242240
; CHECK-NEXT: ldr r0, [r0, #4]
243-
; CHECK-NEXT: and r2, r0, #65280
244-
; CHECK-NEXT: and r3, r12, r0, lsr #8
245-
; CHECK-NEXT: orr r3, r3, r0, lsr #24
246-
; CHECK-NEXT: lsl r0, r0, #24
247-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
248-
; CHECK-NEXT: and r2, r12, r1, lsr #8
249-
; CHECK-NEXT: orr r0, r0, r3
250-
; CHECK-NEXT: and r3, r1, #65280
251-
; CHECK-NEXT: orr r2, r2, r1, lsr #24
252-
; CHECK-NEXT: lsl r1, r1, #24
253-
; CHECK-NEXT: orr r1, r1, r3, lsl #8
254-
; CHECK-NEXT: orr r1, r1, r2
241+
; CHECK-NEXT: orr r2, r2, #16711680
242+
; CHECK-NEXT: and r3, r0, r2
243+
; CHECK-NEXT: and r0, r2, r0, lsr #8
244+
; CHECK-NEXT: orr r0, r0, r3, ror #24
245+
; CHECK-NEXT: and r3, r1, r2
246+
; CHECK-NEXT: and r1, r2, r1, lsr #8
247+
; CHECK-NEXT: orr r1, r1, r3, ror #24
255248
; CHECK-NEXT: mov pc, lr
256249
;
257250
; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -413,14 +406,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
413406
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
414407
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
415408
; CHECK: @ %bb.0:
409+
; CHECK-NEXT: mov r1, #255
416410
; CHECK-NEXT: ldr r0, [r0, #1]
417-
; CHECK-NEXT: mov r1, #65280
418-
; CHECK-NEXT: and r2, r0, #65280
419-
; CHECK-NEXT: and r1, r1, r0, lsr #8
420-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
421-
; CHECK-NEXT: lsl r0, r0, #24
422-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
423-
; CHECK-NEXT: orr r0, r0, r1
411+
; CHECK-NEXT: orr r1, r1, #16711680
412+
; CHECK-NEXT: and r2, r0, r1
413+
; CHECK-NEXT: and r0, r1, r0, lsr #8
414+
; CHECK-NEXT: orr r0, r0, r2, ror #24
424415
; CHECK-NEXT: mov pc, lr
425416
;
426417
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
@@ -469,14 +460,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
469460
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
470461
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
471462
; CHECK: @ %bb.0:
463+
; CHECK-NEXT: mov r1, #255
472464
; CHECK-NEXT: ldr r0, [r0, #-4]
473-
; CHECK-NEXT: mov r1, #65280
474-
; CHECK-NEXT: and r2, r0, #65280
475-
; CHECK-NEXT: and r1, r1, r0, lsr #8
476-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
477-
; CHECK-NEXT: lsl r0, r0, #24
478-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
479-
; CHECK-NEXT: orr r0, r0, r1
465+
; CHECK-NEXT: orr r1, r1, #16711680
466+
; CHECK-NEXT: and r2, r0, r1
467+
; CHECK-NEXT: and r0, r1, r0, lsr #8
468+
; CHECK-NEXT: orr r0, r0, r2, ror #24
480469
; CHECK-NEXT: mov pc, lr
481470
;
482471
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
@@ -527,14 +516,12 @@ declare i16 @llvm.bswap.i16(i16)
527516
define i32 @load_i32_by_bswap_i16(ptr %arg) {
528517
; CHECK-LABEL: load_i32_by_bswap_i16:
529518
; CHECK: @ %bb.0:
519+
; CHECK-NEXT: mov r1, #255
530520
; CHECK-NEXT: ldr r0, [r0]
531-
; CHECK-NEXT: mov r1, #65280
532-
; CHECK-NEXT: and r2, r0, #65280
533-
; CHECK-NEXT: and r1, r1, r0, lsr #8
534-
; CHECK-NEXT: orr r1, r1, r0, lsr #24
535-
; CHECK-NEXT: lsl r0, r0, #24
536-
; CHECK-NEXT: orr r0, r0, r2, lsl #8
537-
; CHECK-NEXT: orr r0, r0, r1
521+
; CHECK-NEXT: orr r1, r1, #16711680
522+
; CHECK-NEXT: and r2, r0, r1
523+
; CHECK-NEXT: and r0, r1, r0, lsr #8
524+
; CHECK-NEXT: orr r0, r0, r2, ror #24
538525
; CHECK-NEXT: mov pc, lr
539526
;
540527
; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:

0 commit comments

Comments
 (0)