Skip to content

Commit 50511df

Browse files
author
Irina Dobrescu
committed
[AArch64] Lower bitreverse in ISel
Add lowering support for bitreverse. Previously, lowering bitreverse expanded it into a series of other instructions; with this patch, it produces a single rbit instruction instead. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D102397
1 parent 888ce70 commit 50511df

File tree

9 files changed

+299
-50
lines changed

9 files changed

+299
-50
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10879,7 +10879,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1087910879
}
1088010880
case NEON::BI__builtin_neon_vrbit_v:
1088110881
case NEON::BI__builtin_neon_vrbitq_v: {
10882-
Int = Intrinsic::aarch64_neon_rbit;
10882+
Int = Intrinsic::bitreverse;
1088310883
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
1088410884
}
1088510885
case NEON::BI__builtin_neon_vaddv_u8:

clang/test/CodeGen/aarch64-neon-misc.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,42 +1766,42 @@ poly8x16_t test_vmvnq_p8(poly8x16_t a) {
17661766
}
17671767

17681768
// CHECK-LABEL: @test_vrbit_s8(
1769-
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a)
1769+
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
17701770
// CHECK: ret <8 x i8> [[VRBIT_I]]
17711771
int8x8_t test_vrbit_s8(int8x8_t a) {
17721772
return vrbit_s8(a);
17731773
}
17741774

17751775
// CHECK-LABEL: @test_vrbitq_s8(
1776-
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a)
1776+
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
17771777
// CHECK: ret <16 x i8> [[VRBIT_I]]
17781778
int8x16_t test_vrbitq_s8(int8x16_t a) {
17791779
return vrbitq_s8(a);
17801780
}
17811781

17821782
// CHECK-LABEL: @test_vrbit_u8(
1783-
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a)
1783+
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
17841784
// CHECK: ret <8 x i8> [[VRBIT_I]]
17851785
uint8x8_t test_vrbit_u8(uint8x8_t a) {
17861786
return vrbit_u8(a);
17871787
}
17881788

17891789
// CHECK-LABEL: @test_vrbitq_u8(
1790-
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a)
1790+
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
17911791
// CHECK: ret <16 x i8> [[VRBIT_I]]
17921792
uint8x16_t test_vrbitq_u8(uint8x16_t a) {
17931793
return vrbitq_u8(a);
17941794
}
17951795

17961796
// CHECK-LABEL: @test_vrbit_p8(
1797-
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a)
1797+
// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
17981798
// CHECK: ret <8 x i8> [[VRBIT_I]]
17991799
poly8x8_t test_vrbit_p8(poly8x8_t a) {
18001800
return vrbit_p8(a);
18011801
}
18021802

18031803
// CHECK-LABEL: @test_vrbitq_p8(
1804-
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a)
1804+
// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
18051805
// CHECK: ret <16 x i8> [[VRBIT_I]]
18061806
poly8x16_t test_vrbitq_p8(poly8x16_t a) {
18071807
return vrbitq_p8(a);

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -444,9 +444,6 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
444444
def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
445445
def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic;
446446

447-
// Vector Bitwise Reverse
448-
def int_aarch64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic;
449-
450447
// Vector Conversions Between Half-Precision and Single-Precision.
451448
def int_aarch64_neon_vcvtfp2hf
452449
: DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
553553
F->arg_begin()->getType());
554554
return true;
555555
}
556+
if (Name.startswith("aarch64.neon.rbit")) {
557+
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
558+
F->arg_begin()->getType());
559+
return true;
560+
}
556561
if (Name.startswith("arm.neon.vclz")) {
557562
Type* args[2] = {
558563
F->arg_begin()->getType(),

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10231023

10241024
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
10251025
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1026+
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1027+
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
10261028

10271029
// AArch64 doesn't have MUL.2d:
10281030
setOperationAction(ISD::MUL, MVT::v2i64, Expand);

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4166,7 +4166,7 @@ def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
41664166
def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
41674167
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
41684168

4169-
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
4169+
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
41704170
defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
41714171
defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
41724172
defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;

llvm/test/CodeGen/AArch64/arm64-vbitwise.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,20 @@ define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
44
;CHECK-LABEL: rbit_8b:
55
;CHECK: rbit.8b
66
%tmp1 = load <8 x i8>, <8 x i8>* %A
7-
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1)
7+
%tmp3 = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %tmp1)
88
ret <8 x i8> %tmp3
99
}
1010

1111
define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
1212
;CHECK-LABEL: rbit_16b:
1313
;CHECK: rbit.16b
1414
%tmp1 = load <16 x i8>, <16 x i8>* %A
15-
%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1)
15+
%tmp3 = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %tmp1)
1616
ret <16 x i8> %tmp3
1717
}
1818

19-
declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
20-
declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
19+
declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) nounwind readnone
20+
declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) nounwind readnone
2121

2222
define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
2323
;CHECK-LABEL: sxtl8h:
Lines changed: 159 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
12
; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s
23

34
; These tests just check that the plumbing is in place for @llvm.bitreverse.
@@ -6,13 +7,16 @@ declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
67

78
define <2 x i16> @f(<2 x i16> %a) {
89
; CHECK-LABEL: f:
9-
; CHECK: fmov [[REG1:w[0-9]+]], s0
10-
; CHECK-DAG: rbit [[REG2:w[0-9]+]], [[REG1]]
11-
; CHECK-DAG: fmov s0, [[REG2]]
12-
; CHECK-DAG: mov [[REG3:w[0-9]+]], v0.s[1]
13-
; CHECK-DAG: rbit [[REG4:w[0-9]+]], [[REG3]]
14-
; CHECK-DAG: mov v0.s[1], [[REG4]]
15-
; CHECK-DAG: ushr v0.2s, v0.2s, #16
10+
; CHECK: // %bb.0:
11+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
12+
; CHECK-NEXT: fmov w8, s0
13+
; CHECK-NEXT: rbit w8, w8
14+
; CHECK-NEXT: mov w9, v0.s[1]
15+
; CHECK-NEXT: fmov s0, w8
16+
; CHECK-NEXT: rbit w8, w9
17+
; CHECK-NEXT: mov v0.s[1], w8
18+
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
19+
; CHECK-NEXT: ret
1620
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
1721
ret <2 x i16> %b
1822
}
@@ -21,41 +25,161 @@ declare i8 @llvm.bitreverse.i8(i8) readnone
2125

2226
define i8 @g(i8 %a) {
2327
; CHECK-LABEL: g:
24-
; CHECK: rbit [[REG:w[0-9]+]], w0
25-
; CHECK-NEXT: lsr w0, [[REG]], #24
26-
; CHECK-NEXT: ret
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: rbit w8, w0
30+
; CHECK-NEXT: lsr w0, w8, #24
31+
; CHECK-NEXT: ret
2732
%b = call i8 @llvm.bitreverse.i8(i8 %a)
2833
ret i8 %b
2934
}
3035

36+
declare i16 @llvm.bitreverse.i16(i16) readnone
37+
38+
define i16 @g_16(i16 %a) {
39+
; CHECK-LABEL: g_16:
40+
; CHECK: // %bb.0:
41+
; CHECK-NEXT: rbit w8, w0
42+
; CHECK-NEXT: lsr w0, w8, #16
43+
; CHECK-NEXT: ret
44+
%b = call i16 @llvm.bitreverse.i16(i16 %a)
45+
ret i16 %b
46+
}
47+
48+
declare i32 @llvm.bitreverse.i32(i32) readnone
49+
50+
define i32 @g_32(i32 %a) {
51+
; CHECK-LABEL: g_32:
52+
; CHECK: // %bb.0:
53+
; CHECK-NEXT: rbit w0, w0
54+
; CHECK-NEXT: ret
55+
%b = call i32 @llvm.bitreverse.i32(i32 %a)
56+
ret i32 %b
57+
}
58+
59+
declare i64 @llvm.bitreverse.i64(i64) readnone
60+
61+
define i64 @g_64(i64 %a) {
62+
; CHECK-LABEL: g_64:
63+
; CHECK: // %bb.0:
64+
; CHECK-NEXT: rbit x0, x0
65+
; CHECK-NEXT: ret
66+
%b = call i64 @llvm.bitreverse.i64(i64 %a)
67+
ret i64 %b
68+
}
69+
3170
declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone
3271

3372
define <8 x i8> @g_vec(<8 x i8> %a) {
34-
; CHECK-DAG: movi [[M1:v.*]], #15
35-
; CHECK-DAG: movi [[M2:v.*]], #240
36-
; CHECK: and [[A1:v.*]], v0.8b, [[M1]]
37-
; CHECK: and [[A2:v.*]], v0.8b, [[M2]]
38-
; CHECK-DAG: shl [[L4:v.*]], [[A1]], #4
39-
; CHECK-DAG: ushr [[R4:v.*]], [[A2]], #4
40-
; CHECK-DAG: orr [[V4:v.*]], [[R4]], [[L4]]
41-
42-
; CHECK-DAG: movi [[M3:v.*]], #51
43-
; CHECK-DAG: movi [[M4:v.*]], #204
44-
; CHECK: and [[A3:v.*]], [[V4]], [[M3]]
45-
; CHECK: and [[A4:v.*]], [[V4]], [[M4]]
46-
; CHECK-DAG: shl [[L2:v.*]], [[A3]], #2
47-
; CHECK-DAG: ushr [[R2:v.*]], [[A4]], #2
48-
; CHECK-DAG: orr [[V2:v.*]], [[R2]], [[L2]]
49-
50-
; CHECK-DAG: movi [[M5:v.*]], #85
51-
; CHECK-DAG: movi [[M6:v.*]], #170
52-
; CHECK: and [[A5:v.*]], [[V2]], [[M5]]
53-
; CHECK: and [[A6:v.*]], [[V2]], [[M6]]
54-
; CHECK-DAG: shl [[L1:v.*]], [[A5]], #1
55-
; CHECK-DAG: ushr [[R1:v.*]], [[A6]], #1
56-
; CHECK: orr [[V1:v.*]], [[R1]], [[L1]]
57-
58-
; CHECK: ret
73+
; CHECK-LABEL: g_vec:
74+
; CHECK: // %bb.0:
75+
; CHECK-NEXT: rbit v0.8b, v0.8b
76+
; CHECK-NEXT: ret
5977
%b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
6078
ret <8 x i8> %b
6179
}
80+
81+
declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) readnone
82+
83+
define <16 x i8> @g_vec_16x8(<16 x i8> %a) {
84+
; CHECK-LABEL: g_vec_16x8:
85+
; CHECK: // %bb.0:
86+
; CHECK-NEXT: rbit v0.16b, v0.16b
87+
; CHECK-NEXT: ret
88+
%b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
89+
ret <16 x i8> %b
90+
}
91+
92+
declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>) readnone
93+
94+
define <4 x i16> @g_vec_4x16(<4 x i16> %a) {
95+
; CHECK-LABEL: g_vec_4x16:
96+
; CHECK: // %bb.0:
97+
; CHECK-NEXT: rev16 v0.8b, v0.8b
98+
; CHECK-NEXT: rbit v0.8b, v0.8b
99+
; CHECK-NEXT: ret
100+
%b = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a)
101+
ret <4 x i16> %b
102+
}
103+
104+
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) readnone
105+
106+
define <8 x i16> @g_vec_8x16(<8 x i16> %a) {
107+
; CHECK-LABEL: g_vec_8x16:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: rev16 v0.16b, v0.16b
110+
; CHECK-NEXT: rbit v0.16b, v0.16b
111+
; CHECK-NEXT: ret
112+
%b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
113+
ret <8 x i16> %b
114+
}
115+
116+
declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) readnone
117+
118+
define <2 x i32> @g_vec_2x32(<2 x i32> %a) {
119+
; CHECK-LABEL: g_vec_2x32:
120+
; CHECK: // %bb.0:
121+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
122+
; CHECK-NEXT: fmov w8, s0
123+
; CHECK-NEXT: rbit w8, w8
124+
; CHECK-NEXT: mov w9, v0.s[1]
125+
; CHECK-NEXT: fmov s0, w8
126+
; CHECK-NEXT: rbit w8, w9
127+
; CHECK-NEXT: mov v0.s[1], w8
128+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
129+
; CHECK-NEXT: ret
130+
131+
%b = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
132+
ret <2 x i32> %b
133+
}
134+
135+
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
136+
137+
define <4 x i32> @g_vec_4x32(<4 x i32> %a) {
138+
; CHECK-LABEL: g_vec_4x32:
139+
; CHECK: // %bb.0:
140+
; CHECK-NEXT: fmov w10, s0
141+
; CHECK-NEXT: mov w8, v0.s[1]
142+
; CHECK-NEXT: rbit w10, w10
143+
; CHECK-NEXT: mov w9, v0.s[2]
144+
; CHECK-NEXT: mov w11, v0.s[3]
145+
; CHECK-NEXT: fmov s0, w10
146+
; CHECK-NEXT: rbit w8, w8
147+
; CHECK-NEXT: rbit w9, w9
148+
; CHECK-NEXT: mov v0.s[1], w8
149+
; CHECK-NEXT: mov v0.s[2], w9
150+
; CHECK-NEXT: rbit w8, w11
151+
; CHECK-NEXT: mov v0.s[3], w8
152+
; CHECK-NEXT: ret
153+
%b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
154+
ret <4 x i32> %b
155+
}
156+
157+
declare <1 x i64> @llvm.bitreverse.v1i64(<1 x i64>) readnone
158+
159+
define <1 x i64> @g_vec_1x64(<1 x i64> %a) {
160+
; CHECK-LABEL: g_vec_1x64:
161+
; CHECK: // %bb.0:
162+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
163+
; CHECK-NEXT: fmov x8, d0
164+
; CHECK-NEXT: rbit x8, x8
165+
; CHECK-NEXT: fmov d0, x8
166+
; CHECK-NEXT: ret
167+
%b = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %a)
168+
ret <1 x i64> %b
169+
}
170+
171+
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) readnone
172+
173+
define <2 x i64> @g_vec_2x64(<2 x i64> %a) {
174+
; CHECK-LABEL: g_vec_2x64:
175+
; CHECK: // %bb.0:
176+
; CHECK-NEXT: fmov x8, d0
177+
; CHECK-NEXT: rbit x8, x8
178+
; CHECK-NEXT: mov x9, v0.d[1]
179+
; CHECK-NEXT: fmov d0, x8
180+
; CHECK-NEXT: rbit x8, x9
181+
; CHECK-NEXT: mov v0.d[1], x8
182+
; CHECK-NEXT: ret
183+
%b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
184+
ret <2 x i64> %b
185+
}

0 commit comments

Comments
 (0)