Commit becaa68 (1 parent: e37b220)

[ARM] Constant fold VCTP intrinsics
We can sometimes get into the situation where the operand to a vctp intrinsic becomes constant, such as after a loop is fully unrolled. This adds the constant folding needed for them, allowing them to simplify away and hopefully simplifying remaining instructions.

Differential Revision: https://reviews.llvm.org/D84110
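For illustration, a minimal before/after sketch of the fold (the function name @example8 and the constant operand 8 are chosen just for this sketch; the expected result mirrors the @vctp8_8 test added below). Given:

declare <16 x i1> @llvm.arm.mve.vctp8(i32)

define <16 x i1> @example8() {
entry:
  ; The element-count operand is the constant 8, so with this patch
  ; instsimplify can fold the call away entirely.
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
  ret <16 x i1> %pred
}

running opt -instsimplify now replaces the call with the literal <16 x i1> predicate whose first 8 lanes are true and whose remaining 8 lanes are false.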

File tree: 3 files changed, +303 -1 lines changed

llvm/lib/Analysis/ConstantFolding.cpp (34 additions, 1 deletion)

@@ -41,6 +41,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -1456,6 +1457,11 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::experimental_vector_reduce_smax:
   case Intrinsic::experimental_vector_reduce_umin:
   case Intrinsic::experimental_vector_reduce_umax:
+  // Target intrinsics
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64:
     return true;
 
   // Floating point operations cannot be folded in strictfp functions in
@@ -2719,7 +2725,8 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = FVTy->getElementType();
 
-  if (IntrinsicID == Intrinsic::masked_load) {
+  switch (IntrinsicID) {
+  case Intrinsic::masked_load: {
     auto *SrcPtr = Operands[0];
     auto *Mask = Operands[2];
     auto *Passthru = Operands[3];
@@ -2757,6 +2764,32 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
       return nullptr;
     return ConstantVector::get(NewElements);
   }
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64: {
+    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+      unsigned Lanes = FVTy->getNumElements();
+      uint64_t Limit = Op->getZExtValue();
+      // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
+      // sure we get the limit right in that case and set all relevant lanes.
+      if (IntrinsicID == Intrinsic::arm_mve_vctp64)
+        Limit *= 2;
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (i < Limit)
+          NCs.push_back(ConstantInt::getTrue(Ty));
+        else
+          NCs.push_back(ConstantInt::getFalse(Ty));
+      }
+      return ConstantVector::get(NCs);
+    }
+    break;
+  }
+  default:
+    break;
+  }
 
   for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
New file (2 additions, 0 deletions):

if not 'ARM' in config.root.targets:
    config.unsupported = True
New test file (267 additions, 0 deletions):

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instsimplify -S -o - %s | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

define <16 x i1> @vctp8_0() {
; CHECK-LABEL: @vctp8_0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> zeroinitializer
;
entry:
  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
  ret <16 x i1> %int
}

define <16 x i1> @vctp8_1() {
; CHECK-LABEL: @vctp8_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
;
entry:
  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
  ret <16 x i1> %int
}

define <16 x i1> @vctp8_8() {
; CHECK-LABEL: @vctp8_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
;
entry:
  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
  ret <16 x i1> %int
}

define <16 x i1> @vctp8_15() {
; CHECK-LABEL: @vctp8_15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
;
entry:
  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
  ret <16 x i1> %int
}

define <16 x i1> @vctp8_16() {
; CHECK-LABEL: @vctp8_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
  ret <16 x i1> %int
}

define <16 x i1> @vctp8_100() {
; CHECK-LABEL: @vctp8_100(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
  ret <16 x i1> %int
}

define <16 x i1> @vctp8_m1() {
; CHECK-LABEL: @vctp8_m1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
  ret <16 x i1> %int
}


define <8 x i1> @vctp16_0() {
; CHECK-LABEL: @vctp16_0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> zeroinitializer
;
entry:
  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
  ret <8 x i1> %int
}

define <8 x i1> @vctp16_1() {
; CHECK-LABEL: @vctp16_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
;
entry:
  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
  ret <8 x i1> %int
}

define <8 x i1> @vctp16_4() {
; CHECK-LABEL: @vctp16_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
;
entry:
  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
  ret <8 x i1> %int
}

define <8 x i1> @vctp16_7() {
; CHECK-LABEL: @vctp16_7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
;
entry:
  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
  ret <8 x i1> %int
}

define <8 x i1> @vctp16_8() {
; CHECK-LABEL: @vctp16_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
  ret <8 x i1> %int
}

define <8 x i1> @vctp16_100() {
; CHECK-LABEL: @vctp16_100(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
  ret <8 x i1> %int
}

define <8 x i1> @vctp16_m1() {
; CHECK-LABEL: @vctp16_m1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
  ret <8 x i1> %int
}


define <4 x i1> @vctp32_0() {
; CHECK-LABEL: @vctp32_0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> zeroinitializer
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
  ret <4 x i1> %int
}

define <4 x i1> @vctp32_1() {
; CHECK-LABEL: @vctp32_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
  ret <4 x i1> %int
}

define <4 x i1> @vctp32_3() {
; CHECK-LABEL: @vctp32_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
  ret <4 x i1> %int
}

define <4 x i1> @vctp32_4() {
; CHECK-LABEL: @vctp32_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
  ret <4 x i1> %int
}

define <4 x i1> @vctp32_100() {
; CHECK-LABEL: @vctp32_100(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
  ret <4 x i1> %int
}

define <4 x i1> @vctp32_m1() {
; CHECK-LABEL: @vctp32_m1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
  ret <4 x i1> %int
}


define <4 x i1> @vctp64_0() {
; CHECK-LABEL: @vctp64_0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> zeroinitializer
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
  ret <4 x i1> %int
}

define <4 x i1> @vctp64_1() {
; CHECK-LABEL: @vctp64_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 false, i1 false>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
  ret <4 x i1> %int
}

define <4 x i1> @vctp64_2() {
; CHECK-LABEL: @vctp64_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
  ret <4 x i1> %int
}

define <4 x i1> @vctp64_100() {
; CHECK-LABEL: @vctp64_100(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
  ret <4 x i1> %int
}

define <4 x i1> @vctp64_m1() {
; CHECK-LABEL: @vctp64_m1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
  ret <4 x i1> %int
}


declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
