Skip to content

Commit 7ad95ec

Browse files
committed
Support arbitrary addrspace pointers in masked load/store intrinsics
This is a resubmission of change 263158 after fixing the existing problem with intrinsics mangling (see the "LTO and intrinsics mangling" llvm-dev thread for details). This patch fixes the problem which occurs when loop-vectorize tries to use the @llvm.masked.load/store intrinsic for a non-default addrspace pointer. It fails with a "Calling a function with a bad signature!" assertion in the CallInst constructor because it tries to pass a non-default addrspace pointer to the pointer argument, which has the default addrspace. The fix is to add the pointer type as another overloaded type to the @llvm.masked.load/store intrinsics. Reviewed By: reames Differential Revision: http://reviews.llvm.org/D17270 llvm-svn: 274043
1 parent f43266b commit 7ad95ec

File tree

12 files changed

+338
-206
lines changed

12 files changed

+338
-206
lines changed

llvm/docs/LangRef.rst

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11565,12 +11565,12 @@ This is an overloaded intrinsic. The loaded data is a vector of any integer, flo
1156511565

1156611566
::
1156711567

11568-
declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
11569-
declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
11568+
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
11569+
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64 (<2 x double>* <ptr>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
1157011570
;; The data is a vector of pointers to double
11571-
declare <8 x double*> @llvm.masked.load.v8p0f64 (<8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x double*> <passthru>)
11571+
declare <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64 (<8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x double*> <passthru>)
1157211572
;; The data is a vector of function pointers
11573-
declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f (<8 x i32 ()*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x i32 ()*> <passthru>)
11573+
declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f (<8 x i32 ()*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x i32 ()*> <passthru>)
1157411574

1157511575
Overview:
1157611576
"""""""""
@@ -11593,7 +11593,7 @@ The result of this operation is equivalent to a regular vector load instruction
1159311593

1159411594
::
1159511595

11596-
%res = call <16 x float> @llvm.masked.load.v16f32 (<16 x float>* %ptr, i32 4, <16 x i1>%mask, <16 x float> %passthru)
11596+
%res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* %ptr, i32 4, <16 x i1>%mask, <16 x float> %passthru)
1159711597

1159811598
;; The result of the two following instructions is identical aside from potential memory access exception
1159911599
%loadlal = load <16 x float>, <16 x float>* %ptr, align 4
@@ -11610,12 +11610,12 @@ This is an overloaded intrinsic. The data stored in memory is a vector of any in
1161011610

1161111611
::
1161211612

11613-
declare void @llvm.masked.store.v8i32 (<8 x i32> <value>, <8 x i32>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
11614-
declare void @llvm.masked.store.v16f32 (<16 x float> <value>, <16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
11613+
declare void @llvm.masked.store.v8i32.p0v8i32 (<8 x i32> <value>, <8 x i32>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
11614+
declare void @llvm.masked.store.v16f32.p0v16f32 (<16 x float> <value>, <16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>)
1161511615
;; The data is a vector of pointers to double
11616-
declare void @llvm.masked.store.v8p0f64 (<8 x double*> <value>, <8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
11616+
declare void @llvm.masked.store.v8p0f64.p0v8p0f64 (<8 x double*> <value>, <8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
1161711617
;; The data is a vector of function pointers
11618-
declare void @llvm.masked.store.v4p0f_i32f (<4 x i32 ()*> <value>, <4 x i32 ()*>* <ptr>, i32 <alignment>, <4 x i1> <mask>)
11618+
declare void @llvm.masked.store.v4p0f_i32f.p0v4p0f_i32f (<4 x i32 ()*> <value>, <4 x i32 ()*>* <ptr>, i32 <alignment>, <4 x i1> <mask>)
1161911619

1162011620
Overview:
1162111621
"""""""""
@@ -11636,7 +11636,7 @@ The result of this operation is equivalent to a load-modify-store sequence. Howe
1163611636

1163711637
::
1163811638

11639-
call void @llvm.masked.store.v16f32(<16 x float> %value, <16 x float>* %ptr, i32 4, <16 x i1> %mask)
11639+
call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %value, <16 x float>* %ptr, i32 4, <16 x i1> %mask)
1164011640

1164111641
;; The result of the following instructions is identical aside from potential data races and memory access exceptions
1164211642
%oldval = load <16 x float>, <16 x float>* %ptr, align 4

llvm/include/llvm/IR/IRBuilder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -540,9 +540,9 @@ class IRBuilderBase {
540540

541541
private:
542542
/// \brief Create a call to a masked intrinsic with given Id.
543-
/// Masked intrinsic has only one overloaded type - data type.
544543
CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef<Value *> Ops,
545-
Type *DataTy, const Twine &Name = "");
544+
ArrayRef<Type *> OverloadedTypes,
545+
const Twine &Name = "");
546546

547547
Value *getCastedInt8PtrValue(Value *Ptr);
548548
};

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -641,13 +641,14 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
641641

642642
//===-------------------------- Masked Intrinsics -------------------------===//
643643
//
644-
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
644+
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
645+
LLVMAnyPointerType<LLVMMatchType<0>>,
645646
llvm_i32_ty,
646647
LLVMVectorSameWidth<0, llvm_i1_ty>],
647648
[IntrArgMemOnly]>;
648649

649650
def int_masked_load : Intrinsic<[llvm_anyvector_ty],
650-
[LLVMPointerTo<0>, llvm_i32_ty,
651+
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
651652
LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
652653
[IntrReadMem, IntrArgMemOnly]>;
653654

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
149149
break;
150150
}
151151

152+
case 'm': {
153+
if (Name.startswith("masked.load.")) {
154+
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
155+
if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
156+
F->setName(Name + ".old");
157+
NewFn = Intrinsic::getDeclaration(F->getParent(),
158+
Intrinsic::masked_load,
159+
Tys);
160+
return true;
161+
}
162+
}
163+
if (Name.startswith("masked.store.")) {
164+
auto Args = F->getFunctionType()->params();
165+
Type *Tys[] = { Args[0], Args[1] };
166+
if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
167+
F->setName(Name + ".old");
168+
NewFn = Intrinsic::getDeclaration(F->getParent(),
169+
Intrinsic::masked_store,
170+
Tys);
171+
return true;
172+
}
173+
}
174+
break;
175+
}
176+
152177
case 'o':
153178
// We only need to change the name to match the mangling including the
154179
// address space.
@@ -1201,6 +1226,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
12011226
CI->eraseFromParent();
12021227
return;
12031228
}
1229+
1230+
case Intrinsic::masked_load:
1231+
case Intrinsic::masked_store: {
1232+
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
1233+
CI->arg_operands().end());
1234+
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
1235+
CI->eraseFromParent();
1236+
return;
1237+
}
12041238
}
12051239
}
12061240

llvm/lib/IR/IRBuilder.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,15 @@ CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
212212
CallInst *IRBuilderBase::CreateMaskedLoad(Value *Ptr, unsigned Align,
213213
Value *Mask, Value *PassThru,
214214
const Twine &Name) {
215-
// DataTy is the overloaded type
216-
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
215+
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
216+
Type *DataTy = PtrTy->getElementType();
217217
assert(DataTy->isVectorTy() && "Ptr should point to a vector");
218218
if (!PassThru)
219219
PassThru = UndefValue::get(DataTy);
220+
Type *OverloadedTypes[] = { DataTy, PtrTy };
220221
Value *Ops[] = { Ptr, getInt32(Align), Mask, PassThru};
221-
return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, DataTy, Name);
222+
return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops,
223+
OverloadedTypes, Name);
222224
}
223225

224226
/// \brief Create a call to a Masked Store intrinsic.
@@ -229,19 +231,22 @@ CallInst *IRBuilderBase::CreateMaskedLoad(Value *Ptr, unsigned Align,
229231
/// be accessed in memory
230232
CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
231233
unsigned Align, Value *Mask) {
234+
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
235+
Type *DataTy = PtrTy->getElementType();
236+
assert(DataTy->isVectorTy() && "Ptr should point to a vector");
237+
Type *OverloadedTypes[] = { DataTy, PtrTy };
232238
Value *Ops[] = { Val, Ptr, getInt32(Align), Mask };
233-
// Type of the data to be stored - the only one overloaded type
234-
return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, Val->getType());
239+
return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);
235240
}
236241

237242
/// Create a call to a Masked intrinsic, with given intrinsic Id,
238-
/// an array of operands - Ops, and one overloaded type - DataTy
243+
/// an array of operands - Ops, and an array of overloaded types -
244+
/// OverloadedTypes.
239245
CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
240246
ArrayRef<Value *> Ops,
241-
Type *DataTy,
247+
ArrayRef<Type *> OverloadedTypes,
242248
const Twine &Name) {
243249
Module *M = BB->getParent()->getParent();
244-
Type *OverloadedTypes[] = { DataTy };
245250
Value *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes);
246251
return createCallHelper(TheFn, Ops, this, Name);
247252
}
@@ -270,7 +275,7 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align,
270275

271276
// We specify only one type when we create this intrinsic. Types of other
272277
// arguments are derived from this type.
273-
return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, DataTy, Name);
278+
return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name);
274279
}
275280

276281
/// \brief Create a call to a Masked Scatter intrinsic.
@@ -300,7 +305,7 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
300305

301306
// We specify only one type when we create this intrinsic. Types of other
302307
// arguments are derived from this type.
303-
return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, DataTy);
308+
return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy });
304309
}
305310

306311
template <typename T0, typename T1, typename T2, typename T3>

llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,63 +7,63 @@
77
; AVX2: Found an estimated cost of 4 {{.*}}.masked
88
define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
99
%mask = icmp eq <2 x i64> %trigger, zeroinitializer
10-
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
10+
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
1111
ret <2 x double> %res
1212
}
1313

1414
; AVX2-LABEL: test2
1515
; AVX2: Found an estimated cost of 4 {{.*}}.masked
1616
define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
1717
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
18-
%res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
18+
%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
1919
ret <4 x i32> %res
2020
}
2121

2222
; AVX2-LABEL: test3
2323
; AVX2: Found an estimated cost of 4 {{.*}}.masked
2424
define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
2525
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
26-
call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
26+
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
2727
ret void
2828
}
2929

3030
; AVX2-LABEL: test4
3131
; AVX2: Found an estimated cost of 4 {{.*}}.masked
3232
define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
3333
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
34-
%res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
34+
%res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
3535
ret <8 x float> %res
3636
}
3737

3838
; AVX2-LABEL: test5
3939
; AVX2: Found an estimated cost of 5 {{.*}}.masked
4040
define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
4141
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
42-
call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
42+
call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
4343
ret void
4444
}
4545

4646
; AVX2-LABEL: test6
4747
; AVX2: Found an estimated cost of 6 {{.*}}.masked
4848
define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
4949
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
50-
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
50+
call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
5151
ret void
5252
}
5353

5454
; AVX2-LABEL: test7
5555
; AVX2: Found an estimated cost of 5 {{.*}}.masked
5656
define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
5757
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
58-
%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
58+
%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
5959
ret <2 x float> %res
6060
}
6161

6262
; AVX2-LABEL: test8
6363
; AVX2: Found an estimated cost of 6 {{.*}}.masked
6464
define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
6565
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
66-
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
66+
%res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
6767
ret <2 x i32> %res
6868
}
6969

@@ -279,24 +279,22 @@ declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4
279279
declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
280280
declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
281281

282-
declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
283-
declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
284-
declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
285-
declare void @llvm.masked.store.v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
286-
declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
287-
declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
288-
declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
289-
declare void @llvm.masked.store.v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
290-
declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
291-
declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
292-
declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
293-
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
294-
declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
295-
declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
296-
declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
297-
declare <4 x double> @llvm.masked.load.v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
298-
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
299-
declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
300-
declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
301-
declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
302-
282+
declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
283+
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
284+
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
285+
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
286+
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
287+
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
288+
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
289+
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
290+
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
291+
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
292+
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
293+
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
294+
declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
295+
declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
296+
declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
297+
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
298+
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
299+
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
300+
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)

llvm/test/Assembler/auto_upgrade_intrinsics.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,24 @@ define i32 @test.objectsize() {
5858
ret i32 %s
5959
}
6060

61+
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
62+
63+
define <2 x double> @tests.masked.load(<2 x double>* %ptr, <2 x i1> %mask, <2 x double> %passthru) {
64+
; CHECK-LABEL: @tests.masked.load(
65+
; CHECK: @llvm.masked.load.v2f64.p0v2f64
66+
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> %mask, <2 x double> %passthru)
67+
ret <2 x double> %res
68+
}
69+
70+
declare void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
71+
72+
define void @tests.masked.store(<2 x double>* %ptr, <2 x i1> %mask, <2 x double> %val) {
73+
; CHECK-LABEL: @tests.masked.store(
74+
; CHECK: @llvm.masked.store.v2f64.p0v2f64
75+
call void @llvm.masked.store.v2f64(<2 x double> %val, <2 x double>* %ptr, i32 3, <2 x i1> %mask)
76+
ret void
77+
}
78+
6179
@__stack_chk_guard = external global i8*
6280
declare void @llvm.stackprotectorcheck(i8**)
6381

llvm/test/CodeGen/X86/avx512-bugfix-26264.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
1818
; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
1919
; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
2020
; AVX512BW-NEXT: retq
21-
%res = call <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
21+
%res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
2222
ret <32 x double> %res
2323
}
2424

@@ -39,9 +39,9 @@ define <32 x i64> @test_load_32i64(<32 x i64>* %ptrs, <32 x i1> %mask, <32 x i64
3939
; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
4040
; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
4141
; AVX512BW-NEXT: retq
42-
%res = call <32 x i64> @llvm.masked.load.v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
42+
%res = call <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
4343
ret <32 x i64> %res
4444
}
4545

46-
declare <32 x i64> @llvm.masked.load.v32i64(<32 x i64>* %ptrs, i32, <32 x i1> %mask, <32 x i64> %src0)
47-
declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
46+
declare <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32, <32 x i1> %mask, <32 x i64> %src0)
47+
declare <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)

0 commit comments

Comments
 (0)