[AArch64][SVE] Allow vector of pointers as legal type for masked load…

…/store. Refer to LangRef http://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics 'llvm.masked.load/store.*’ intrinsics are overloaded intrinsic, which allow the load/store data to be a vector of any integer, floating-point or pointer data type. Therefore, allow pointer data type when checking 'isLegalMaskedLoadStore()'. Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D85045
llvm · Aug 1, 2020 · 01bfe2e · 01bfe2e
1 parent 234f51a
commit 01bfe2e
Show file tree

Hide file tree

Showing 2 changed files with 105 additions and 0 deletions.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -166,6 +166,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
       return false;
 
     Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
+    if (Ty->isPointerTy())
+      return true;
+
     if (Ty->isBFloatTy() || Ty->isHalfTy() ||
         Ty->isFloatTy() || Ty->isDoubleTy())
       return true;

diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -188,6 +188,94 @@ define void @masked_store_nxv8bf16(<vscale x 8 x bfloat> *%a, <vscale x 8 x bflo
   ret void
 }
 
+;
+; Masked load store of pointer data type
+;
+
+; Pointer of integer type
+
+define <vscale x 2 x i8*> @masked.load.nxv2p0i8(<vscale x 2 x i8*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.load.nxv2p0i8:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8*> @llvm.masked.load.nxv2p0i8.p0nxv2p0i8(<vscale x 2 x i8*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i8*> undef)
+  ret <vscale x 2 x i8*> %v
+}
+define <vscale x 2 x i16*> @masked.load.nxv2p0i16(<vscale x 2 x i16*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.load.nxv2p0i16:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16*> @llvm.masked.load.nxv2p0i16.p0nxv2p0i16(<vscale x 2 x i16*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i16*> undef)
+  ret <vscale x 2 x i16*> %v
+}
+define <vscale x 2 x i32*> @masked.load.nxv2p0i32(<vscale x 2 x i32*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.load.nxv2p0i32:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32*> @llvm.masked.load.nxv2p0i32.p0nxv2p0i32(<vscale x 2 x i32*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i32*> undef)
+  ret <vscale x 2 x i32*> %v
+}
+define <vscale x 2 x i64*> @masked.load.nxv2p0i64(<vscale x 2 x i64*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.load.nxv2p0i64:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64*> @llvm.masked.load.nxv2p0i64.p0nxv2p0i64(<vscale x 2 x i64*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64*> undef)
+  ret <vscale x 2 x i64*> %v
+}
+
+; Pointer of floating-point type
+
+define <vscale x 2 x bfloat*> @masked.load.nxv2p0bf16(<vscale x 2 x bfloat*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind #0 {
+; CHECK-LABEL: masked.load.nxv2p0bf16:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x bfloat*> @llvm.masked.load.nxv2p0bf16.p0nxv2p0bf16(<vscale x 2 x bfloat*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat*> undef)
+  ret <vscale x 2 x bfloat*> %v
+}
+define <vscale x 2 x half*> @masked.load.nxv2p0f16(<vscale x 2 x half*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.load.nxv2p0f16:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x half*> @llvm.masked.load.nxv2p0f16.p0nxv2p0f16(<vscale x 2 x half*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x half*> undef)
+  ret <vscale x 2 x half*> %v
+}
+define <vscale x 2 x float*> @masked.load.nxv2p0f32(<vscale x 2 x float*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.load.nxv2p0f32:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x float*> @llvm.masked.load.nxv2p0f32.p0nxv2p0f32(<vscale x 2 x float*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x float*> undef)
+  ret <vscale x 2 x float*> %v
+}
+define <vscale x 2 x double*> @masked.load.nxv2p0f64(<vscale x 2 x double*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.load.nxv2p0f64:
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x double*> @llvm.masked.load.nxv2p0f64.p0nxv2p0f64(<vscale x 2 x double*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double*> undef)
+  ret <vscale x 2 x double*> %v
+}
+
+; Pointer of array type
+
+define void @masked.store.nxv2p0a64i16(<vscale x 2 x [64 x i16]*> %data, <vscale x 2 x [64 x i16]*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.store.nxv2p0a64i16:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.nxv2p0a64i16.p0nxv2p0a64i16(<vscale x 2 x [64 x i16]*> %data, <vscale x 2 x [64 x i16]*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+; Pointer of struct type
+
+%struct = type { i8*, i32 }
+define void @masked.store.nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale x 2 x %struct*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked.store.nxv2p0s_struct:
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.nxv2p0s_struct.p0nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale x 2 x %struct*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+
 declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
 declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
@@ -214,5 +302,19 @@ declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>
 declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>*, i32, <vscale x 8 x i1>)
 declare void @llvm.masked.store.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>)
 
+declare <vscale x 2 x i8*> @llvm.masked.load.nxv2p0i8.p0nxv2p0i8(<vscale x 2 x i8*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i8*>)
+declare <vscale x 2 x i16*> @llvm.masked.load.nxv2p0i16.p0nxv2p0i16(<vscale x 2 x i16*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i16*>)
+declare <vscale x 2 x i32*> @llvm.masked.load.nxv2p0i32.p0nxv2p0i32(<vscale x 2 x i32*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i32*>)
+declare <vscale x 2 x i64*> @llvm.masked.load.nxv2p0i64.p0nxv2p0i64(<vscale x 2 x i64*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i64*>)
+
+declare <vscale x 2 x bfloat*> @llvm.masked.load.nxv2p0bf16.p0nxv2p0bf16(<vscale x 2 x bfloat*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x bfloat*>)
+declare <vscale x 2 x half*> @llvm.masked.load.nxv2p0f16.p0nxv2p0f16(<vscale x 2 x half*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x half*>)
+declare <vscale x 2 x float*> @llvm.masked.load.nxv2p0f32.p0nxv2p0f32(<vscale x 2 x float*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x float*>)
+declare <vscale x 2 x double*> @llvm.masked.load.nxv2p0f64.p0nxv2p0f64(<vscale x 2 x double*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x double*>)
+
+declare void @llvm.masked.store.nxv2p0a64i16.p0nxv2p0a64i16(<vscale x 2 x [64 x i16]*>, <vscale x 2 x [64 x i16]*>*, i32 immarg, <vscale x 2 x i1>)
+
+declare void @llvm.masked.store.nxv2p0s_struct.p0nxv2p0s_struct(<vscale x 2 x %struct*>, <vscale x 2 x %struct*>*, i32 immarg, <vscale x 2 x i1>)
+
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+sve,+bf16" }