Skip to content

Commit

Permalink
[SveEmitter] Add builtins for contiguous prefetches
Browse files Browse the repository at this point in the history
This patch also adds the enum `sv_prfop` for the prefetch operation specifier
and checks to ensure the passed enum values are valid.

Reviewers: SjoerdMeijer, efriedma, ctetreau

Reviewed By: efriedma

Tags: #clang

Differential Revision: https://reviews.llvm.org/D78674
  • Loading branch information
sdesmalen-arm committed Apr 24, 2020
1 parent 9cd4deb commit 823e2a6
Show file tree
Hide file tree
Showing 14 changed files with 654 additions and 7 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/TargetBuiltins.h
Expand Up @@ -238,6 +238,7 @@ namespace clang {
bool isOverloadDefault() const { return !(Flags & OverloadKindMask); }
bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; }
bool isOverloadCvt() const { return Flags & IsOverloadCvt; }
bool isPrefetch() const { return Flags & IsPrefetch; }

uint64_t getBits() const { return Flags; }
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
Expand Down
19 changes: 19 additions & 0 deletions clang/include/clang/Basic/arm_sve.td
Expand Up @@ -58,6 +58,7 @@
// -------------------
// prototype: return (arg, arg, ...)
//
// v: void
// x: vector of signed integers
// u: vector of unsigned integers
// d: default
Expand All @@ -82,6 +83,7 @@
// M: svfloat32_t
// N: svfloat64_t

// J: Prefetch type (sv_prfop)
// A: pointer to int8_t
// B: pointer to int16_t
// C: pointer to int32_t
Expand Down Expand Up @@ -176,6 +178,7 @@ def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type)
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
def IsByteIndexed : FlagType<0x01000000>;
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.

// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
class ImmCheckType<int val> {
Expand All @@ -193,6 +196,7 @@ def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt
def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1)
def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270]
def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270]
def ImmCheck0_13 : ImmCheckType<12>; // 0..13

class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
int Arg = arg;
Expand Down Expand Up @@ -543,6 +547,21 @@ def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEl
// Store one vector, with no truncation, non-temporal (scalar base, VL displacement)
def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">;

////////////////////////////////////////////////////////////////////////////////
// Prefetches

// Prefetch (Scalar base)
def SVPRFB : MInst<"svprfb", "vPcJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">;
def SVPRFH : MInst<"svprfh", "vPcJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">;
def SVPRFW : MInst<"svprfw", "vPcJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">;
def SVPRFD : MInst<"svprfd", "vPcJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">;

// Prefetch (Scalar base, VL displacement)
def SVPRFB_VNUM : MInst<"svprfb_vnum", "vPclJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">;
def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPclJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">;
def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">;
def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPclJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">;

////////////////////////////////////////////////////////////////////////////////
// Integer arithmetic

Expand Down
40 changes: 33 additions & 7 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -7568,6 +7568,13 @@ llvm::VectorType *CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
}
}

constexpr unsigned SVEBitsPerBlock = 128;

static llvm::VectorType* getSVEVectorForElementType(llvm::Type *EltTy) {
unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
return llvm::VectorType::get(EltTy, { NumElts, true });
}

// Reinterpret the input predicate so that it can be used to correctly isolate
// the elements of the specified datatype.
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
Expand Down Expand Up @@ -7707,6 +7714,30 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, Ops);
}

Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned BuiltinID) {
auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
auto *VectorTy = getSVEVectorForElementType(MemEltTy);
auto *MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getElementCount());

Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
Value *BasePtr = Ops[1];

// Implement the index operand if not omitted.
if (Ops.size() > 3) {
BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
}

// Prefetch intriniscs always expect an i8*
BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
Value *PrfOp = Ops.back();

Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
}

Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
llvm::Type *ReturnTy,
SmallVectorImpl<Value *> &Ops,
Expand Down Expand Up @@ -7759,13 +7790,6 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
return Builder.CreateCall(F, {Val, Predicate, BasePtr});
}

constexpr unsigned SVEBitsPerBlock = 128;

static llvm::VectorType* getSVEVectorForElementType(llvm::Type *EltTy) {
unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
return llvm::VectorType::get(EltTy, { NumElts, true });
}

// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
Expand Down Expand Up @@ -7847,6 +7871,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isScatterStore())
return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isPrefetch())
return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (Builtin->LLVMIntrinsic != 0) {
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
InsertExplicitZeroOperand(Builder, Ty, Ops);
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Expand Up @@ -3927,6 +3927,9 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitSVEMaskedStore(const CallExpr *,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Expand Up @@ -2042,6 +2042,10 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
HasError = true;
break;
case SVETypeFlags::ImmCheck0_13:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
HasError = true;
break;
case SVETypeFlags::ImmCheck1_16:
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
HasError = true;
Expand Down
104 changes: 104 additions & 0 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
@@ -0,0 +1,104 @@
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

void test_svprfb(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 0)
return svprfb(pg, base, SV_PLDL1KEEP);
}

void test_svprfb_1(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_1
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 1)
return svprfb(pg, base, SV_PLDL1STRM);
}

void test_svprfb_2(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_2
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 2)
return svprfb(pg, base, SV_PLDL2KEEP);
}

void test_svprfb_3(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_3
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 3)
return svprfb(pg, base, SV_PLDL2STRM);
}

void test_svprfb_4(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_4
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 4)
return svprfb(pg, base, SV_PLDL3KEEP);
}

void test_svprfb_5(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_5
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 5)
return svprfb(pg, base, SV_PLDL3STRM);
}

void test_svprfb_6(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_6
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 8)
return svprfb(pg, base, SV_PSTL1KEEP);
}

void test_svprfb_7(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_7
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 9)
return svprfb(pg, base, SV_PSTL1STRM);
}

void test_svprfb_8(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_8
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 10)
return svprfb(pg, base, SV_PSTL2KEEP);
}

void test_svprfb_9(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_9
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 11)
return svprfb(pg, base, SV_PSTL2STRM);
}

void test_svprfb_10(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_10
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 12)
return svprfb(pg, base, SV_PSTL3KEEP);
}

void test_svprfb_11(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfb_11
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 13)
return svprfb(pg, base, SV_PSTL3STRM);
}

void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum)
{
// CHECK-LABEL: test_svprfb_vnum
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
// CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]], i32 0)
return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP);
}
118 changes: 118 additions & 0 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
@@ -0,0 +1,118 @@
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s

#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

void test_svprfd(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 0)
return svprfd(pg, base, SV_PLDL1KEEP);
}

void test_svprfd_1(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_1
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 1)
return svprfd(pg, base, SV_PLDL1STRM);
}

void test_svprfd_2(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_2
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 2)
return svprfd(pg, base, SV_PLDL2KEEP);
}

void test_svprfd_3(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_3
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 3)
return svprfd(pg, base, SV_PLDL2STRM);
}

void test_svprfd_4(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_4
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 4)
return svprfd(pg, base, SV_PLDL3KEEP);
}

void test_svprfd_5(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_5
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 5)
return svprfd(pg, base, SV_PLDL3STRM);
}

void test_svprfd_6(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_6
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 8)
return svprfd(pg, base, SV_PSTL1KEEP);
}

void test_svprfd_7(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_7
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 9)
return svprfd(pg, base, SV_PSTL1STRM);
}

void test_svprfd_8(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_8
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 10)
return svprfd(pg, base, SV_PSTL2KEEP);
}

void test_svprfd_9(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_9
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 11)
return svprfd(pg, base, SV_PSTL2STRM);
}

void test_svprfd_10(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_10
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 12)
return svprfd(pg, base, SV_PSTL3KEEP);
}

void test_svprfd_11(svbool_t pg, const void *base)
{
// CHECK-LABEL: test_svprfd_11
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %base, i32 13)
return svprfd(pg, base, SV_PSTL3STRM);
}

void test_svprfd_vnum(svbool_t pg, const void *base, int64_t vnum)
{
// CHECK-LABEL: test_svprfd_vnum
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// CHECK-DAG: %[[BASE:.*]] = bitcast i8* %base to <vscale x 2 x i64>*
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum
// CHECK-DAG: %[[I8_BASE:.*]] = bitcast <vscale x 2 x i64>* %[[GEP]] to i8*
// CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
return svprfd_vnum(pg, base, vnum, SV_PLDL1KEEP);
}

0 comments on commit 823e2a6

Please sign in to comment.