[SveEmitter] Implement builtins for gathers/scatters
This patch adds builtins for:
  * regular, first-faulting and non-temporal gather loads
  * regular and non-temporal scatter stores

Differential Revision: https://reviews.llvm.org/D77735
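
For context, a minimal usage sketch of the kind of ACLE calls these builtins enable (illustrative only, not part of this commit; assumes an SVE-enabled target and arm_sve.h, with intrinsic names as defined by the ACLE):

#include <arm_sve.h>

// "Scalar base, vector offset" gather: loads from base + offsets[i] bytes.
svint32_t load_sparse(svbool_t pg, const int32_t *base, svint32_t offsets) {
  return svld1_gather_s32offset_s32(pg, base, offsets);
}

// Matching scatter store. Note that the data is the last argument at the
// ACLE level; the lowering code below moves it to the front for LLVM IR.
void store_sparse(svbool_t pg, int32_t *base, svint32_t offsets,
                  svint32_t data) {
  svst1_scatter_s32offset_s32(pg, base, offsets, data);
}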
banach-space committed Apr 22, 2020
1 parent 67266d8 commit 72f5658
Showing 30 changed files with 5,781 additions and 3 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/TargetBuiltins.h
@@ -222,6 +222,7 @@ namespace clang {
bool isStructLoad() const { return Flags & IsStructLoad; }
bool isStructStore() const { return Flags & IsStructStore; }
bool isZExtReturn() const { return Flags & IsZExtReturn; }
bool isByteIndexed() const { return Flags & IsByteIndexed; }

uint64_t getBits() const { return Flags; }
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
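
An editor's illustration (not part of this change, and assuming the flag is set on the _offset builtin variants in arm_sve.td) of what the new flag distinguishes: _offset gather forms take byte offsets, while _index forms take element indices.

#include <arm_sve.h>

svint32_t by_bytes(svbool_t pg, const int32_t *base, svint32_t offs) {
  return svld1_gather_s32offset_s32(pg, base, offs); // base + offs[i] bytes
}

svint32_t by_elements(svbool_t pg, const int32_t *base, svint32_t idx) {
  return svld1_gather_s32index_s32(pg, base, idx);   // base + 4 * idx[i] bytes
}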
281 changes: 281 additions & 0 deletions clang/include/clang/Basic/arm_sve.td

Large diffs are not rendered by default.

163 changes: 161 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp
@@ -7463,8 +7463,56 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
return Op;
}

/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
/// access builtin. Only required if it can't be inferred from the base pointer
/// operand.
llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) {
switch (TypeFlags.getMemEltType()) {
case SVETypeFlags::MemEltTyDefault:
return getEltType(TypeFlags);
case SVETypeFlags::MemEltTyInt8:
return Builder.getInt8Ty();
case SVETypeFlags::MemEltTyInt16:
return Builder.getInt16Ty();
case SVETypeFlags::MemEltTyInt32:
return Builder.getInt32Ty();
case SVETypeFlags::MemEltTyInt64:
return Builder.getInt64Ty();
}
llvm_unreachable("Unknown MemEltType");
}
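
A hedged example (editor's sketch, not from the patch) of when MemEltTyDefault is not enough: for the extending gather below, the element read from memory is i8 while the result element is i32, so the call is emitted on <n x 4 x i8> and then sign-extended to <n x 4 x i32> (see EmitSVEGatherLoad below).

#include <arm_sve.h>

svint32_t widen(svbool_t pg, const int8_t *base, svint32_t offsets) {
  // Memory element type is MemEltTyInt8; the result element type is i32.
  return svld1sb_gather_s32offset_s32(pg, base, offsets);
}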

llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) {
switch (TypeFlags.getEltType()) {
default:
llvm_unreachable("Invalid SVETypeFlag!");

case SVETypeFlags::EltTyInt8:
return Builder.getInt8Ty();
case SVETypeFlags::EltTyInt16:
return Builder.getInt16Ty();
case SVETypeFlags::EltTyInt32:
return Builder.getInt32Ty();
case SVETypeFlags::EltTyInt64:
return Builder.getInt64Ty();

case SVETypeFlags::EltTyFloat16:
return Builder.getHalfTy();
case SVETypeFlags::EltTyFloat32:
return Builder.getFloatTy();
case SVETypeFlags::EltTyFloat64:
return Builder.getDoubleTy();

case SVETypeFlags::EltTyBool8:
case SVETypeFlags::EltTyBool16:
case SVETypeFlags::EltTyBool32:
case SVETypeFlags::EltTyBool64:
return Builder.getInt1Ty();
}
}

// Return the llvm vector type corresponding to the specified element TypeFlags.
-llvm::Type *CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
+llvm::VectorType *CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
switch (TypeFlags.getEltType()) {
default:
llvm_unreachable("Invalid SVETypeFlag!");
@@ -7528,6 +7576,113 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
return C;
}

Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
auto *ResultTy = getSVEType(TypeFlags);
auto *OverloadedTy = llvm::VectorType::get(SVEBuiltinMemEltTy(TypeFlags),
ResultTy->getElementCount());

// At the ACLE level there's only one predicate type, svbool_t, which is
// mapped to <n x 16 x i1>. However, this might be incompatible with the
// actual type being loaded. For example, when loading doubles (i64) the
// predicate should be <n x 2 x i1> instead. At the IR level the type of
// the predicate and the data being loaded must match. Cast accordingly.
Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);

Function *F = nullptr;
if (Ops[1]->getType()->isVectorTy())
// This is the "vector base, scalar offset" case. In order to uniquely
// map this built-in to an LLVM IR intrinsic, we need both the return type
// and the type of the vector base.
F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
else
// This is the "scalar base, vector offset" case. The type of the offset
// is encoded in the name of the intrinsic. We only need to specify the
// return type in order to uniquely map this built-in to an LLVM IR
// intrinsic.
F = CGM.getIntrinsic(IntID, OverloadedTy);

// Pass 0 when the offset is missing. This can only be applied when using
// the "vector base" addressing mode for which ACLE allows no offset. The
// corresponding LLVM IR always requires an offset.
if (Ops.size() == 2) {
assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
Ops.push_back(ConstantInt::get(Int64Ty, 0));
}

// For "vector base, scalar index" scale the index so that it becomes a
// scalar offset.
if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
unsigned BytesPerElt =
OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
Ops[2] = Builder.CreateMul(Ops[2], Scale);
}

Value *Call = Builder.CreateCall(F, Ops);

// The following sext/zext is only needed when ResultTy != OverloadedTy. In
// other cases it's folded into a nop.
return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
: Builder.CreateSExt(Call, ResultTy);
}
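
To make the two branches above concrete, an editor's sketch (not part of the diff) of the vector-base addressing modes; with no offset argument the code above appends a 0, and for the _index form the scalar index is scaled to a byte offset:

#include <arm_sve.h>

// Vector of addresses, no offset: the lowering passes a 0 offset because the
// LLVM IR intrinsic always requires one.
svint64_t from_addrs(svbool_t pg, svuint64_t bases) {
  return svld1_gather_u64base_s64(pg, bases);
}

// Vector base with a scalar index: idx is multiplied by 8 (the i64 element
// size in bytes) by the CreateMul above before the intrinsic call.
svint64_t from_addrs_idx(svbool_t pg, svuint64_t bases, int64_t idx) {
  return svld1_gather_u64base_index_s64(pg, bases, idx);
}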

Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
auto *SrcDataTy = getSVEType(TypeFlags);
auto *OverloadedTy = llvm::VectorType::get(SVEBuiltinMemEltTy(TypeFlags),
SrcDataTy->getElementCount());

// In ACLE the source data is passed in the last argument, whereas in LLVM IR
// it's the first argument. Move it accordingly.
Ops.insert(Ops.begin(), Ops.pop_back_val());

Function *F = nullptr;
if (Ops[2]->getType()->isVectorTy())
// This is the "vector base, scalar offset" case. In order to uniquely
// map this built-in to an LLVM IR intrinsic, we need both the type of
// the stored data and the type of the vector base.
F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
else
// This is the "scalar base, vector offset" case. The type of the offset
// is encoded in the name of the intrinsic. We only need to specify the
// type of the stored data in order to uniquely map this built-in to an
// LLVM IR intrinsic.
F = CGM.getIntrinsic(IntID, OverloadedTy);

// Pass 0 when the offset is missing. This can only be applied when using
// the "vector base" addressing mode for which ACLE allows no offset. The
// corresponding LLVM IR always requires an offset.
if (Ops.size() == 3) {
assert(Ops[2]->getType()->isVectorTy() && "Scalar base requires an offset");
Ops.push_back(ConstantInt::get(Int64Ty, 0));
}

// Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
// folded into a nop.
Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);

// At the ACLE level there's only one predicate type, svbool_t, which is
// mapped to <n x 16 x i1>. However, this might be incompatible with the
// actual type being stored. For example, when storing doubles (i64) the
// predicate should be <n x 2 x i1> instead. At the IR level the type of
// the predicate and the data being stored must match. Cast accordingly.
Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);

// For "vector base, scalar index" scale the index so that it becomes a
// scalar offset.
if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
unsigned BytesPerElt =
OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
Ops[3] = Builder.CreateMul(Ops[3], Scale);
}

return Builder.CreateCall(F, Ops);
}
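
Again as a hedged illustration (editor's sketch), a scatter that exercises the reordering and casting above: the ACLE passes the data last, and the svbool_t predicate is recast from <n x 16 x i1> to <n x 2 x i1> to match the doubles being stored.

#include <arm_sve.h>

void scatter_f64(svbool_t pg, svuint64_t bases, int64_t idx,
                 svfloat64_t data) {
  // data (the last ACLE argument) becomes the first LLVM IR operand; idx is
  // scaled by 8 to a byte offset since this is an _index form.
  svst1_scatter_u64base_index_f64(pg, bases, idx, data);
}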

Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
llvm::Type *ReturnTy,
SmallVectorImpl<Value *> &Ops,
@@ -7639,8 +7794,12 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
TypeFlags.isZExtReturn());
else if (TypeFlags.isStore())
return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isGatherLoad())
return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isScatterStore())
return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (Builtin->LLVMIntrinsic != 0) {
-llvm::Type* OverloadedTy = getSVEType(TypeFlags);
+llvm::VectorType *OverloadedTy = getSVEType(TypeFlags);

if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
InsertExplicitZeroOperand(Builder, Ty, Ops);
14 changes: 13 additions & 1 deletion clang/lib/CodeGen/CodeGenFunction.h
@@ -3903,9 +3903,21 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
llvm::Type *Ty, bool usgn, const char *name);
llvm::Value *vectorWrapScalar16(llvm::Value *Op);
/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
/// access builtin. Only required if it can't be inferred from the base
/// pointer operand.
llvm::Type *SVEBuiltinMemEltTy(SVETypeFlags TypeFlags);

llvm::Type *getEltType(SVETypeFlags TypeFlags);

-llvm::Type *getSVEType(const SVETypeFlags &TypeFlags);
+llvm::VectorType *getSVEType(const SVETypeFlags &TypeFlags);
llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::VectorType *VTy);
llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSVEScatterStore(SVETypeFlags TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID, bool IsZExtReturn);
