Skip to content
Browse files

AArch64: initial NEON support

Patch by Ana Pazos

- Completed implementation of instruction formats:
AdvSIMD three same
AdvSIMD modified immediate
AdvSIMD scalar pairwise

- Completed implementation of instruction classes
(some of the instructions in these classes
belong to yet unfinished instruction formats):
Vector Arithmetic
Vector Immediate
Vector Pairwise Arithmetic

- Initial implementation of instruction formats:
AdvSIMD scalar two-reg misc
AdvSIMD scalar three same

- Intial implementation of instruction class:
Scalar Arithmetic

- Initial clang changes to support arm v8 intrinsics.
Note: no clang changes for scalar intrinsics function name mangling yet.

- Comprehensive test cases for added instructions
To verify auto codegen, encoding, decoding, diagnosis, intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@187568 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information...
1 parent 789d82a commit b793f0d3448a15277cd6b6cc4ba558ded39a8084 @TNorthover TNorthover committed Aug 1, 2013
View
7 include/clang/Basic/BuiltinsAArch64.def
@@ -16,3 +16,10 @@
// In libgcc
BUILTIN(__clear_cache, "vv*v*", "i")
+// NEON
+#define GET_NEON_AARCH64_BUILTINS
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_AARCH64_BUILTINS
+#undef GET_NEON_BUILTINS
+
+#undef BUILTIN
View
3 include/clang/Basic/TargetBuiltins.h
@@ -91,7 +91,8 @@ namespace clang {
Poly8,
Poly16,
Float16,
- Float32
+ Float32,
+ Float64
};
NeonTypeFlags(unsigned F) : Flags(F) {}
View
110 include/clang/Basic/arm_neon.td
@@ -69,6 +69,7 @@ def OP_REINT : Op;
def OP_ABDL : Op;
def OP_ABA : Op;
def OP_ABAL : Op;
+def OP_DIV : Op;
class Inst <string n, string p, string t, Op o> {
string Name = n;
@@ -77,6 +78,7 @@ class Inst <string n, string p, string t, Op o> {
Op Operand = o;
bit isShift = 0;
bit isVCVT_N = 0;
+ bit isA64 = 0;
// Certain intrinsics have different names than their representative
// instructions. This field allows us to handle this correctly when we
@@ -145,6 +147,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
// l: long
// f: float
// h: half-float
+// d: double
// size modifiers:
// U: unsigned
@@ -452,3 +455,110 @@ def VREINTERPRET
// Vector fused multiply-add operations
def VFMA : SInst<"vfma", "dddd", "fQf">;
+
+////////////////////////////////////////////////////////////////////////////////
+// AArch64 Intrinsics
+
+let isA64 = 1 in {
+
+////////////////////////////////////////////////////////////////////////////////
+// Addition
+// With additional Qd type.
+def ADD : IOpInst<"vadd", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_ADD>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Subtraction
+// With additional Qd type.
+def SUB : IOpInst<"vsub", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_SUB>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Multiplication
+// With additional Qd type.
+def MUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
+def MLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
+def MLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Multiplication Extended
+def MULX : SInst<"vmulx", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Division
+def FDIV : IOpInst<"vdiv", "ddd", "fQfQd", OP_DIV>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector fused multiply-add operations
+// With additional Qd type.
+def FMLA : SInst<"vfma", "dddd", "fQfQd">;
+def FMLS : SInst<"vfms", "dddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Logical operations
+// With additional Qd type.
+def BSL : SInst<"vbsl", "dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Absolute Difference
+// With additional Qd type.
+def ABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Reciprocal/Sqrt
+// With additional Qd type.
+def FRECPS : IInst<"vrecps", "ddd", "fQfQd">;
+def FRSQRTS : IInst<"vrsqrts", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Comparison
+// With additional Qd type.
+def FCAGE : IInst<"vcage", "udd", "fQfQd">;
+def FCAGT : IInst<"vcagt", "udd", "fQfQd">;
+def FCALE : IInst<"vcale", "udd", "fQfQd">;
+def FCALT : IInst<"vcalt", "udd", "fQfQd">;
+// With additional Ql, QUl, Qd types.
+def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcQcQsQiQlQUcQUsQUiQUlQPc">;
+def CFMEQ : SOpInst<"vceq", "udd",
+ "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
+def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>;
+def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>;
+def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>;
+def CFMLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Max/Min Integer
+// With additional Qd type.
+def MAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+def MIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// MaxNum/MinNum Floating Point
+def FMAXNM : SInst<"vmaxnm", "ddd", "fQfQd">;
+def FMINNM : SInst<"vminnm", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pairwise Max/Min
+// With additional Qc Qs Qi QUc QUs QUi Qf Qd types.
+def MAXP : SInst<"vpmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+def MINP : SInst<"vpmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pairwise MaxNum/MinNum Floating Point
+def FMAXNMP : SInst<"vpmaxnm", "ddd", "fQfQd">;
+def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pairwise Addition
+// With additional Qc Qs Qi QUc QUs QUi Qf Qd types.
+def ADDP : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Arithmetic
+
+// Scalar Addition
+
+def SCALAR_ADD : Inst<"vaddd", "ddd", "lUl", OP_ADD>;
+
+// Scalar Subtraction
+def SCALAR_SUB : Inst<"vsubd", "ddd", "lUl", OP_SUB>;
+
+}
View
2 include/clang/Sema/Sema.h
@@ -7557,7 +7557,7 @@ class Sema {
bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
-
+ bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool SemaBuiltinVAStart(CallExpr *TheCall);
View
75 lib/AST/ItaniumMangle.cpp
@@ -360,6 +360,7 @@ class CXXNameMangler {
void mangleBareFunctionType(const FunctionType *T,
bool MangleReturnType);
void mangleNeonVectorType(const VectorType *T);
+ void mangleAArch64NeonVectorType(const VectorType *T);
void mangleIntegerLiteral(QualType T, const llvm::APSInt &Value);
void mangleMemberExpr(const Expr *base, bool isArrow,
@@ -2174,7 +2175,9 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
case BuiltinType::LongLong: EltName = "int64_t"; break;
case BuiltinType::ULongLong: EltName = "uint64_t"; break;
case BuiltinType::Float: EltName = "float32_t"; break;
- default: llvm_unreachable("unexpected Neon vector element type");
+ case BuiltinType::Half: EltName = "float16_t";break;
+ default:
+ llvm_unreachable("unexpected Neon vector element type");
}
}
const char *BaseName = 0;
@@ -2190,6 +2193,70 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
Out << BaseName << EltName;
}
+static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) {
+ switch (EltType->getKind()) {
+ case BuiltinType::SChar:
+ return "Int8";
+ case BuiltinType::Short:
+ return "Int16";
+ case BuiltinType::Int:
+ return "Int32";
+ case BuiltinType::LongLong:
+ return "Int64";
+ case BuiltinType::UChar:
+ return "Uint8";
+ case BuiltinType::UShort:
+ return "Uint16";
+ case BuiltinType::UInt:
+ return "Uint32";
+ case BuiltinType::ULongLong:
+ return "Uint64";
+ case BuiltinType::Half:
+ return "Float16";
+ case BuiltinType::Float:
+ return "Float32";
+ case BuiltinType::Double:
+ return "Float64";
+ default:
+ llvm_unreachable("Unexpected vector element base type");
+ }
+}
+
+// AArch64's ABI for Neon vector types specifies that they should be mangled as
+// the equivalent internal name. The vector type must be one of the special
+// types predefined by ARM.
+void CXXNameMangler::mangleAArch64NeonVectorType(const VectorType *T) {
+ QualType EltType = T->getElementType();
+ assert(EltType->isBuiltinType() && "Neon vector element not a BuiltinType");
+ unsigned BitSize =
+ (T->getNumElements() * getASTContext().getTypeSize(EltType));
+
+ assert((BitSize == 64 || BitSize == 128) &&
+ "Neon vector type not 64 or 128 bits");
+
+ assert(getASTContext().getTypeSize(EltType) != BitSize &&
+ "Vector of 1 element not permitted");
+
+ StringRef EltName;
+ if (T->getVectorKind() == VectorType::NeonPolyVector) {
+ switch (cast<BuiltinType>(EltType)->getKind()) {
+ case BuiltinType::UChar:
+ EltName = "Poly8";
+ break;
+ case BuiltinType::UShort:
+ EltName = "Poly16";
+ break;
+ default:
+ llvm_unreachable("unexpected Neon polynomial vector element type");
+ }
+ } else
+ EltName = mangleAArch64VectorBase(cast<BuiltinType>(EltType));
+
+ std::string TypeName =
+ ("__" + EltName + "x" + llvm::utostr(T->getNumElements()) + "_t").str();
+ Out << TypeName.length() << TypeName;
+}
+
// GNU extension: vector types
// <type> ::= <vector-type>
// <vector-type> ::= Dv <positive dimension number> _
@@ -2201,7 +2268,11 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
void CXXNameMangler::mangleType(const VectorType *T) {
if ((T->getVectorKind() == VectorType::NeonVector ||
T->getVectorKind() == VectorType::NeonPolyVector)) {
- mangleNeonVectorType(T);
+ if (getASTContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::aarch64)
+ mangleAArch64NeonVectorType(T);
+ else
+ mangleNeonVectorType(T);
return;
}
Out << "Dv" << T->getNumElements() << '_';
View
39 lib/Basic/Targets.cpp
@@ -3177,7 +3177,14 @@ class AArch64TargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
static const TargetInfo::GCCRegAlias GCCRegAliases[];
+ enum FPUModeEnum {
+ FPUMode,
+ NeonMode
+ };
+
+ unsigned FPU;
static const Builtin::Info BuiltinInfo[];
+
public:
AArch64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
@@ -3242,17 +3249,43 @@ class AArch64TargetInfo : public TargetInfo {
Opts.ShortEnums ? "1" : "4");
if (BigEndian)
- Builder.defineMacro("__ARM_BIG_ENDIAN");
+ Builder.defineMacro("__AARCH_BIG_ENDIAN");
+
+ if (FPU == NeonMode) {
+ Builder.defineMacro("__AARCH_FEATURE_ADVSIMD");
+
+ // 64-bit NEON supports half, single and double precision operations.
+ Builder.defineMacro("__AARCH_ADVSIMD_FP", "0xe");
+ }
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
NumRecords = clang::AArch64::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual bool hasFeature(StringRef Feature) const {
- return Feature == "aarch64";
+ return Feature == "aarch64" || (Feature == "neon" && FPU == NeonMode);
}
- virtual void getGCCRegNames(const char * const *&Names,
+
+ virtual bool setFeatureEnabled(llvm::StringMap<bool> &Features,
+ StringRef Name, bool Enabled) const {
+ if (Name == "neon") {
+ Features[Name] = Enabled;
+ return true;
+ }
+
+ return false;
+ }
+
+ virtual void HandleTargetFeatures(std::vector<std::string> &Features) {
+ FPU = FPUMode;
+ for (unsigned i = 0, e = Features.size(); i != e; ++i) {
+ if (Features[i] == "+neon")
+ FPU = NeonMode;
+ }
+ }
+
+ virtual void getGCCRegNames(const char *const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
View
197 lib/CodeGen/CGBuiltin.cpp
@@ -1614,6 +1614,8 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
case NeonTypeFlags::Float32:
return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
+ case NeonTypeFlags::Float64:
+ return llvm::VectorType::get(CGF->DoubleTy, 1 << IsQuad);
}
llvm_unreachable("Invalid NeonTypeFlags element type!");
}
@@ -1718,7 +1720,200 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
}
- return 0;
+ SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ }
+
+ // Get the last argument, which specifies the vector type.
+ llvm::APSInt Result;
+ const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+ if (!Arg->isIntegerConstantExpr(Result, getContext()))
+ return 0;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags Type(Result.getZExtValue());
+ bool usgn = Type.isUnsigned();
+
+ llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+
+ unsigned Int;
+ switch (BuiltinID) {
+ default:
+ return 0;
+
+ // AArch64 builtins mapping to legacy ARM v7 builtins.
+ // FIXME: the mapped builtins listed correspond to what has been tested
+ // in aarch64-neon-intrinsics.c so far.
+ case AArch64::BI__builtin_neon_vmul_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E);
+ case AArch64::BI__builtin_neon_vmulq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E);
+ case AArch64::BI__builtin_neon_vabd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E);
+ case AArch64::BI__builtin_neon_vabdq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E);
+ case AArch64::BI__builtin_neon_vfma_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E);
+ case AArch64::BI__builtin_neon_vfmaq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E);
+ case AArch64::BI__builtin_neon_vbsl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E);
+ case AArch64::BI__builtin_neon_vbslq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E);
+ case AArch64::BI__builtin_neon_vrsqrts_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E);
+ case AArch64::BI__builtin_neon_vrsqrtsq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E);
+ case AArch64::BI__builtin_neon_vrecps_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E);
+ case AArch64::BI__builtin_neon_vrecpsq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E);
+ case AArch64::BI__builtin_neon_vcage_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E);
+ case AArch64::BI__builtin_neon_vcale_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E);
+ case AArch64::BI__builtin_neon_vcaleq_v:
+ std::swap(Ops[0], Ops[1]);
+ case AArch64::BI__builtin_neon_vcageq_v: {
+ Function *F;
+ if (VTy->getElementType()->isIntegerTy(64))
+ F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq);
+ else
+ F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
+ return EmitNeonCall(F, Ops, "vcage");
+ }
+ case AArch64::BI__builtin_neon_vcalt_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E);
+ case AArch64::BI__builtin_neon_vcagt_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E);
+ case AArch64::BI__builtin_neon_vcaltq_v:
+ std::swap(Ops[0], Ops[1]);
+ case AArch64::BI__builtin_neon_vcagtq_v: {
+ Function *F;
+ if (VTy->getElementType()->isIntegerTy(64))
+ F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq);
+ else
+ F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
+ return EmitNeonCall(F, Ops, "vcagt");
+ }
+ case AArch64::BI__builtin_neon_vtst_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E);
+ case AArch64::BI__builtin_neon_vtstq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E);
+ case AArch64::BI__builtin_neon_vhadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E);
+ case AArch64::BI__builtin_neon_vhaddq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E);
+ case AArch64::BI__builtin_neon_vhsub_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E);
+ case AArch64::BI__builtin_neon_vhsubq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E);
+ case AArch64::BI__builtin_neon_vrhadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E);
+ case AArch64::BI__builtin_neon_vrhaddq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E);
+ case AArch64::BI__builtin_neon_vqadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E);
+ case AArch64::BI__builtin_neon_vqaddq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E);
+ case AArch64::BI__builtin_neon_vqsub_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E);
+ case AArch64::BI__builtin_neon_vqsubq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E);
+ case AArch64::BI__builtin_neon_vshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E);
+ case AArch64::BI__builtin_neon_vshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E);
+ case AArch64::BI__builtin_neon_vqshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E);
+ case AArch64::BI__builtin_neon_vqshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E);
+ case AArch64::BI__builtin_neon_vrshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E);
+ case AArch64::BI__builtin_neon_vrshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E);
+ case AArch64::BI__builtin_neon_vqrshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
+ case AArch64::BI__builtin_neon_vqrshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
+ case AArch64::BI__builtin_neon_vmax_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
+ case AArch64::BI__builtin_neon_vmaxq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E);
+ case AArch64::BI__builtin_neon_vmin_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E);
+ case AArch64::BI__builtin_neon_vminq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E);
+ case AArch64::BI__builtin_neon_vpmax_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E);
+ case AArch64::BI__builtin_neon_vpmin_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E);
+ case AArch64::BI__builtin_neon_vpadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E);
+ case AArch64::BI__builtin_neon_vqdmulh_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E);
+ case AArch64::BI__builtin_neon_vqdmulhq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E);
+ case AArch64::BI__builtin_neon_vqrdmulh_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
+ case AArch64::BI__builtin_neon_vqrdmulhq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
+
+ // AArch64-only builtins
+ case AArch64::BI__builtin_neon_vfms_v:
+ case AArch64::BI__builtin_neon_vfmsq_v: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[1] = Builder.CreateFNeg(Ops[1]);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+
+ // LLVM's fma intrinsic puts the accumulator in the last position, but the
+ // AArch64 intrinsic has it first.
+ return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ }
+ case AArch64::BI__builtin_neon_vmaxnm_v:
+ case AArch64::BI__builtin_neon_vmaxnmq_v: {
+ Int = Intrinsic::aarch64_neon_vmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
+ }
+ case AArch64::BI__builtin_neon_vminnm_v:
+ case AArch64::BI__builtin_neon_vminnmq_v: {
+ Int = Intrinsic::aarch64_neon_vminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
+ }
+ case AArch64::BI__builtin_neon_vpmaxnm_v:
+ case AArch64::BI__builtin_neon_vpmaxnmq_v: {
+ Int = Intrinsic::aarch64_neon_vpmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
+ }
+ case AArch64::BI__builtin_neon_vpminnm_v:
+ case AArch64::BI__builtin_neon_vpminnmq_v: {
+ Int = Intrinsic::aarch64_neon_vpminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
+ }
+ case AArch64::BI__builtin_neon_vpmaxq_v: {
+ Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
+ }
+ case AArch64::BI__builtin_neon_vpminq_v: {
+ Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
+ }
+ case AArch64::BI__builtin_neon_vpaddq_v: {
+ Int = Intrinsic::arm_neon_vpadd;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
+ }
+ case AArch64::BI__builtin_neon_vmulx_v:
+ case AArch64::BI__builtin_neon_vmulxq_v: {
+ Int = Intrinsic::aarch64_neon_vmulx;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
+ }
+ }
}
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
View
14 lib/Driver/Tools.cpp
@@ -1407,6 +1407,14 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args,
CmdArgs.push_back ("-machine-sink-split=0");
}
+void Clang::AddAArch64TargetArgs(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ const Driver &D = getToolChain().getDriver();
+ // Honor -mfpu=.
+ if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ))
+ addFPUArgs(D, A, Args, CmdArgs);
+}
+
static bool
shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
const llvm::Triple &Triple) {
@@ -2498,9 +2506,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::hexagon:
AddHexagonTargetArgs(Args, CmdArgs);
break;
- }
-
+ case llvm::Triple::aarch64:
+ AddAArch64TargetArgs(Args, CmdArgs);
+ break;
+ }
// Pass the linker version in use.
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
View
2 lib/Driver/Tools.h
@@ -65,6 +65,8 @@ using llvm::opt::ArgStringList;
llvm::opt::ArgStringList &CmdArgs) const;
void AddHexagonTargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
+ void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs) const;
enum RewriteKind { RK_None, RK_Fragile, RK_NonFragile };
View
87 lib/Sema/SemaChecking.cpp
@@ -306,6 +306,10 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall))
return ExprError();
break;
+ case llvm::Triple::aarch64:
+ if (CheckAArch64BuiltinFunctionCall(BuiltinID, TheCall))
+ return ExprError();
+ break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
@@ -342,6 +346,9 @@ static unsigned RFT(unsigned t, bool shift = false) {
case NeonTypeFlags::Float32:
assert(!shift && "cannot shift float types!");
return (2 << IsQuad) - 1;
+ case NeonTypeFlags::Float64:
+ assert(!shift && "cannot shift float types!");
+ return (1 << IsQuad) - 1;
}
llvm_unreachable("Invalid NeonTypeFlag!");
}
@@ -367,10 +374,90 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) {
return Context.UnsignedShortTy;
case NeonTypeFlags::Float32:
return Context.FloatTy;
+ case NeonTypeFlags::Float64:
+ return Context.DoubleTy;
}
llvm_unreachable("Invalid NeonTypeFlag!");
}
+bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
+ CallExpr *TheCall) {
+
+ llvm::APSInt Result;
+
+ uint64_t mask = 0;
+ unsigned TV = 0;
+ int PtrArgNum = -1;
+ bool HasConstPtr = false;
+ switch (BuiltinID) {
+#define GET_NEON_AARCH64_OVERLOAD_CHECK
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_AARCH64_OVERLOAD_CHECK
+ }
+
+ // For NEON intrinsics which are overloaded on vector element type, validate
+ // the immediate which specifies which variant to emit.
+ unsigned ImmArg = TheCall->getNumArgs() - 1;
+ if (mask) {
+ if (SemaBuiltinConstantArg(TheCall, ImmArg, Result))
+ return true;
+
+ TV = Result.getLimitedValue(64);
+ if ((TV > 63) || (mask & (1ULL << TV)) == 0)
+ return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code)
+ << TheCall->getArg(ImmArg)->getSourceRange();
+ }
+
+ if (PtrArgNum >= 0) {
+ // Check that pointer arguments have the specified type.
+ Expr *Arg = TheCall->getArg(PtrArgNum);
+ if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg))
+ Arg = ICE->getSubExpr();
+ ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg);
+ QualType RHSTy = RHS.get()->getType();
+ QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context);
+ if (HasConstPtr)
+ EltTy = EltTy.withConst();
+ QualType LHSTy = Context.getPointerType(EltTy);
+ AssignConvertType ConvTy;
+ ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS);
+ if (RHS.isInvalid())
+ return true;
+ if (DiagnoseAssignmentResult(ConvTy, Arg->getLocStart(), LHSTy, RHSTy,
+ RHS.get(), AA_Assigning))
+ return true;
+ }
+
+ // For NEON intrinsics which take an immediate value as part of the
+ // instruction, range check them here.
+ unsigned i = 0, l = 0, u = 0;
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_NEON_AARCH64_IMMEDIATE_CHECK
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_AARCH64_IMMEDIATE_CHECK
+ }
+ ;
+
+ // We can't check the value of a dependent argument.
+ if (TheCall->getArg(i)->isTypeDependent() ||
+ TheCall->getArg(i)->isValueDependent())
+ return false;
+
+ // Check that the immediate argument is actually a constant.
+ if (SemaBuiltinConstantArg(TheCall, i, Result))
+ return true;
+
+ // Range check against the upper/lower values for this isntruction.
+ unsigned Val = Result.getZExtValue();
+ if (Val < l || Val > (u + l))
+ return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
+ << l << u + l << TheCall->getArg(i)->getSourceRange();
+
+ return false;
+}
+
bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall) {
assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
BuiltinID == ARM::BI__builtin_arm_strex) &&
View
47 lib/Sema/SemaType.cpp
@@ -4603,6 +4603,42 @@ static void HandleExtVectorTypeAttr(QualType &CurType,
CurType = T;
}
+static bool isPermittedNeonBaseType(QualType &Ty,
+ VectorType::VectorKind VecKind,
+ bool IsAArch64) {
+ const BuiltinType *BTy = Ty->getAs<BuiltinType>();
+ if (!BTy)
+ return false;
+
+ if (VecKind == VectorType::NeonPolyVector) {
+ if (IsAArch64) {
+ // AArch64 polynomial vectors are unsigned
+ return BTy->getKind() == BuiltinType::UChar ||
+ BTy->getKind() == BuiltinType::UShort;
+ } else {
+ // AArch32 polynomial vector are signed.
+ return BTy->getKind() == BuiltinType::SChar ||
+ BTy->getKind() == BuiltinType::Short;
+ }
+ }
+
+ // Non-polynomial vector types: the usual suspects are allowed, as well as
+ // float64_t on AArch64.
+ if (IsAArch64 && BTy->getKind() == BuiltinType::Double)
+ return true;
+
+ return BTy->getKind() == BuiltinType::SChar ||
+ BTy->getKind() == BuiltinType::UChar ||
+ BTy->getKind() == BuiltinType::Short ||
+ BTy->getKind() == BuiltinType::UShort ||
+ BTy->getKind() == BuiltinType::Int ||
+ BTy->getKind() == BuiltinType::UInt ||
+ BTy->getKind() == BuiltinType::LongLong ||
+ BTy->getKind() == BuiltinType::ULongLong ||
+ BTy->getKind() == BuiltinType::Float ||
+ BTy->getKind() == BuiltinType::Half;
+}
+
/// HandleNeonVectorTypeAttr - The "neon_vector_type" and
/// "neon_polyvector_type" attributes are used to create vector types that
/// are mangled according to ARM's ABI. Otherwise, these types are identical
@@ -4646,9 +4682,14 @@ static void HandleNeonVectorTypeAttr(QualType& CurType,
BTy->getKind() != BuiltinType::LongLong &&
BTy->getKind() != BuiltinType::ULongLong &&
BTy->getKind() != BuiltinType::Float)) {
- S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) <<CurType;
- Attr.setInvalid();
- return;
+ llvm::Triple::ArchType Arch =
+ S.Context.getTargetInfo().getTriple().getArch();
+ if (!isPermittedNeonBaseType(CurType, VecKind,
+ Arch == llvm::Triple::aarch64)) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType;
+ Attr.setInvalid();
+ return;
+ }
}
// The total size of the vector must be 64 or 128 bits.
unsigned typeSize = static_cast<unsigned>(S.Context.getTypeSize(CurType));
View
3,023 test/CodeGen/aarch64-neon-intrinsics.c
3,023 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
85 test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
@@ -0,0 +1,85 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - | FileCheck %s
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed long long int64_t;
+typedef unsigned long long uint64_t;
+typedef unsigned char poly8_t;
+typedef unsigned short poly16_t;
+typedef __fp16 float16_t;
+typedef float float32_t;
+typedef double float64_t;
+
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
+typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
+typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
+typedef __attribute__((neon_vector_type(2))) int int32x2_t;
+typedef __attribute__((neon_vector_type(4))) int int32x4_t;
+typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
+typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
+typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
+typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
+typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(2))) unsigned int uint32x2_t;
+typedef __attribute__((neon_vector_type(4))) unsigned int uint32x4_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
+typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
+
+// CHECK: 10__Int8x8_t
+void f1(int8x8_t) {}
+// CHECK: 11__Int16x4_t
+void f2(int16x4_t) {}
+// CHECK: 11__Int32x2_t
+void f3(int32x2_t) {}
+// CHECK: 11__Uint8x8_t
+void f4(uint8x8_t) {}
+// CHECK: 12__Uint16x4_t
+void f5(uint16x4_t) {}
+// CHECK: 13__Float16x4_t
+void f6(float16x4_t) {}
+// CHECK: 13__Float16x8_t
+void f7(float16x8_t) {}
+// CHECK: 12__Uint32x2_t
+void f8(uint32x2_t) {}
+// CHECK: 13__Float32x2_t
+void f9(float32x2_t) {}
+// CHECK: 13__Float32x4_t
+void f10(float32x4_t) {}
+// CHECK: 11__Poly8x8_t
+void f11(poly8x8_t v) {}
+// CHECK: 12__Poly16x4_t
+void f12(poly16x4_t v) {}
+// CHECK:12__Poly8x16_t
+void f13(poly8x16_t v) {}
+// CHECK:12__Poly16x8_t
+void f14(poly16x8_t v) {}
+// CHECK: 11__Int8x16_t
+void f15(int8x16_t) {}
+// CHECK: 11__Int16x8_t
+void f16(int16x8_t) {}
+// CHECK:11__Int32x4_t
+void f17(int32x4_t) {}
+// CHECK: 12__Uint8x16_t
+void f18(uint8x16_t) {}
+// CHECK: 12__Uint16x8_t
+void f19(uint16x8_t) {}
+// CHECK: 12__Uint32x4_t
+void f20(uint32x4_t) {}
+// CHECK: 11__Int64x2_t
+void f21(int64x2_t) {}
+// CHECK: 12__Uint64x2_t
+void f22(uint64x2_t) {}
+// CHECK: 13__Float64x2_t
+void f23(float64x2_t) {}
View
17 test/CodeGenCXX/mangle-neon-vectors.cpp
@@ -1,6 +1,7 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple arm-none-linux-gnueabi %s -emit-llvm -o - | FileCheck %s
typedef float float32_t;
+typedef __fp16 float16_t;
typedef signed char poly8_t;
typedef short poly16_t;
typedef unsigned long long uint64_t;
@@ -11,8 +12,10 @@ typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
-typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
-typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
// CHECK: 16__simd64_int32_t
void f1(int32x2_t v) { }
@@ -26,7 +29,11 @@ void f4(uint64x2_t v) { }
void f5(float32x2_t v) { }
// CHECK: 19__simd128_float32_t
void f6(float32x4_t v) { }
+// CHECK: 18__simd64_float16_t
+void f7(float16x4_t v) {}
+// CHECK: 19__simd128_float16_t
+void f8(float16x8_t v) {}
// CHECK: 17__simd128_poly8_t
-void f7(poly8x16_t v) { }
+void f9(poly8x16_t v) {}
// CHECK: 18__simd128_poly16_t
-void f8(poly16x8_t v) { }
+void f10(poly16x8_t v) {}
View
3 test/Preprocessor/aarch64-target-features.c
@@ -30,3 +30,6 @@
// RUN: %clang -target aarch64-none-linux-gnu -fshort-enums -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTENUMS %s
// CHECK-SHORTENUMS: __ARM_SIZEOF_MINIMAL_ENUM 1
+// RUN: %clang -target aarch64-none-linux-gnu -mfpu=neon -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NEON %s
+// CHECK-NEON: __AARCH_ADVSIMD_FP
+// CHECK-NEON: __AARCH_FEATURE_ADVSIMD
View
34 test/Sema/aarch64-neon-vector-types.c
@@ -0,0 +1,34 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 %s -triple aarch64-none-linux-gnu -fsyntax-only -verify
+
+typedef float float32_t;
+typedef unsigned char poly8_t;
+typedef unsigned short poly16_t;
+typedef unsigned long long uint64_t;
+
+// Define some valid Neon types.
+typedef __attribute__((neon_vector_type(2))) int int32x2_t;
+typedef __attribute__((neon_vector_type(4))) int int32x4_t;
+typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
+
+// The attributes must have a single argument.
+typedef __attribute__((neon_vector_type(2, 4))) int only_one_arg; // expected-error{{attribute takes one argument}}
+
+// The number of elements must be an ICE.
+typedef __attribute__((neon_vector_type(2.0))) int non_int_width; // expected-error{{attribute requires an integer constant}}
+
+// Only certain element types are allowed.
+typedef __attribute__((neon_vector_type(2))) double double_elt;
+typedef __attribute__((neon_vector_type(4))) void* ptr_elt; // expected-error{{invalid vector element type}}
+typedef __attribute__((neon_polyvector_type(4))) float32_t bad_poly_elt; // expected-error{{invalid vector element type}}
+struct aggr { signed char c; };
+typedef __attribute__((neon_vector_type(8))) struct aggr aggregate_elt; // expected-error{{invalid vector element type}}
+
+// The total vector size must be 64 or 128 bits.
+typedef __attribute__((neon_vector_type(1))) int int32x1_t; // expected-error{{Neon vector size must be 64 or 128 bits}}
+typedef __attribute__((neon_vector_type(3))) int int32x3_t; // expected-error{{Neon vector size must be 64 or 128 bits}}
View
574 utils/TableGen/NeonEmitter.cpp
@@ -90,7 +90,8 @@ enum OpKind {
OpReinterpret,
OpAbdl,
OpAba,
- OpAbal
+ OpAbal,
+ OpDiv
};
enum ClassKind {
@@ -127,7 +128,8 @@ class NeonTypeFlags {
Poly8,
Poly16,
Float16,
- Float32
+ Float32,
+ Float64
};
NeonTypeFlags(unsigned F) : Flags(F) {}
@@ -205,6 +207,7 @@ class NeonEmitter {
OpMap["OP_ABDL"] = OpAbdl;
OpMap["OP_ABA"] = OpAba;
OpMap["OP_ABAL"] = OpAbal;
+ OpMap["OP_DIV"] = OpDiv;
Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst");
@@ -235,7 +238,18 @@ class NeonEmitter {
void runTests(raw_ostream &o);
private:
- void emitIntrinsic(raw_ostream &OS, Record *R);
+ void emitIntrinsic(raw_ostream &OS, Record *R,
+ StringMap<ClassKind> &EmittedMap);
+ void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64GenBuiltinDef);
+ void genOverloadTypeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64TypeCheck);
+ void genIntrinsicRangeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64RangeCheck);
+ void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
+ bool isA64TestGen);
};
} // end anonymous namespace
@@ -259,6 +273,7 @@ static void ParseTypes(Record *r, std::string &s,
case 'l':
case 'h':
case 'f':
+ case 'd':
break;
default:
PrintFatalError(r->getLoc(),
@@ -347,6 +362,8 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
poly = false;
if (type == 'f')
type = 'i';
+ if (type == 'd')
+ type = 'l';
break;
case 'x':
usgn = false;
@@ -470,6 +487,13 @@ static std::string TypeString(const char mod, StringRef typestr) {
break;
s += quad ? "x4" : "x2";
break;
+ case 'd':
+ s += "float64";
+ if (scal)
+ break;
+ s += quad ? "x2" : "x1";
+ break;
+
default:
PrintFatalError("unhandled type!");
}
@@ -647,6 +671,18 @@ static void InstructionTypeCode(const StringRef &typeStr,
default: break;
}
break;
+ case 'd':
+ switch (ck) {
+ case ClassS:
+ case ClassI:
+ typeCode += "f64";
+ break;
+ case ClassW:
+ PrintFatalError("unhandled type!");
+ default:
+ break;
+ }
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1252,6 +1288,9 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) {
case 'l': nElts = 1; break;
case 'h': nElts = 4; break;
case 'f': nElts = 2; break;
+ case 'd':
+ nElts = 1;
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1488,6 +1527,9 @@ static std::string GenOpString(OpKind op, const std::string &proto,
}
break;
}
+ case OpDiv:
+ s += "__a / __b;";
+ break;
default:
PrintFatalError("unknown OpKind!");
}
@@ -1533,6 +1575,9 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
case 'f':
ET = NeonTypeFlags::Float32;
break;
+ case 'd':
+ ET = NeonTypeFlags::Float64;
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1776,27 +1821,47 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#ifndef __ARM_NEON_H\n";
OS << "#define __ARM_NEON_H\n\n";
- OS << "#ifndef __ARM_NEON__\n";
+ OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
OS << "#error \"NEON support not enabled\"\n";
OS << "#endif\n\n";
OS << "#include <stdint.h>\n\n";
// Emit NEON-specific scalar typedefs.
OS << "typedef float float32_t;\n";
+ OS << "typedef __fp16 float16_t;\n";
+
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef double float64_t;\n";
+ OS << "#endif\n\n";
+
+ // For now, signedness of polynomial types depends on target
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef uint8_t poly8_t;\n";
+ OS << "typedef uint16_t poly16_t;\n";
+ OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
- OS << "typedef uint16_t float16_t;\n";
+ OS << "#endif\n";
// Emit Neon vector typedefs.
- std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
+ std::string TypedefTypes(
+ "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
SmallVector<StringRef, 24> TDTypeVec;
ParseTypes(0, TypedefTypes, TDTypeVec);
// Emit vector typedefs.
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
bool dummy, quad = false, poly = false;
- (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ bool isA64 = false;
+
+ if (type == 'd' && quad)
+ isA64 = true;
+
+ if (isA64)
+ OS << "#ifdef __aarch64__\n";
+
if (poly)
OS << "typedef __attribute__((neon_polyvector_type(";
else
@@ -1809,50 +1874,96 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << TypeString('s', TDTypeVec[i]);
OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
+
+ if (isA64)
+ OS << "#endif\n";
}
OS << "\n";
// Emit struct typedefs.
for (unsigned vi = 2; vi != 5; ++vi) {
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
+ bool dummy, quad = false, poly = false;
+ char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ bool isA64 = false;
+
+ if (type == 'd' && quad)
+ isA64 = true;
+
+ if (isA64)
+ OS << "#ifdef __aarch64__\n";
+
std::string ts = TypeString('d', TDTypeVec[i]);
std::string vs = TypeString('0' + vi, TDTypeVec[i]);
OS << "typedef struct " << vs << " {\n";
OS << " " << ts << " val";
OS << "[" << utostr(vi) << "]";
OS << ";\n} ";
- OS << vs << ";\n\n";
+ OS << vs << ";\n";
+
+ if (isA64)
+ OS << "#endif\n";
+
+ OS << "\n";
}
}
OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<ClassKind> EmittedMap;
+
// Emit vmovl, vmull and vabd intrinsics first so they can be used by other
// intrinsics. (Some of the saturating multiply instructions are also
// used to implement the corresponding "_lane" variants, but tablegen
// sorts the records into alphabetical order so that the "_lane" variants
// come after the intrinsics they use.)
- emitIntrinsic(OS, Records.getDef("VMOVL"));
- emitIntrinsic(OS, Records.getDef("VMULL"));
- emitIntrinsic(OS, Records.getDef("VABD"));
-
+ emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
+
+ // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
+ // common intrinsics appear only once in the output stream.
+ // The check for uniquiness is done in emitIntrinsic.
+ // Emit ARM intrinsics.
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
- if (R->getName() != "VMOVL" &&
- R->getName() != "VMULL" &&
+
+ // Skip AArch64 intrinsics; they will be emitted at the end.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (isA64)
+ continue;
+
+ if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
R->getName() != "VABD")
- emitIntrinsic(OS, R);
+ emitIntrinsic(OS, R, EmittedMap);
}
+ // Emit AArch64-specific intrinsics.
+ OS << "#ifdef __aarch64__\n";
+
+ for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+ Record *R = RV[i];
+
+ // Skip ARM intrinsics already included above.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64)
+ continue;
+
+ emitIntrinsic(OS, R, EmittedMap);
+ }
+
+ OS << "#endif\n\n";
+
OS << "#undef __ai\n\n";
OS << "#endif /* __ARM_NEON_H */\n";
}
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
-/// intrinsics specified by record R.
-void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
+/// intrinsics specified by record R checking for intrinsic uniqueness.
+void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
+ StringMap<ClassKind> &EmittedMap) {
std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
std::string Types = R->getValueAsString("Types");
@@ -1879,12 +1990,20 @@ void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
if (srcti == ti || inQuad != outQuad)
continue;
- OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
- OpCast, ClassS);
+ std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
+ OpCast, ClassS);
+ if (EmittedMap.count(s))
+ continue;
+ EmittedMap[s] = ClassS;
+ OS << s;
}
} else {
- OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
- kind, classKind);
+ std::string s =
+ GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
+ if (EmittedMap.count(s))
+ continue;
+ EmittedMap[s] = classKind;
+ OS << s;
}
}
OS << "\n";
@@ -1912,56 +2031,151 @@ static unsigned RangeFromType(const char mod, StringRef typestr) {
}
}
-/// runHeader - Emit a file with sections defining:
-/// 1. the NEON section of BuiltinsARM.def.
-/// 2. the SemaChecking code for the type overload checking.
-/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
-void NeonEmitter::runHeader(raw_ostream &OS) {
- std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
-
+/// Generate the ARM and AArch64 intrinsic range checking code for
+/// shift/lane immediates, checking for unique declarations.
+void
+NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64RangeCheck) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
StringMap<OpKind> EmittedMap;
- // Generate BuiltinsARM.def for NEON
- OS << "#ifdef GET_NEON_BUILTINS\n";
+ // Generate the intrinsic range checking code for shift/lane immediates.
+ if (isA64RangeCheck)
+ OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
+ else
+ OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
+
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
if (k != OpNone)
continue;
+ std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
+ std::string Types = R->getValueAsString("Types");
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
if (Proto.find('a') != std::string::npos)
continue;
- std::string Types = R->getValueAsString("Types");
+ // Functions which do not have an immediate do not need to have range
+ // checking code emitted.
+ size_t immPos = Proto.find('i');
+ if (immPos == std::string::npos)
+ continue;
+
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
if (R->getSuperClasses().size() < 2)
PrintFatalError(R->getLoc(), "Builtin has no class kind");
- std::string name = R->getValueAsString("Name");
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+ // Do not include AArch64 range checks if not generating code for AArch64.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64RangeCheck && isA64)
+ continue;
+
+ // Include ARM range checks in AArch64 but only if ARM intrinsics are not
+ // redefined by AArch64 to handle new types.
+ if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(name)) {
+ ClassKind &A64CK = A64IntrinsicMap[name];
+ if (A64CK == ck && ck != ClassNone)
+ continue;
+ }
+
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- // Generate the BuiltinsARM.def declaration for this builtin, ensuring
- // that each unique BUILTIN() macro appears only once in the output
- // stream.
- std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
- if (EmittedMap.count(bd))
+ std::string namestr, shiftstr, rangestr;
+
+ if (R->getValueAsBit("isVCVT_N")) {
+ // VCVT between floating- and fixed-point values takes an immediate
+ // in the range 1 to 32.
+ ck = ClassB;
+ rangestr = "l = 1; u = 31"; // upper bound = l + u
+ } else if (Proto.find('s') == std::string::npos) {
+ // Builtins which are overloaded by type will need to have their upper
+ // bound computed at Sema time based on the type constant.
+ ck = ClassB;
+ if (R->getValueAsBit("isShift")) {
+ shiftstr = ", true";
+
+ // Right shifts have an 'r' in the name, left shifts do not.
+ if (name.find('r') != std::string::npos)
+ rangestr = "l = 1; ";
+ }
+ rangestr += "u = RFT(TV" + shiftstr + ")";
+ } else {
+ // The immediate generally refers to a lane in the preceding argument.
+ assert(immPos > 0 && "unexpected immediate operand");
+ rangestr =
+ "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
+ }
+ // Make sure cases appear only once by uniquing them in a string map.
+ namestr = MangleName(name, TypeVec[ti], ck);
+ if (EmittedMap.count(namestr))
continue;
+ EmittedMap[namestr] = OpNone;
- EmittedMap[bd] = OpNone;
- OS << bd << "\n";
+ // Calculate the index of the immediate that should be range checked.
+ unsigned immidx = 0;
+
+ // Builtins that return a struct of multiple vectors have an extra
+ // leading arg for the struct return.
+ if (Proto[0] >= '2' && Proto[0] <= '4')
+ ++immidx;
+
+ // Add one to the index for each argument until we reach the immediate
+ // to be checked. Structs of vectors are passed as multiple arguments.
+ for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
+ switch (Proto[ii]) {
+ default:
+ immidx += 1;
+ break;
+ case '2':
+ immidx += 2;
+ break;
+ case '3':
+ immidx += 3;
+ break;
+ case '4':
+ immidx += 4;
+ break;
+ case 'i':
+ ie = ii + 1;
+ break;
+ }
+ }
+ if (isA64RangeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
+ << rangestr << "; break;\n";
}
}
OS << "#endif\n\n";
+}
+
+/// Generate the ARM and AArch64 overloaded type checking code for
+/// SemaChecking.cpp, checking for unique builtin declarations.
+void
+NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64TypeCheck) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<OpKind> EmittedMap;
// Generate the overloaded type checking code for SemaChecking.cpp
- OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
+ if (isA64TypeCheck)
+ OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
+ else
+ OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
+
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
@@ -1988,6 +2202,21 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
if (R->getSuperClasses().size() < 2)
PrintFatalError(R->getLoc(), "Builtin has no class kind");
+ // Do not include AArch64 type checks if not generating code for AArch64.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64TypeCheck && isA64)
+ continue;
+
+ // Include ARM type check in AArch64 but only if ARM intrinsics
+ // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
+ // redefined in AArch64 to handle an additional 2 x f64 type.
+ ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+ if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(name)) {
+ ClassKind &A64CK = A64IntrinsicMap[name];
+ if (A64CK == ck && ck != ClassNone)
+ continue;
+ }
+
int si = -1, qi = -1;
uint64_t mask = 0, qmask = 0;
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
@@ -2035,19 +2264,25 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
}
if (mask) {
- OS << "case ARM::BI__builtin_neon_"
- << MangleName(name, TypeVec[si], ClassB)
- << ": mask = " << "0x" << utohexstr(mask) << "ULL";
+ if (isA64TypeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
+ << "0x" << utohexstr(mask) << "ULL";
if (PtrArgNum >= 0)
OS << "; PtrArgNum = " << PtrArgNum;
if (HasConstPtr)
OS << "; HasConstPtr = true";
OS << "; break;\n";
}
if (qmask) {
- OS << "case ARM::BI__builtin_neon_"
- << MangleName(name, TypeVec[qi], ClassB)
- << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
+ if (isA64TypeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
+ << "0x" << utohexstr(qmask) << "ULL";
if (PtrArgNum >= 0)
OS << "; PtrArgNum = " << PtrArgNum;
if (HasConstPtr)
@@ -2056,31 +2291,37 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
}
}
OS << "#endif\n\n";
+}
+
+/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
+/// declaration of builtins, checking for unique builtin declarations.
+void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64GenBuiltinDef) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<OpKind> EmittedMap;
+
+ // Generate BuiltinsARM.def and BuiltinsAArch64.def
+ if (isA64GenBuiltinDef)
+ OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
+ else
+ OS << "#ifdef GET_NEON_BUILTINS\n";
- // Generate the intrinsic range checking code for shift/lane immediates.
- OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
-
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
if (k != OpNone)
continue;
- std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
- std::string Types = R->getValueAsString("Types");
+ std::string name = R->getValueAsString("Name");
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
if (Proto.find('a') != std::string::npos)
continue;
- // Functions which do not have an immediate do not need to have range
- // checking code emitted.
- size_t immPos = Proto.find('i');
- if (immPos == std::string::npos)
- continue;
-
+ std::string Types = R->getValueAsString("Types");
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
@@ -2089,70 +2330,90 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- std::string namestr, shiftstr, rangestr;
-
- if (R->getValueAsBit("isVCVT_N")) {
- // VCVT between floating- and fixed-point values takes an immediate
- // in the range 1 to 32.
- ck = ClassB;
- rangestr = "l = 1; u = 31"; // upper bound = l + u
- } else if (Proto.find('s') == std::string::npos) {
- // Builtins which are overloaded by type will need to have their upper
- // bound computed at Sema time based on the type constant.
- ck = ClassB;
- if (R->getValueAsBit("isShift")) {
- shiftstr = ", true";
+ // Do not include AArch64 BUILTIN() macros if not generating
+ // code for AArch64
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64GenBuiltinDef && isA64)
+ continue;
- // Right shifts have an 'r' in the name, left shifts do not.
- if (name.find('r') != std::string::npos)
- rangestr = "l = 1; ";
- }
- rangestr += "u = RFT(TV" + shiftstr + ")";
- } else {
- // The immediate generally refers to a lane in the preceding argument.
- assert(immPos > 0 && "unexpected immediate operand");
- rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
- }
- // Make sure cases appear only once by uniquing them in a string map.
- namestr = MangleName(name, TypeVec[ti], ck);
- if (EmittedMap.count(namestr))
+ // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
+ // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
+ // redefined in AArch64 to handle an additional 2 x f64 type.
+ if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(name)) {
+ ClassKind &A64CK = A64IntrinsicMap[name];
+ if (A64CK == ck && ck != ClassNone)
continue;
- EmittedMap[namestr] = OpNone;
-
- // Calculate the index of the immediate that should be range checked.
- unsigned immidx = 0;
+ }
- // Builtins that return a struct of multiple vectors have an extra
- // leading arg for the struct return.
- if (Proto[0] >= '2' && Proto[0] <= '4')
- ++immidx;
+ for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
+ // Generate the declaration for this builtin, ensuring
+ // that each unique BUILTIN() macro appears only once in the output
+ // stream.
+ std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
+ if (EmittedMap.count(bd))
+ continue;
- // Add one to the index for each argument until we reach the immediate
- // to be checked. Structs of vectors are passed as multiple arguments.
- for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
- switch (Proto[ii]) {
- default: immidx += 1; break;
- case '2': immidx += 2; break;
- case '3': immidx += 3; break;
- case '4': immidx += 4; break;
- case 'i': ie = ii + 1; break;
- }
- }
- OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
- << ": i = " << immidx << "; " << rangestr << "; break;\n";
+ EmittedMap[bd] = OpNone;
+ OS << bd << "\n";
}
}
OS << "#endif\n\n";
}
+/// runHeader - Emit a file with sections defining:
+/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
+/// 2. the SemaChecking code for the type overload checking.
+/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
+void NeonEmitter::runHeader(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+
+ // build a map of AArch64 intriniscs to be used in uniqueness checks.
+ StringMap<ClassKind> A64IntrinsicMap;
+ for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+ Record *R = RV[i];
+
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64)
+ continue;
+
+ ClassKind CK = ClassNone;
+ if (R->getSuperClasses().size() >= 2)
+ CK = ClassMap[R->getSuperClasses()[1]];
+
+ std::string Name = R->getValueAsString("Name");
+ if (A64IntrinsicMap.count(Name))
+ continue;
+ A64IntrinsicMap[Name] = CK;
+ }
+
+ // Generate BuiltinsARM.def for ARM
+ genBuiltinsDef(OS, A64IntrinsicMap, false);
+
+ // Generate BuiltinsAArch64.def for AArch64
+ genBuiltinsDef(OS, A64IntrinsicMap, true);
+
+ // Generate ARM overloaded type checking code for SemaChecking.cpp
+ genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
+
+ // Generate AArch64 overloaded type checking code for SemaChecking.cpp
+ genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
+
+ // Generate ARM range checking code for shift/lane immediates.
+ genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
+
+ // Generate the AArch64 range checking code for shift/lane immediates.
+ genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
+}
+
/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
static std::string GenTest(const std::string &name,
const std::string &proto,
StringRef outTypeStr, StringRef inTypeStr,
bool isShift, bool isHiddenLOp,
- ClassKind ck, const std::string &InstName) {
+ ClassKind ck, const std::string &InstName,
+ bool isA64,
+ std::string & testFuncProto) {
assert(!proto.empty() && "");
std::string s;
@@ -2167,36 +2428,45 @@ static std::string GenTest(const std::string &name,
mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
}
+ // todo: GenerateChecksForIntrinsic does not generate CHECK
+ // for aarch64 instructions yet
std::vector<std::string> FileCheckPatterns;
- GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
- isHiddenLOp, FileCheckPatterns);
+ if (!isA64) {
+ GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
+ isHiddenLOp, FileCheckPatterns);
+ s+= "// CHECK_ARM: test_" + mangledName + "\n";
+ }
+ s += "// CHECK_AARCH64: test_" + mangledName + "\n";
// Emit the FileCheck patterns.
- s += "// CHECK: test_" + mangledName + "\n";
// If for any reason we do not want to emit a check, mangledInst
// will be the empty string.
if (FileCheckPatterns.size()) {
for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
e = FileCheckPatterns.end();
i != e;
++i) {
- s += "// CHECK: " + *i + "\n";
+ s += "// CHECK_ARM: " + *i + "\n";
}
}
// Emit the start of the test function.
- s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
+
+ testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
char arg = 'a';
std::string comma;
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
// Do not create arguments for values that must be immediate constants.
if (proto[i] == 'i')
continue;
- s += comma + TypeString(proto[i], inTypeStr) + " ";
- s.push_back(arg);
+ testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
+ testFuncProto.push_back(arg);
comma = ", ";
}
- s += ") {\n ";
+ testFuncProto += ")";
+
+ s+= testFuncProto;
+ s+= " {\n ";
if (proto[0] != 'v')
s += "return ";
@@ -2220,20 +2490,14 @@ static std::string GenTest(const std::string &name,
return s;
}
-/// runTests - Write out a complete set of tests for all of the Neon
-/// intrinsics.
-void NeonEmitter::runTests(raw_ostream &OS) {
- OS <<
- "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n"
- "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
- "// RUN: | FileCheck %s\n"
- "\n"
- "// REQUIRES: long_tests\n"
- "\n"
- "#include <arm_neon.h>\n"
- "\n";
+/// Write out all intrinsic tests for the specified target, checking
+/// for intrinsic test uniqueness.
+void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
+ bool isA64GenTest) {
+ if (isA64GenTest)
+ OS << "#ifdef __aarch64__\n";
- std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
std::string name = R->getValueAsString("Name");
@@ -2242,6 +2506,12 @@ void NeonEmitter::runTests(raw_ostream &OS) {
bool isShift = R->getValueAsBit("isShift");
std::string InstName = R->getValueAsString("InstName");
bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
+ bool isA64 = R->getValueAsBit("isA64");
+
+ // do not include AArch64 intrinsic test if not generating
+ // code for AArch64
+ if (!isA64GenTest && isA64)
+ continue;
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
@@ -2261,16 +2531,56 @@ void NeonEmitter::runTests(raw_ostream &OS) {
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
if (srcti == ti || inQuad != outQuad)
continue;
- OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
- isShift, isHiddenLOp, ck, InstName);
+ std::string testFuncProto;
+ std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
+ isShift, isHiddenLOp, ck, InstName, isA64,
+ testFuncProto);
+ if (EmittedMap.count(testFuncProto))
+ continue;
+ EmittedMap[testFuncProto] = kind;
+ OS << s << "\n";
}
} else {
- OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti],
- isShift, isHiddenLOp, ck, InstName);
+ std::string testFuncProto;
+ std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
+ isHiddenLOp, ck, InstName, isA64, testFuncProto);
+ if (EmittedMap.count(testFuncProto))
+ continue;
+ EmittedMap[testFuncProto] = kind;
+ OS << s << "\n";
}
}
- OS << "\n";
}
+
+ if (isA64GenTest)
+ OS << "#endif\n";
+}
+/// runTests - Write out a complete set of tests for all of the Neon
+/// intrinsics.
+void NeonEmitter::runTests(raw_ostream &OS) {
+ OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
+ "apcs-gnu\\\n"
+ "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
+ "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
+ "\n"
+ "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
+ "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n"
+ "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
+ "\n"
+ "// REQUIRES: long_tests\n"
+ "\n"
+ "#include <arm_neon.h>\n"
+ "\n";
+
+ // ARM tests must be emitted before AArch64 tests to ensure
+ // tests for intrinsics that are common to ARM and AArch64
+ // appear only once in the output stream.
+ // The check for uniqueness is done in genTargetTest.
+ StringMap<OpKind> EmittedMap;
+
+ genTargetTest(OS, EmittedMap, false);
+
+ genTargetTest(OS, EmittedMap, true);
}
namespace clang {

0 comments on commit b793f0d

Please sign in to comment.
Something went wrong with that request. Please try again.