Skip to content

Commit

Permalink
[ARM] Pass half or i16 types for NEON intrinsics
Browse files Browse the repository at this point in the history
When generating NEON intrinsics, this determines the NEON data type, and
whether it should be a half type or an i16 type. That is, we always pass a half
type for AArch64 (this hasn't changed), and now also for ARM, but only when
FullFP16 is enabled; otherwise we pass i16.

This is intended to be a non-functional change, but together with the backend
work in D44538, which adds support for f16 vectors, this enables adding the
AArch32 FP16 (vector) intrinsics.

Differential Revision: https://reviews.llvm.org/D44561

llvm-svn: 327836
  • Loading branch information
Sjoerd Meijer committed Mar 19, 2018
1 parent f646bcc commit 87793e7
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 20 deletions.
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/TargetInfo.h
Expand Up @@ -61,6 +61,8 @@ class TargetInfo : public RefCountedBase<TargetInfo> {
bool TLSSupported;
bool VLASupported;
bool NoAsmVariants; // True if {|} are normal characters.
bool HasLegalHalfType; // True if the backend supports operations on the half
// LLVM IR type.
bool HasFloat128;
unsigned char PointerWidth, PointerAlign;
unsigned char BoolWidth, BoolAlign;
Expand Down Expand Up @@ -361,6 +363,9 @@ class TargetInfo : public RefCountedBase<TargetInfo> {
return (getPointerWidth(0) >= 64) || getTargetOpts().ForceEnableInt128;
} // FIXME

/// \brief Determine whether half is a legal type on this target, i.e. whether
/// the backend supports operations on the half LLVM IR type.
virtual bool hasLegalHalfType() const { return HasLegalHalfType; }

/// \brief Determine whether the __float128 type is supported on this target.
///
/// Returns the HasFloat128 flag, which the TargetInfo constructor initializes
/// to false.
virtual bool hasFloat128Type() const { return HasFloat128; }

Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/TargetInfo.cpp
Expand Up @@ -32,6 +32,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
TLSSupported = true;
VLASupported = true;
NoAsmVariants = false;
HasLegalHalfType = false;
HasFloat128 = false;
PointerWidth = PointerAlign = 32;
BoolWidth = BoolAlign = 8;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/AArch64.cpp
Expand Up @@ -49,6 +49,8 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
IntMaxType = SignedLong;
}

// All AArch64 implementations support ARMv8 FP, which makes half a legal type.
HasLegalHalfType = true;

LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
MaxVectorAlign = 128;
Expand Down
7 changes: 3 additions & 4 deletions clang/lib/Basic/Targets/ARM.cpp
Expand Up @@ -379,7 +379,6 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
Unaligned = 1;
SoftFloat = SoftFloatABI = false;
HWDiv = 0;
HasFullFP16 = 0;

// This does not diagnose illegal cases like having both
// "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp".
Expand Down Expand Up @@ -421,7 +420,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
} else if (Feature == "+fp16") {
HW_FP |= HW_FP_HP;
} else if (Feature == "+fullfp16") {
HasFullFP16 = 1;
HasLegalHalfType = true;
}
}
HW_FP &= ~HW_FP_remove;
Expand Down Expand Up @@ -714,11 +713,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__ARM_FP_FAST", "1");

// Armv8.2-A FP16 vector intrinsic
if ((FPU & NeonFPU) && HasFullFP16)
if ((FPU & NeonFPU) && HasLegalHalfType)
Builder.defineMacro("__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", "1");

// Armv8.2-A FP16 scalar intrinsics
if (HasFullFP16)
if (HasLegalHalfType)
Builder.defineMacro("__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", "1");


Expand Down
1 change: 0 additions & 1 deletion clang/lib/Basic/Targets/ARM.h
Expand Up @@ -69,7 +69,6 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
unsigned Crypto : 1;
unsigned DSP : 1;
unsigned Unaligned : 1;
unsigned HasFullFP16 : 1;

enum {
LDREX_B = (1 << 0), /// byte (8-bit)
Expand Down
32 changes: 17 additions & 15 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -3441,7 +3441,7 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,

static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
NeonTypeFlags TypeFlags,
llvm::Triple::ArchType Arch,
bool HasLegalHalfType=true,
bool V1Ty=false) {
int IsQuad = TypeFlags.isQuad();
switch (TypeFlags.getEltType()) {
Expand All @@ -3452,9 +3452,7 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
case NeonTypeFlags::Poly16:
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
case NeonTypeFlags::Float16:
// FIXME: Only AArch64 backend can so far properly handle half types.
// Remove else part once ARM backend support for half is complete.
if (Arch == llvm::Triple::aarch64)
if (HasLegalHalfType)
return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
else
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
Expand Down Expand Up @@ -4338,8 +4336,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
bool Usgn = Type.isUnsigned();
bool Quad = Type.isQuad();
const bool HasLegalHalfType = getTarget().hasLegalHalfType();

llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
Expand Down Expand Up @@ -4413,13 +4412,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvt_f32_v:
case NEON::BI__builtin_neon_vcvtq_f32_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_f16_v:
case NEON::BI__builtin_neon_vcvtq_f16_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
HasLegalHalfType);
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_n_f16_v:
Expand Down Expand Up @@ -5528,7 +5529,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
bool usgn = Type.isUnsigned();
bool rightShift = false;

llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
llvm::VectorType *VTy = GetNeonType(this, Type,
getTarget().hasLegalHalfType());
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
Expand Down Expand Up @@ -5774,7 +5776,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID

// Determine the type of this overloaded NEON intrinsic.
NeonTypeFlags Type(Result.getZExtValue());
llvm::VectorType *Ty = GetNeonType(&CGF, Type, Arch);
llvm::VectorType *Ty = GetNeonType(&CGF, Type);
if (!Ty)
return nullptr;

Expand Down Expand Up @@ -6828,7 +6830,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
}

llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
llvm::VectorType *VTy = GetNeonType(this, Type);
llvm::Type *Ty = VTy;
if (!Ty)
return nullptr;
Expand Down Expand Up @@ -6893,7 +6895,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
NeonTypeFlags(NeonTypeFlags::Float64, false, true), Arch);
NeonTypeFlags(NeonTypeFlags::Float64, false, true));
Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
Expand Down Expand Up @@ -7132,22 +7134,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvt_f64_v:
case NEON::BI__builtin_neon_vcvtq_f64_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad), Arch);
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_f64_f32: {
assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
"unexpected vcvt_f64_f32 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
}
case NEON::BI__builtin_neon_vcvt_f32_f64: {
assert(Type.getEltType() == NeonTypeFlags::Float32 &&
"unexpected vcvt_f32_f64 builtin");
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag, Arch));
Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
}
Expand Down Expand Up @@ -7244,7 +7246,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Quad = true;
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
llvm::Type *VTy = GetNeonType(this,
NeonTypeFlags(NeonTypeFlags::Float64, false, Quad), Arch);
NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
Expand Down

0 comments on commit 87793e7

Please sign in to comment.