Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,8 @@ X86 Support

Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
- More intrinsics for the following AArch64 instructions:
FCVTZ[US], FCVTN[US], FCVTM[US], FCVTP[US], FCVTA[US]

Android Support
^^^^^^^^^^^^^^^
Expand Down
65 changes: 45 additions & 20 deletions clang/include/clang/Basic/arm_neon.td
Original file line number Diff line number Diff line change
Expand Up @@ -1466,26 +1466,51 @@ def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Converts
def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">;
def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">;
def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">;
def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">;
def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;
def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;
def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">;
def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">;
def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">;
def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;
def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">;

def SCALAR_FCVTN_F32toSS : SInst<"vcvtn_s32", "(1S)1", "Sf">;
def SCALAR_FCVTN_F32toUS : SInst<"vcvtn_u32", "(1U)1", "Sf">;
def SCALAR_FCVTN_F64toSS : SInst<"vcvtn_s32", "(1S<)1", "Sd">;
def SCALAR_FCVTN_F64toUS : SInst<"vcvtn_u32", "(1U<)1", "Sd">;
def SCALAR_FCVTN_F32toSD : SInst<"vcvtn_s64", "(1S>)1", "Sf">;
def SCALAR_FCVTN_F32toUD : SInst<"vcvtn_u64", "(1U>)1", "Sf">;
def SCALAR_FCVTN_F64toSD : SInst<"vcvtn_s64", "(1S)1", "Sd">;
def SCALAR_FCVTN_F64toUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;

def SCALAR_FCVTM_F32toSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
def SCALAR_FCVTM_F32toUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
def SCALAR_FCVTM_F64toSS : SInst<"vcvtm_s32", "(1S<)1", "Sd">;
def SCALAR_FCVTM_F64toUS : SInst<"vcvtm_u32", "(1U<)1", "Sd">;
def SCALAR_FCVTM_F32toSD : SInst<"vcvtm_s64", "(1S>)1", "Sf">;
def SCALAR_FCVTM_F32toUD : SInst<"vcvtm_u64", "(1U>)1", "Sf">;
def SCALAR_FCVTM_F64toSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
def SCALAR_FCVTM_F64toUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;

def SCALAR_FCVTA_F32toSS : SInst<"vcvta_s32", "(1S)1", "Sf">;
def SCALAR_FCVTA_F32toUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
def SCALAR_FCVTA_F64toSS : SInst<"vcvta_s32", "(1S<)1", "Sd">;
def SCALAR_FCVTA_F64toUS : SInst<"vcvta_u32", "(1U<)1", "Sd">;
def SCALAR_FCVTA_F32toSD : SInst<"vcvta_s64", "(1S>)1", "Sf">;
def SCALAR_FCVTA_F32toUD : SInst<"vcvta_u64", "(1U>)1", "Sf">;
def SCALAR_FCVTA_F64toSD : SInst<"vcvta_s64", "(1S)1", "Sd">;
def SCALAR_FCVTA_F64toUD : SInst<"vcvta_u64", "(1U)1", "Sd">;

def SCALAR_FCVTP_F32toSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
def SCALAR_FCVTP_F32toUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
def SCALAR_FCVTP_F64toSS : SInst<"vcvtp_s32", "(1S<)1", "Sd">;
def SCALAR_FCVTP_F64toUS : SInst<"vcvtp_u32", "(1U<)1", "Sd">;
def SCALAR_FCVTP_F32toSD : SInst<"vcvtp_s64", "(1S>)1", "Sf">;
def SCALAR_FCVTP_F32toUD : SInst<"vcvtp_u64", "(1U>)1", "Sf">;
def SCALAR_FCVTP_F64toSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
def SCALAR_FCVTP_F64toUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;

def SCALAR_FCVTZ_F32toSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
def SCALAR_FCVTZ_F32toUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
def SCALAR_FCVTZ_F64toSS : SInst<"vcvt_s32", "(1S<)1", "Sd">;
def SCALAR_FCVTZ_F64toUS : SInst<"vcvt_u32", "(1U<)1", "Sd">;
def SCALAR_FCVTZ_F32toSD : SInst<"vcvt_s64", "(1S>)1", "Sf">;
def SCALAR_FCVTZ_F32toUD : SInst<"vcvt_u64", "(1U>)1", "Sf">;
def SCALAR_FCVTZ_F64toSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
def SCALAR_FCVTZ_F64toUD : SInst<"vcvt_u64", "(1U)1", "Sd">;

////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Reciprocal Estimate
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/CodeGen/TargetBuiltins/ARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ struct ARMVectorIntrinsicInfo {
Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
TypeModifier }

// clang-format off
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
NEONMAP0(splat_lane_v),
Expand Down Expand Up @@ -1209,35 +1210,55 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP0(vcvth_bf16_f32),
NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
Expand Down Expand Up @@ -1422,6 +1443,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
// clang-format on

// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
Expand Down
Loading