From 76a55103f40b145cdd41a5fb7af9b600095f3d91 Mon Sep 17 00:00:00 2001
From: Marian Lukac
Date: Fri, 26 Sep 2025 08:30:13 +0000
Subject: [PATCH 1/2] Add simd fpcvt codegen for fptoi(_sat)

---
 .../lib/Target/AArch64/AArch64InstrFormats.td |    2 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  300 ++-
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |   18 +-
 .../AArch64/GlobalISel/regbank-fp-use-def.mir |    2 +-
 .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll   | 2039 +++++++++++++++++
 llvm/test/CodeGen/AArch64/arm64-neon-copy.ll  |   57 +-
 llvm/test/CodeGen/AArch64/arm64-vcvt.ll       |   30 +-
 7 files changed, 2305 insertions(+), 143 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f07d3514d1a99..957d28a1ec308 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5302,7 +5302,7 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
 }
 
 multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
-                                 SDPatternOperator OpN = null_frag> {
+                                 SDPatternOperator OpN> {
   // double-precision to 32-bit SIMD/FPR
   def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
                                     [(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f788c7510f80c..6e9f0f9a2242d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5239,114 +5239,11 @@ let Predicates = [HasNEON, HasFPRCVT] in{
   defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
   defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
   defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
-  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
-  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
+  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
+  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
 }
 
-// AArch64's FCVT instructions saturate when out of range.
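// Illustrative example (not part of the patch itself, but drawn from the
// tests it adds): with any_fp_to_sint wired into FPToIntegerSIMDScalar, IR
// whose converted integer value only ever lives in an FPR, e.g.
//
//   %r  = fptosi float %a to i32
//   %bc = bitcast i32 %r to float
//   ret float %bc
//
// can now select the SIMD/FPR form "fcvtzs s0, s0" directly, where it would
// otherwise convert into a GPR and move the bits back ("fcvtzs w8, s0"
// followed by "fmov s0, w8").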
-multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
-            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
-            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
-            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
-            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
-            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
-            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
-  }
-  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
-            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
-            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
-  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
-            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
-            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
-            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
-            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
-  }
-  def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
-            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
-            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
-  def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
-            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
-  def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
-            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
-
-multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
-  def : Pat<(i32 (to_int (round f32:$Rn))),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int (round f32:$Rn))),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int (round f64:$Rn))),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int (round f64:$Rn))),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
-  // These instructions saturate like fp_to_[su]int_sat.
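// Worked example of that saturating behaviour (a property of the FCVT
// instructions, spelled out here for clarity): for f32 -> i32, fcvtzs yields
//   NaN      -> 0
//   1.0e10f  -> 0x7fffffff (INT32_MAX)
//   -1.0e10f -> 0x80000000 (INT32_MIN)
// which is exactly the semantics required of llvm.fptosi.sat.i32.f32, so a
// single instruction implements fp_to_sint_sat with no extra clamping code.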
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
-            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
-            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
-  }
-  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
-            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
-            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
-  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
-            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
-  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
-            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-}
-
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
-
-
 let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (any_lround f16:$Rn)),
@@ -6553,8 +6450,8 @@ defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtn
 defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
 defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
 def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
 defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
 defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
 defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6596,6 +6493,8 @@ defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
 defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
 
 multiclass FPToIntegerIntPats<Intrinsic int_op, string INST> {
   let Predicates = [HasFullFP16] in {
@@ -6652,6 +6551,193 @@ multiclass FPToIntegerIntPats<Intrinsic int_op, string INST> {
 defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
 defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
 
+// AArch64's FCVT instructions saturate when out of range.
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
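// (Illustrative note: the *_gi patterns below work because GlobalISel has
// already assigned the scalar i32/i64 result to the FPR bank, so e.g.
//   (i32 (fp_to_sint_sat_gi f64:$Rn)) -> FCVTZSSDr
// keeps the whole conversion on the FP/SIMD side; SelectionDAG reaches the
// same instructions through the explicit bitconvert patterns instead.)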
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f16:$Rn)))),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f16:$Rn)))),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f32:$Rn)))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f64:$Rn)))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f32:$Rn)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f64:$Rn)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
+
+multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # 
UXDr) f64:$Rn)>;
+
+  // For global-isel we can use register classes to determine
+  // which FCVT instruction to use.
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+  // These instructions saturate like fp_to_[su]int_sat.
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
+            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
+            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+  }
+  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+  let Predicates = [HasFPRCVT] in {
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
+            (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
+            (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
+            (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
+            (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+  }
+  def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
+            (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+  def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))),
+            (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
+
 // f16 -> s16 conversions
 let Predicates = [HasFullFP16] in {
   def : Pat<(i16 (fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index f90bcc7a77cdf..5a25b85599398 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -852,7 +852,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     break;
   }
   case TargetOpcode::G_FPTOSI_SAT:
-  case TargetOpcode::G_FPTOUI_SAT: {
+  case TargetOpcode::G_FPTOUI_SAT:
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI: {
     LLT DstType = MRI.getType(MI.getOperand(0).getReg());
     if (DstType.isVector())
       break;
@@ -860,11 +862,19 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
       break;
     }
-    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+    TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+    TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
+    if (((DstSize == SrcSize) || 
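        // (Clarifying note: a same-width conversion such as f32->i32 always
        // has a SIMD-scalar FPR form, e.g. "fcvtzs s0, s0"; the mixed-width
        // forms like "fcvtzs d0, s0" additionally require the FPRCVT
        // feature, hence this size-or-feature check.)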
STI.hasFeature(AArch64::FeatureFPRCVT)) && + all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), + [&](const MachineInstr &UseMI) { + return onlyUsesFP(UseMI, MRI, TRI) || + prefersFPUse(UseMI, MRI, TRI); + })) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + else + OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; break; } - case TargetOpcode::G_FPTOSI: - case TargetOpcode::G_FPTOUI: case TargetOpcode::G_INTRINSIC_LRINT: case TargetOpcode::G_INTRINSIC_LLRINT: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir index b2528840a39cf..46dbc1556fb1d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir @@ -96,7 +96,7 @@ body: | ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]] - ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32) + ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32) %0:_(s32) = COPY $w0 %2:_(s32) = COPY $w1 %3:_(s32) = COPY $w2 diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll new file mode 100644 index 0000000000000..4a6b1f1f1d9d2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -0,0 +1,2039 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd + +; +; FPTOI +; + +define float @test_fptosi_f16_i32_simd(half %a) { +; CHECK-LABEL: test_fptosi_f16_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %r = fptosi half %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptosi_f16_i64_simd(half %a) { +; CHECK-LABEL: test_fptosi_f16_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %r = fptosi half %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define float 
@test_fptosi_f64_i32_simd(double %a) { +; CHECK-LABEL: test_fptosi_f64_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %r = fptosi double %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptosi_f32_i64_simd(float %a) { +; CHECK-LABEL: test_fptosi_f32_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %r = fptosi float %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define double @test_fptosi_f64_i64_simd(double %a) { +; CHECK-LABEL: test_fptosi_f64_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %r = fptosi double %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + + +define float @test_fptosi_f32_i32_simd(float %a) { +; CHECK-LABEL: test_fptosi_f32_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %r = fptosi float %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define float @test_fptoui_f16_i32_simd(half %a) { +; CHECK-LABEL: test_fptoui_f16_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %r = fptoui half %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptoui_f16_i64_simd(half %a) { +; CHECK-LABEL: test_fptoui_f16_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %r = fptoui half %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define float @test_fptoui_f64_i32_simd(double %a) { +; CHECK-LABEL: test_fptoui_f64_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %r = fptoui double %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptoui_f32_i64_simd(float %a) { +; CHECK-LABEL: test_fptoui_f32_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %r = fptoui float %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define double @test_fptoui_f64_i64_simd(double %a) { +; CHECK-LABEL: test_fptoui_f64_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret + %r = fptoui double %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + + +define float @test_fptoui_f32_i32_simd(float %a) { +; CHECK-LABEL: test_fptoui_f32_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %r = fptoui float %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + + +; +; FPTOI experimental +; + +define float @fptosi_i32_f16_simd(half %x) { +; CHECK-LABEL: fptosi_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i32 %val to float + ret float %sum +} + +define double @fptosi_i64_f16_simd(half %x) { +; CHECK-LABEL: fptosi_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @fptosi_i64_f32_simd(float %x) { +; CHECK-LABEL: fptosi_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptosi_i32_f64_simd(double %x) { +; CHECK-LABEL: fptosi_i32_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs 
s0, d0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @fptosi_i64_f64_simd(double %x) { +; CHECK-LABEL: fptosi_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptosi_i32_f32_simd(float %x) { +; CHECK-LABEL: fptosi_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + + + +define float @fptoui_i32_f16_simd(half %x) { +; CHECK-LABEL: fptoui_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i32 %val to float + ret float %sum +} + +define double @fptoui_i64_f16_simd(half %x) { +; CHECK-LABEL: fptoui_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @fptoui_i64_f32_simd(float %x) { +; CHECK-LABEL: fptoui_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptoui_i32_f64_simd(double %x) { +; CHECK-LABEL: fptoui_i32_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @fptoui_i64_f64_simd(double %x) { +; CHECK-LABEL: fptoui_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptoui_i32_f32_simd(float %x) { +; CHECK-LABEL: fptoui_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +; +; FPTOI rounding +; + + +define double @fcvtas_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtas_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtas_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtas_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtas_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtau_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtau_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtms_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtms_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + + +define double @fcvtmu_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtmu_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtmu_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtmu_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtmu_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtmu_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtmu_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtmu_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtps_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtps_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtps_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtps_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtps_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtps_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtps_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtpu_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtpu_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtpu_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtpu_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtpu_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtpu_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtpu_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtzs_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzs_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzs_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzs_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +; +; FPTOI saturating +; + +define float @fcvtzs_sh_sat_simd(half %a) { +; CHECK-LABEL: fcvtzs_sh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f16(half %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dh_sat_simd(half %a) { +; CHECK-LABEL: fcvtzs_dh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f16(half %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzs_ds_sat_simd(float %a) { +; CHECK-LABEL: fcvtzs_ds_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f32(float %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_sat_simd(double %a) { +; CHECK-LABEL: fcvtzs_sd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f64(double %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzs_ss_sat_simd(float %a) { +; CHECK-LABEL: fcvtzs_ss_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f32(float %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_sat_simd(double %a) { +; CHECK-LABEL: fcvtzs_dd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f64(double %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sh_sat_simd(half %a) { +; CHECK-LABEL: fcvtzu_sh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptoui.sat.i32.f16(half %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dh_sat_simd(half %a) { +; CHECK-LABEL: fcvtzu_dh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptoui.sat.i64.f16(half %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_sat_simd(float %a) { +; CHECK-LABEL: fcvtzu_ds_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptoui.sat.i64.f32(float %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_sat_simd(double %a) { +; CHECK-LABEL: fcvtzu_sd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %i = call i32 
@llvm.fptoui.sat.i32.f64(double %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_sat_simd(float %a) { +; CHECK-LABEL: fcvtzu_ss_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f32(float %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_sat_simd(double %a) { +; CHECK-LABEL: fcvtzu_dd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f64(double %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +; +; FPTOI saturating with rounding +; + +define float @fcvtas_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtas_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzs s0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtas_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzs d0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtas_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtas_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtas_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtas_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtas_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtau_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzu s0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtau_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzu d0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtau_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtau_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtms_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzs s0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtms_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzs d0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtms_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtms_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzs d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtmu_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtmu_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintm h0, h0
+; CHECK-GI-NEXT: fcvtzu s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.floor.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtmu_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtmu_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintm h0, h0
+; CHECK-GI-NEXT: fcvtzu d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.floor.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtmu_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtmu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtmu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtmu_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floorf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @floorf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtmu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtmu_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtmu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtmu_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl floor
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @floor(double %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtps_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtps_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintp h0, h0
+; CHECK-GI-NEXT: fcvtzs s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtps_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtps_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintp h0, h0
+; CHECK-GI-NEXT: fcvtzs d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.ceil.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtps_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtps_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtps_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtps_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtps s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtps_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtps_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtps_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtps_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtpu_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtpu_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu s0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzu s0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtpu_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzu d0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtpu_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtpu_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzu d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtpu_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzu s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtpu_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtpu_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceilf
+; CHECK-GI-NEXT: fcvtzu s0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @ceilf(float %a) nounwind readnone
+ %i = call i32 @llvm.fptoui.sat.i32.f32(float %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtpu_dd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtpu_dd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtpu d0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtpu_dd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl ceil
+; CHECK-GI-NEXT: fcvtzu d0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @ceil(double %a) nounwind readnone
+ %i = call i64 @llvm.fptoui.sat.i64.f64(double %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzs_sh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_sh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_sh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintz h0, h0
+; CHECK-GI-NEXT: fcvtzs s0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f16(half %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define double @fcvtzs_dh_simd(half %a) {
+; CHECK-SD-LABEL: fcvtzs_dh_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs d0, h0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_dh_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: frintz h0, h0
+; CHECK-GI-NEXT: fcvtzs d0, h0
+; CHECK-GI-NEXT: ret
+ %r = call half @llvm.trunc.f16(half %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f16(half %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define double @fcvtzs_ds_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ds_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs d0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_ds_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl truncf
+; CHECK-GI-NEXT: fcvtzs d0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call float @truncf(float %a) nounwind readnone
+ %i = call i64 @llvm.fptosi.sat.i64.f32(float %r)
+ %bc = bitcast i64 %i to double
+ ret double %bc
+}
+
+define float @fcvtzs_sd_simd(double %a) {
+; CHECK-SD-LABEL: fcvtzs_sd_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_sd_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl trunc
+; CHECK-GI-NEXT: fcvtzs s0, d0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %r = call double @trunc(double %a) nounwind readnone
+ %i = call i32 @llvm.fptosi.sat.i32.f64(double %r)
+ %bc = bitcast i32 %i to float
+ ret float %bc
+}
+
+define float @fcvtzs_ss_simd(float %a) {
+; CHECK-SD-LABEL: fcvtzs_ss_simd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtzs s0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcvtzs_ss_simd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtzu_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzu s0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtzu_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, h0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzu d0, h0 +; CHECK-GI-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzu s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzu d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +declare half @llvm.floor.f16(half) nounwind readnone +declare half @llvm.ceil.f16(half) nounwind readnone +declare half @llvm.trunc.f16(half) nounwind readnone +declare half @llvm.round.f16(half) nounwind readnone +declare float @floorf(float) nounwind readnone +declare float @ceilf(float) nounwind readnone +declare float @truncf(float) nounwind readnone +declare float @roundf(float) nounwind readnone +declare double @floor(double) nounwind readnone +declare double @ceil(double) nounwind readnone +declare double @trunc(double) nounwind readnone +declare double @round(double) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index e18a5f695ba29..d8f370884c84a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -980,12 +980,18 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) { } define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { -; CHECK-LABEL: test_bitcastv8i8tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.8b, v0.8b -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv8i8tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.8b, v0.8b +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv8i8tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.8b, v0.8b +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <8 x i8> zeroinitializer, %a %1 = bitcast <8 x i8> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -993,12 +999,18 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { } define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { -; CHECK-LABEL: test_bitcastv4i16tov1f64: -; 
CHECK: // %bb.0: -; CHECK-NEXT: neg v0.4h, v0.4h -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv4i16tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.4h, v0.4h +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv4i16tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.4h, v0.4h +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <4 x i16> zeroinitializer, %a %1 = bitcast <4 x i16> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -1006,12 +1018,18 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { } define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { -; CHECK-LABEL: test_bitcastv2i32tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.2s, v0.2s -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv2i32tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.2s, v0.2s +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv2i32tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.2s, v0.2s +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <2 x i32> zeroinitializer, %a %1 = bitcast <2 x i32> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -1031,8 +1049,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: neg x8, x8 ; CHECK-GI-NEXT: fmov d0, x8 -; CHECK-GI-NEXT: fcvtzs x8, d0 -; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: fcvtzs d0, d0 ; CHECK-GI-NEXT: ret %sub.i = sub <1 x i64> zeroinitializer, %a %1 = bitcast <1 x i64> %sub.i to <1 x double> diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll index 627d31f9a64fc..1e0cfa0201263 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll @@ -359,11 +359,16 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { ; FIXME: Generate "fcvtzs d0, d0"? define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind { -; CHECK-LABEL: fcvtzs_1d: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_1d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_1d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %tmp3 = fptosi <1 x double> %A to <1 x i64> ret <1 x i64> %tmp3 } @@ -438,11 +443,16 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { ; FIXME: Generate "fcvtzu d0, d0"? 
define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind { -; CHECK-LABEL: fcvtzu_1d: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_1d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_1d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtzu d0, d0 +; CHECK-GI-NEXT: ret %tmp3 = fptoui <1 x double> %A to <1 x i64> ret <1 x i64> %tmp3 } From 1a58bd508cb46e517c30ea0aeae99e22f0a496d2 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Fri, 3 Oct 2025 13:35:01 +0000 Subject: [PATCH 2/2] Test fix --- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 21 ++++++++++++------- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 21 ++++++++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index c74112937ba53..b963acd8cb2a1 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -31,8 +31,7 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_signed_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzs w8, s0 -; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fcvtzs s0, s0 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -1162,18 +1161,24 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v1f16_v1i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs w8, s0 -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: ret +; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32: +; CHECK-SD-CVT: // %bb.0: +; CHECK-SD-CVT-NEXT: fcvt s0, h0 +; CHECK-SD-CVT-NEXT: fcvtzs w8, s0 +; CHECK-SD-CVT-NEXT: fmov s0, w8 +; CHECK-SD-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzs s0, s0 +; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index efe0a1bedbc9e..5a66b68af8e96 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -31,8 +31,7 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzu w8, s0 -; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fcvtzu s0, s0 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -993,18 +992,24 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu w8, s0 -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: ret +; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32: +; CHECK-SD-CVT: // %bb.0: +; CHECK-SD-CVT-NEXT: fcvt 
s0, h0 +; CHECK-SD-CVT-NEXT: fcvtzu w8, s0 +; CHECK-SD-CVT-NEXT: fmov s0, w8 +; CHECK-SD-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzu s0, s0 +; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x }
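
Reviewer note (illustrative, not part of the patch): every test above pairs the saturating conversion with a bitcast back to a floating-point type, which is what keeps the result on the FPR bank and lets instruction selection pick the FPR-destination FCVT forms instead of a GPR convert followed by an fmov. A minimal standalone sketch of such a test follows; the file name, function name, and RUN line are assumptions (in particular the "+fprcvt" feature spelling), so adjust -mattr to the feature strings the tests above actually use.

    ; RUN: llc -mtriple=aarch64 -mattr=+neon,+fprcvt,+fullfp16 < %s | FileCheck %s

    ; The i32 result feeds a bitcast to float, so it can live in s0 directly:
    ; the FPR form "fcvtzs s0, d0" replaces "fcvtzs w8, d0" + "fmov s0, w8".
    define float @fcvtzs_i32_f64_fpr(double %a) {
    ; CHECK-LABEL: fcvtzs_i32_f64_fpr:
    ; CHECK: fcvtzs s0, d0
      %i = call i32 @llvm.fptosi.sat.i32.f64(double %a)
      %bc = bitcast i32 %i to float
      ret float %bc
    }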