diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d1d352dcc5f1f..53228cb79e75f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5848,6 +5848,57 @@ multiclass SIMDTwoVectorFPToIntSatPats; defm : SIMDTwoVectorFPToIntSatPats; +// Fused round + convert to int patterns for vectors +multiclass SIMDTwoVectorFPToIntRoundPats { + let Predicates = [HasFullFP16] in { + def : Pat<(v4i16 (to_int (round v4f16:$Rn))), + (!cast(INST # v4f16) v4f16:$Rn)>; + def : Pat<(v8i16 (to_int (round v8f16:$Rn))), + (!cast(INST # v8f16) v8f16:$Rn)>; + + def : Pat<(v4i16 (to_int_sat (round v4f16:$Rn), i16)), + (!cast(INST # v4f16) v4f16:$Rn)>; + def : Pat<(v8i16 (to_int_sat (round v8f16:$Rn), i16)), + (!cast(INST # v8f16) v8f16:$Rn)>; + + def : Pat<(v4i16 (to_int_sat_gi (round v4f16:$Rn))), + (!cast(INST # v4f16) v4f16:$Rn)>; + def : Pat<(v8i16 (to_int_sat_gi (round v8f16:$Rn))), + (!cast(INST # v8f16) v8f16:$Rn)>; + } + def : Pat<(v2i32 (to_int (round v2f32:$Rn))), + (!cast(INST # v2f32) v2f32:$Rn)>; + def : Pat<(v4i32 (to_int (round v4f32:$Rn))), + (!cast(INST # v4f32) v4f32:$Rn)>; + def : Pat<(v2i64 (to_int (round v2f64:$Rn))), + (!cast(INST # v2f64) v2f64:$Rn)>; + + def : Pat<(v2i32 (to_int_sat (round v2f32:$Rn), i32)), + (!cast(INST # v2f32) v2f32:$Rn)>; + def : Pat<(v4i32 (to_int_sat (round v4f32:$Rn), i32)), + (!cast(INST # v4f32) v4f32:$Rn)>; + def : Pat<(v2i64 (to_int_sat (round v2f64:$Rn), i64)), + (!cast(INST # v2f64) v2f64:$Rn)>; + + def : Pat<(v2i32 (to_int_sat_gi (round v2f32:$Rn))), + (!cast(INST # v2f32) v2f32:$Rn)>; + def : Pat<(v4i32 (to_int_sat_gi (round v4f32:$Rn))), + (!cast(INST # v4f32) v4f32:$Rn)>; + def : Pat<(v2i64 (to_int_sat_gi (round v2f64:$Rn))), + (!cast(INST # v2f64) v2f64:$Rn)>; +} + +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm 
: SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; +defm : SIMDTwoVectorFPToIntRoundPats; + def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; @@ -6801,6 +6852,21 @@ multiclass FPToIntegerPats(INST # UXDr) f64:$Rn)>; + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))), + (!cast(INST # UWHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))), + (!cast(INST # UXHr) f16:$Rn)>; + } + def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))), + (!cast(INST # UWSr) f32:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))), + (!cast(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))), + (!cast(INST # UWDr) f64:$Rn)>; + def : Pat<(i64 (to_int_sat_gi (round f64:$Rn))), + (!cast(INST # UXDr) f64:$Rn)>; + // For global-isel we can use register classes to determine // which FCVT instruction to use. 
let Predicates = [HasFPRCVT] in { @@ -6834,14 +6900,16 @@ multiclass FPToIntegerPats(INST # v1i64) f64:$Rn)>; } -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; -defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; +defm : FPToIntegerPats; let Predicates = [HasFPRCVT] in { def : Pat<(f32 (bitconvert (i32 (any_lround f16:$Rn)))), diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll index a729772f2897a..3f6ad552ec200 100644 --- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -543,6 +543,138 @@ define double @fcvtau_dd_round_simd(double %a) { ret double %bc } +define double @fcvtns_ds_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ds_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_ds_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtns_sd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_sd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_sd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float 
@fcvtns_ss_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ss_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_ss_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtns_dd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_dd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_dd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtnu_ds_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ds_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtnu_sd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_sd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_sd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtnu_ss_roundeven_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: 
fcvtnu s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ss_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = fptoui float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtnu_dd_roundeven_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_roundeven_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_dd_roundeven_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = fptoui double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} define double @fcvtms_ds_round_simd(float %a) { ; CHECK-NOFPRCVT-LABEL: fcvtms_ds_round_simd: @@ -1153,7 +1285,7 @@ define float @fcvtas_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtas s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1170,7 +1302,7 @@ define double @fcvtas_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtas d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1253,7 +1385,7 @@ define float @fcvtau_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtau s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1270,7 +1402,7 @@ define double @fcvtau_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtau d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half 
@llvm.round.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1342,6 +1474,206 @@ define double @fcvtau_dd_simd(double %a) { ret double %bc } +define float @fcvtns_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtns_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtns_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtns_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i32 
@llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtns_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtns_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtns_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtns d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtns_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtnu_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtnu_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.roundeven.f16(half %a) + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtnu_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ds_simd: +; CHECK-NOFPRCVT: 
// %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtnu_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtnu_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.roundeven.f32(float %a) + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtnu_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtnu_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtnu d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtnu_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.roundeven.f64(double %a) + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + define float @fcvtms_sh_simd(half %a) { ; CHECK-NOFPRCVT-LABEL: fcvtms_sh_simd: ; CHECK-NOFPRCVT: // %bb.0: @@ -1353,7 +1685,7 @@ define float @fcvtms_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtms s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) 
nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1370,7 +1702,7 @@ define double @fcvtms_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtms d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1453,7 +1785,7 @@ define float @fcvtmu_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtmu s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1470,7 +1802,7 @@ define double @fcvtmu_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtmu d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1553,7 +1885,7 @@ define float @fcvtps_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtps s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1570,7 +1902,7 @@ define double @fcvtps_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtps d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1653,7 +1985,7 @@ define float @fcvtpu_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtpu s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call 
i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1670,7 +2002,7 @@ define double @fcvtpu_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtpu d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1753,7 +2085,7 @@ define float @fcvtzs_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzs s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1770,7 +2102,7 @@ define double @fcvtzs_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzs d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc @@ -1853,7 +2185,7 @@ define float @fcvtzu_sh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzu s0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float ret float %bc @@ -1870,7 +2202,7 @@ define double @fcvtzu_dh_simd(half %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzu d0, h0 ; CHECK-NEXT: ret - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double ret double %bc diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll new file mode 100644 index 0000000000000..780bdebd832b6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt-fused-round.ll @@ -0,0 +1,1611 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NO16 +; RUN: llc < %s -mtriple=arm64-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 + +; +; Tests for fused round + convert to int patterns (FCVTAS, FCVTAU, FCVTMS, FCVTMU, etc.) +; + +; +; round + signed -> fcvtas +; + +define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @fcvtas_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + +define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @fcvtas_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtas_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + +define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtas_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x 
double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <2 x i64> @fcvtas_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtas_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + +; +; round + unsigned -> fcvtau +; + +define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @fcvtau_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.round.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + +define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @fcvtau_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtau_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.round.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + +define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtau_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <2 x i64> @fcvtau_2d_sat(<2 
x double> %A) nounwind { +; CHECK-LABEL: fcvtau_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.round.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + +; +; roundeven + signed -> fcvtns +; + +define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @fcvtns_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + +define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @fcvtns_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtns_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + +define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtns_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <2 x i64> @fcvtns_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtns_2d_sat: +; CHECK: // 
%bb.0: +; CHECK-NEXT: fcvtns v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + +; +; roundeven + unsigned -> fcvtnu +; + +define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @fcvtnu_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + +define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @fcvtnu_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtnu_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + +define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtnu_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <2 x i64> @fcvtnu_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtnu_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu v0.2d, v0.2d +; CHECK-NEXT: ret + 
%tmp1 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + +; +; floor + signed -> fcvtms +; + +define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @fcvtms_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + +define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @fcvtms_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtms_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + +define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtms_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <2 x i64> @fcvtms_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtms_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> 
@llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + +; +; floor + unsigned -> fcvtmu +; + +define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @fcvtmu_2s_sat(<2 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_2s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu v0.2s, v0.2s +; CHECK-NEXT: ret + %tmp1 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A) + %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1) + ret <2 x i32> %tmp2 +} + + +define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @fcvtmu_4s_sat(<4 x float> %A) nounwind { +; CHECK-LABEL: fcvtmu_4s_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu v0.4s, v0.4s +; CHECK-NEXT: ret + %tmp1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A) + %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1) + ret <4 x i32> %tmp2 +} + + +define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtmu_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64> + ret <2 x i64> %tmp2 +} + +define <2 x i64> @fcvtmu_2d_sat(<2 x double> %A) nounwind { +; CHECK-LABEL: fcvtmu_2d_sat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu v0.2d, v0.2d +; CHECK-NEXT: ret + %tmp1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A) + %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1) + ret <2 x i64> %tmp2 +} + + +; +; ceil + 
signed -> fcvtps
+;
+
+; llvm.ceil + fptosi, <2 x float>: expect one fcvtps.
+define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+; llvm.ceil + llvm.fptosi.sat, <2 x float>: expect one fcvtps.
+define <2 x i32> @fcvtps_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+; llvm.ceil + fptosi, <4 x float>: expect one fcvtps.
+define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+; llvm.ceil + llvm.fptosi.sat, <4 x float>: expect one fcvtps.
+define <4 x i32> @fcvtps_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtps_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+; llvm.ceil + fptosi, <2 x double>: expect one fcvtps.
+define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtps_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+; llvm.ceil + llvm.fptosi.sat, <2 x double>: expect one fcvtps.
+define <2 x i64> @fcvtps_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtps_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtps v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; ceil + unsigned -> fcvtpu
+;
+
+; llvm.ceil + fptoui, <2 x float>: expect one fcvtpu.
+define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL:
fcvtpu_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+; llvm.ceil + llvm.fptoui.sat, <2 x float>: expect one fcvtpu.
+define <2 x i32> @fcvtpu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+; llvm.ceil + fptoui, <4 x float>: expect one fcvtpu.
+define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+; llvm.ceil + llvm.fptoui.sat, <4 x float>: expect one fcvtpu.
+define <4 x i32> @fcvtpu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtpu_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+; llvm.ceil + fptoui, <2 x double>: expect one fcvtpu.
+define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+; llvm.ceil + llvm.fptoui.sat, <2 x double>: expect one fcvtpu.
+define <2 x i64> @fcvtpu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtpu_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtpu v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; trunc + signed -> fcvtzs (already the default, but test the fusion)
+;
+
+; llvm.trunc + fptosi, <2 x float>: expect one fcvtzs.
+define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs
v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+; llvm.trunc + llvm.fptosi.sat, <2 x float>: expect one fcvtzs.
+define <2 x i32> @fcvtzs_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+; llvm.trunc + fptosi, <4 x float>: expect one fcvtzs.
+define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+; llvm.trunc + llvm.fptosi.sat, <4 x float>: expect one fcvtzs.
+define <4 x i32> @fcvtzs_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzs_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+; llvm.trunc + fptosi, <2 x double>: expect one fcvtzs.
+define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
+  %tmp2 = fptosi <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+; llvm.trunc + llvm.fptosi.sat, <2 x double>: expect one fcvtzs.
+define <2 x i64> @fcvtzs_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtzs_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; trunc + unsigned -> fcvtzu
+;
+
+; llvm.trunc + fptoui, <2 x float>: expect one fcvtzu.
+define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzu_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float>
%A)
+  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
+  ret <2 x i32> %tmp2
+}
+
+; llvm.trunc + llvm.fptoui.sat, <2 x float>: expect one fcvtzu.
+define <2 x i32> @fcvtzu_2s_sat(<2 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzu_2s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+  %tmp2 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+
+; llvm.trunc + fptoui, <4 x float>: expect one fcvtzu.
+define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzu_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
+  ret <4 x i32> %tmp2
+}
+
+; llvm.trunc + llvm.fptoui.sat, <4 x float>: expect one fcvtzu.
+define <4 x i32> @fcvtzu_4s_sat(<4 x float> %A) nounwind {
+; CHECK-LABEL: fcvtzu_4s_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ret
+  %tmp1 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+  %tmp2 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+
+; llvm.trunc + fptoui, <2 x double>: expect one fcvtzu.
+define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtzu_2d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
+  %tmp2 = fptoui <2 x double> %tmp1 to <2 x i64>
+  ret <2 x i64> %tmp2
+}
+
+; llvm.trunc + llvm.fptoui.sat, <2 x double>: expect one fcvtzu.
+define <2 x i64> @fcvtzu_2d_sat(<2 x double> %A) nounwind {
+; CHECK-LABEL: fcvtzu_2d_sat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ret
+  %tmp1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
+  %tmp2 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %tmp1)
+  ret <2 x i64> %tmp2
+}
+
+
+;
+; f16 tests (fused forms require +fullfp16; the NO16 checks pin the unfused f32 fallback)
+;
+
+; llvm.round + fptosi, <4 x half>: FP16 checks want one fcvtas; NO16 checks want the widened f32 frinta + fcvtzs path.
+define <4 x i16> @fcvtas_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtas_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs
v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtas_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A)
+  %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.round + llvm.fptosi.sat, <4 x half>: FP16 checks want one fcvtas; NO16 checks want the widened f32 frinta + fcvtzs path.
+define <4 x i16> @fcvtas_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtas_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtas_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.round + fptosi, <8 x half>: FP16 checks want one fcvtas; NO16 checks want the widened f32 frinta + fcvtzs path.
+define <8 x i16> @fcvtas_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtas_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frinta v1.4s, v1.4s
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtas_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A)
+  %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.round + llvm.fptosi.sat, <8 x half>: FP16 checks want one fcvtas; NO16 checks want the widened f32 frinta + fcvtzs path.
+define <8 x i16> @fcvtas_8h_sat(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtas_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+;
CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frinta v1.4s, v1.4s
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtas_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtas v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.round + fptoui, <4 x half>: FP16 checks want one fcvtau; NO16 checks want the widened f32 frinta + fcvtzu path.
+define <4 x i16> @fcvtau_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtau_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtau_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtau v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A)
+  %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.round + llvm.fptoui.sat, <4 x half>: FP16 checks want one fcvtau; NO16 checks want the widened f32 frinta + fcvtzu path.
+define <4 x i16> @fcvtau_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtau_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtau_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtau v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.round.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x
half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.round + fptoui, <8 x half>: FP16 checks want one fcvtau; NO16 checks want the widened f32 frinta + fcvtzu path.
+define <8 x i16> @fcvtau_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtau_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frinta v1.4s, v1.4s
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtau_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtau v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A)
+  %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.round + llvm.fptoui.sat, <8 x half>: FP16 checks want one fcvtau; NO16 checks want the widened f32 frinta + fcvtzu path.
+define <8 x i16> @fcvtau_8h_sat(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtau_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frinta v1.4s, v1.4s
+; CHECK-NO16-NEXT: frinta v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtau_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtau v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.round.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.roundeven + fptosi, <4 x half>: FP16 checks want one fcvtns; NO16 checks want the widened f32 frintn + fcvtzs path.
+define <4 x i16> @fcvtns_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtns_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT:
fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtns_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtns v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A)
+  %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.roundeven + llvm.fptosi.sat, <4 x half>: FP16 checks want one fcvtns; NO16 checks want the widened f32 frintn + fcvtzs path.
+define <4 x i16> @fcvtns_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtns_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtns_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtns v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.roundeven + fptosi, <8 x half>: FP16 checks want one fcvtns; NO16 checks want the widened f32 frintn + fcvtzs path.
+define <8 x i16> @fcvtns_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtns_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintn v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtns_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtns v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A)
+  %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.roundeven + llvm.fptosi.sat, <8 x half>: FP16 checks want one fcvtns; NO16 checks want the widened f32 frintn + fcvtzs path.
+define <8 x i16> @fcvtns_8h_sat(<8 x half> %A) nounwind {
+;
CHECK-NO16-LABEL: fcvtns_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintn v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtns_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtns v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.roundeven + fptoui, <4 x half>: FP16 checks want one fcvtnu; NO16 checks want the widened f32 frintn + fcvtzu path.
+define <4 x i16> @fcvtnu_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtnu_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtnu_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtnu v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A)
+  %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.roundeven + llvm.fptoui.sat, <4 x half>: FP16 checks want one fcvtnu; NO16 checks want the widened f32 frintn + fcvtzu path.
+define <4 x i16> @fcvtnu_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtnu_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtnu_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtnu v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 =
call <4 x half> @llvm.roundeven.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.roundeven + fptoui, <8 x half>: FP16 checks want one fcvtnu; NO16 checks want the widened f32 frintn + fcvtzu path.
+define <8 x i16> @fcvtnu_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtnu_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintn v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtnu_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtnu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A)
+  %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.roundeven + llvm.fptoui.sat, <8 x half>: FP16 checks want one fcvtnu; NO16 checks want the widened f32 frintn + fcvtzu path.
+define <8 x i16> @fcvtnu_8h_sat(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtnu_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintn v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintn v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtnu_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtnu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.floor + fptosi, <4 x half>: FP16 checks want one fcvtms; NO16 checks want the widened f32 frintm + fcvtzs path.
+define <4 x i16> @fcvtms_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtms_4h:
+;
CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtms_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtms v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A)
+  %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.floor + llvm.fptosi.sat, <4 x half>: FP16 checks want one fcvtms; NO16 checks want the widened f32 frintm + fcvtzs path.
+define <4 x i16> @fcvtms_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtms_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtms_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtms v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.floor + fptosi, <8 x half>: FP16 checks want one fcvtms; NO16 checks want the widened f32 frintm + fcvtzs path.
+define <8 x i16> @fcvtms_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtms_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintm v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtms_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtms v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A)
+  %tmp2 = fptosi <8 x half> %tmp1
to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.floor + llvm.fptosi.sat, <8 x half>: FP16 checks want one fcvtms; NO16 checks want the widened f32 frintm + fcvtzs path.
+define <8 x i16> @fcvtms_8h_sat(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtms_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintm v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtms_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtms v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.floor + fptoui, <4 x half>: FP16 checks want one fcvtmu; NO16 checks want the widened f32 frintm + fcvtzu path.
+define <4 x i16> @fcvtmu_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtmu_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtmu_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtmu v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A)
+  %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.floor + llvm.fptoui.sat, <4 x half>: FP16 checks want one fcvtmu; NO16 checks want the widened f32 frintm + fcvtzu path.
+define <4 x i16> @fcvtmu_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtmu_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtmu_4h_sat:
+;
CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtmu v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.floor + fptoui, <8 x half>: FP16 checks want one fcvtmu; NO16 checks want the widened f32 frintm + fcvtzu path.
+define <8 x i16> @fcvtmu_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtmu_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintm v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtmu_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtmu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A)
+  %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.floor + llvm.fptoui.sat, <8 x half>: FP16 checks want one fcvtmu; NO16 checks want the widened f32 frintm + fcvtzu path.
+define <8 x i16> @fcvtmu_8h_sat(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtmu_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintm v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintm v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtmu_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtmu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.ceil + fptosi, <4 x half>: FP16 checks want one fcvtps; NO16 checks want the widened f32 frintp + fcvtzs path.
+define <4
x i16> @fcvtps_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtps_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtps_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtps v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A)
+  %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.ceil + llvm.fptosi.sat, <4 x half>: FP16 checks want one fcvtps; NO16 checks want the widened f32 frintp + fcvtzs path.
+define <4 x i16> @fcvtps_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtps_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtps_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtps v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.ceil + fptosi, <8 x half>: FP16 checks want one fcvtps; NO16 checks want the widened f32 frintp + fcvtzs path.
+define <8 x i16> @fcvtps_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtps_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintp v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtps_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtps v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call
<8 x half> @llvm.ceil.v8f16(<8 x half> %A)
+  %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.ceil + llvm.fptosi.sat, <8 x half>: FP16 checks want one fcvtps; NO16 checks want the widened f32 frintp + fcvtzs path.
+define <8 x i16> @fcvtps_8h_sat(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtps_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintp v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtps_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtps v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.ceil + fptoui, <4 x half>: FP16 checks want one fcvtpu; NO16 checks want the widened f32 frintp + fcvtzu path.
+define <4 x i16> @fcvtpu_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtpu_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtpu_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtpu v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A)
+  %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.ceil + llvm.fptoui.sat, <4 x half>: FP16 checks want one fcvtpu; NO16 checks want the widened f32 frintp + fcvtzu path.
+define <4 x i16> @fcvtpu_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtpu_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h,
v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtpu_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtpu v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.ceil.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.ceil + fptoui, <8 x half>: FP16 checks want one fcvtpu; NO16 checks want the widened f32 frintp + fcvtzu path.
+define <8 x i16> @fcvtpu_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtpu_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintp v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtpu_8h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtpu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A)
+  %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16>
+  ret <8 x i16> %tmp2
+}
+
+; llvm.ceil + llvm.fptoui.sat, <8 x half>: FP16 checks want one fcvtpu; NO16 checks want the widened f32 frintp + fcvtzu path.
+define <8 x i16> @fcvtpu_8h_sat(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtpu_8h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintp v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintp v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtpu_8h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtpu v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <8 x half> @llvm.ceil.v8f16(<8 x half> %A)
+  %tmp2 = call <8 x i16>
@llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+
+; llvm.trunc + fptosi, <4 x half>: FP16 checks want one fcvtzs; NO16 checks want the widened f32 frintz + fcvtzs path.
+define <4 x i16> @fcvtzs_4h(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtzs_4h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintz v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtzs_4h:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A)
+  %tmp2 = fptosi <4 x half> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+; llvm.trunc + llvm.fptosi.sat, <4 x half>: FP16 checks want one fcvtzs; NO16 checks want the widened f32 frintz + fcvtzs path.
+define <4 x i16> @fcvtzs_4h_sat(<4 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtzs_4h_sat:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: frintz v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtzs_4h_sat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-FP16-NEXT: ret
+  %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A)
+  %tmp2 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> %tmp1)
+  ret <4 x i16> %tmp2
+}
+
+
+; llvm.trunc + fptosi, <8 x half>: FP16 checks want one fcvtzs; NO16 checks want the widened f32 frintz + fcvtzs path.
+define <8 x i16> @fcvtzs_8h(<8 x half> %A) nounwind {
+; CHECK-NO16-LABEL: fcvtzs_8h:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NO16-NEXT: frintz v1.4s, v1.4s
+; CHECK-NO16-NEXT: frintz v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
+; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
+; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NO16-NEXT: ret
+;
+; CHECK-FP16-LABEL: fcvtzs_8h:
+; CHECK-FP16:
// %bb.0: +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = fptosi <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <8 x i16> @fcvtzs_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzs_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintz v1.4s, v1.4s +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: sqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} + + +define <4 x i16> @fcvtzu_4h(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_4h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_4h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) + %tmp2 = fptoui <4 x half> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <4 x i16> @fcvtzu_4h_sat(<4 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_4h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NO16-NEXT: 
fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_4h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: ret + %tmp1 = call <4 x half> @llvm.trunc.v4f16(<4 x half> %A) + %tmp2 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %tmp1) + ret <4 x i16> %tmp2 +} + + +define <8 x i16> @fcvtzu_8h(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_8h: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintz v1.4s, v1.4s +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h +; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_8h: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + %tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = fptoui <8 x half> %tmp1 to <8 x i16> + ret <8 x i16> %tmp2 +} + +define <8 x i16> @fcvtzu_8h_sat(<8 x half> %A) nounwind { +; CHECK-NO16-LABEL: fcvtzu_8h_sat: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NO16-NEXT: frintz v1.4s, v1.4s +; CHECK-NO16-NEXT: frintz v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v1.4h +; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s +; CHECK-NO16-NEXT: uqxtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_8h_sat: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: ret + 
%tmp1 = call <8 x half> @llvm.trunc.v8f16(<8 x half> %A) + %tmp2 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %tmp1) + ret <8 x i16> %tmp2 +} diff --git a/llvm/test/CodeGen/AArch64/round-conv.ll b/llvm/test/CodeGen/AArch64/round-conv.ll index d78aa207925a4..66e5297aa2d68 100644 --- a/llvm/test/CodeGen/AArch64/round-conv.ll +++ b/llvm/test/CodeGen/AArch64/round-conv.ll @@ -5,7 +5,7 @@ ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testmsws(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -15,7 +15,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testmsxs(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -25,7 +25,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testmswd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -35,7 +35,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testmsxd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -45,7 +45,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testmuws(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -55,7 +55,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testmuxs(float %a) { entry: - %call = call float @llvm.floor.f32(float %a) nounwind readnone + %call = call float @llvm.floor.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -65,7 +65,7 @@ entry: ; 
CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testmuwd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -75,7 +75,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testmuxd(double %a) { entry: - %call = call double @llvm.floor.f64(double %a) nounwind readnone + %call = call double @llvm.floor.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } @@ -85,7 +85,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testpsws(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -95,7 +95,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testpsxs(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -105,7 +105,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testpswd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -115,7 +115,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testpsxd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -125,7 +125,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testpuws(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -135,7 +135,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testpuxs(float %a) { entry: - %call = call float @llvm.ceil.f32(float %a) nounwind 
readnone + %call = call float @llvm.ceil.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -145,7 +145,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testpuwd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -155,7 +155,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testpuxd(double %a) { entry: - %call = call double @llvm.ceil.f64(double %a) nounwind readnone + %call = call double @llvm.ceil.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } @@ -165,7 +165,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testzsws(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -175,7 +175,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testzsxs(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -185,7 +185,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testzswd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -195,7 +195,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testzsxd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -205,7 +205,7 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testzuws(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -215,7 +215,7 @@ 
entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testzuxs(float %a) { entry: - %call = call float @llvm.trunc.f32(float %a) nounwind readnone + %call = call float @llvm.trunc.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -225,7 +225,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testzuwd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -235,7 +235,7 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testzuxd(double %a) { entry: - %call = call double @llvm.trunc.f64(double %a) nounwind readnone + %call = call double @llvm.trunc.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } @@ -245,7 +245,17 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testasws(float %a) { entry: - %call = call float @llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) + %conv = fptosi float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testnsws: +; CHECK: fcvtns w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testnsws(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) %conv = fptosi float %call to i32 ret i32 %conv } @@ -255,7 +265,17 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testasxs(float %a) { entry: - %call = call float @llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) + %conv = fptosi float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testnsxs: +; CHECK: fcvtns x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testnsxs(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) %conv = fptosi float %call to i64 ret i64 %conv } @@ -265,7 +285,17 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testaswd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) nounwind readnone + %call = call double 
@llvm.round.f64(double %a) + %conv = fptosi double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testnswd: +; CHECK: fcvtns w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testnswd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) %conv = fptosi double %call to i32 ret i32 %conv } @@ -275,7 +305,17 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testasxd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) nounwind readnone + %call = call double @llvm.round.f64(double %a) + %conv = fptosi double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testnsxd: +; CHECK: fcvtns x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testnsxd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) %conv = fptosi double %call to i64 ret i64 %conv } @@ -285,7 +325,17 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i32 @testauws(float %a) { entry: - %call = call float @llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) + %conv = fptoui float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testnuws: +; CHECK: fcvtnu w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testnuws(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) %conv = fptoui float %call to i32 ret i32 %conv } @@ -295,7 +345,17 @@ entry: ; CHECK-NOT: frintx {{s[0-9]+}}, s0 define i64 @testauxs(float %a) { entry: - %call = call float @llvm.round.f32(float %a) nounwind readnone + %call = call float @llvm.round.f32(float %a) + %conv = fptoui float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testnuxs: +; CHECK: fcvtnu x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testnuxs(float %a) { +entry: + %call = call float @llvm.roundeven.f32(float %a) %conv = fptoui float %call to i64 ret i64 %conv } @@ -305,7 +365,17 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i32 @testauwd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) 
nounwind readnone + %call = call double @llvm.round.f64(double %a) + %conv = fptoui double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testnuwd: +; CHECK: fcvtnu w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testnuwd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) %conv = fptoui double %call to i32 ret i32 %conv } @@ -315,7 +385,17 @@ entry: ; CHECK-NOT: frintx {{d[0-9]+}}, d0 define i64 @testauxd(double %a) { entry: - %call = call double @llvm.round.f64(double %a) nounwind readnone + %call = call double @llvm.round.f64(double %a) + %conv = fptoui double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testnuxd: +; CHECK: fcvtnu x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testnuxd(double %a) { +entry: + %call = call double @llvm.roundeven.f64(double %a) %conv = fptoui double %call to i64 ret i64 %conv } diff --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll index e252cf524f04d..0286febd01f4b 100644 --- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc < %s -mtriple aarch64 -global-isel -global-isel-abort=2 -mattr=+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK-GI ; Round towards minus infinity (fcvtms). 
@@ -20,8 +21,24 @@ define i32 @testmswbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmswbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call bfloat @llvm.floor.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.floor.bf16(bfloat %a) %i = call i32 @llvm.fptosi.sat.i32.bf16(bfloat %r) ret i32 %i } @@ -42,8 +59,24 @@ define i64 @testmsxbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmsxbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintm s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call bfloat @llvm.floor.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.floor.bf16(bfloat %a) %i = call i64 @llvm.fptosi.sat.i64.bf16(bfloat %r) ret i64 %i } @@ -62,8 +95,13 @@ define i32 @testmswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtms w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtms w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half 
@llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -82,8 +120,13 @@ define i64 @testmsxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtms x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtms x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -93,8 +136,13 @@ define i32 @testmsws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmsws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtms w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.floor.f32(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -104,8 +152,13 @@ define i64 @testmsxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtms x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.floor.f32(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -115,8 +168,13 @@ define i32 @testmswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtms w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.floor.f64(double %a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -126,8 +184,13 @@ define i64 @testmsxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtms x0, d0 ; CHECK-NEXT: ret +; +; 
CHECK-GI-LABEL: testmsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtms x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.floor.f64(double %a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -150,8 +213,24 @@ define i32 @testpswbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpswbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call bfloat @llvm.ceil.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.ceil.bf16(bfloat %a) %i = call i32 @llvm.fptosi.sat.i32.bf16(bfloat %r) ret i32 %i } @@ -172,8 +251,24 @@ define i64 @testpsxbf(bfloat %a) { ; CHECK-NEXT: shll v0.4s, v0.4h, #16 ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsxbf: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: frintp s0, s0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: ubfx w10, w9, #16, #1 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: add w8, w10, w8 +; CHECK-GI-NEXT: lsr w8, w8, #16 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: shll v0.4s, v0.4h, #16 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call bfloat @llvm.ceil.bf16(bfloat %a) nounwind readnone + %r = call bfloat @llvm.ceil.bf16(bfloat %a) %i = call i64 @llvm.fptosi.sat.i64.bf16(bfloat %r) ret i64 %i } 
@@ -192,8 +287,13 @@ define i32 @testpswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtps w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtps w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -212,8 +312,13 @@ define i64 @testpsxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtps x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtps x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -223,8 +328,13 @@ define i32 @testpsws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtps w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.ceil.f32(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -234,8 +344,13 @@ define i64 @testpsxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtps x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.ceil.f32(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -245,8 +360,13 @@ define i32 @testpswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtps w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.ceil.f64(double 
%a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -256,8 +376,13 @@ define i64 @testpsxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtps x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtps x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.ceil.f64(double %a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -278,8 +403,13 @@ define i32 @testzswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzs w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzs w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -298,8 +428,13 @@ define i64 @testzsxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzs x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzs x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ -309,8 +444,13 @@ define i32 @testzsws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzsws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.trunc.f32(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -320,8 +460,13 @@ define i64 @testzsxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: 
testzsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.trunc.f32(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -331,8 +476,13 @@ define i32 @testzswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.trunc.f64(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -342,8 +492,13 @@ define i64 @testzsxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.trunc.f64(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } @@ -364,8 +519,13 @@ define i32 @testaswh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtas w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testaswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtas w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) ret i32 %i } @@ -384,8 +544,13 @@ define i64 @testasxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtas x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testasxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtas x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) ret i64 %i } @@ 
-395,8 +560,13 @@ define i32 @testasws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testasws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtas w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.round.f32(float %a) nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) ret i32 %i } @@ -406,8 +576,13 @@ define i64 @testasxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testasxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtas x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.round.f32(float %a) nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) ret i64 %i } @@ -417,8 +592,13 @@ define i32 @testaswd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testaswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtas w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.round.f64(double %a) nounwind readnone + %r = call double @llvm.round.f64(double %a) %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) ret i32 %i } @@ -428,34 +608,129 @@ define i64 @testasxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtas x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testasxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtas x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.round.f64(double %a) nounwind readnone + %r = call double @llvm.round.f64(double %a) %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) ret i64 %i } -declare i32 @llvm.fptosi.sat.i32.bf16 (bfloat) -declare i64 @llvm.fptosi.sat.i64.bf16 (bfloat) -declare i32 @llvm.fptosi.sat.i32.f16 (half) -declare i64 @llvm.fptosi.sat.i64.f16 (half) -declare i32 @llvm.fptosi.sat.i32.f32 (float) -declare i64 @llvm.fptosi.sat.i64.f32 (float) -declare i32 
@llvm.fptosi.sat.i32.f64 (double) -declare i64 @llvm.fptosi.sat.i64.f64 (double) - -declare bfloat @llvm.floor.bf16(bfloat) nounwind readnone -declare bfloat @llvm.ceil.bf16(bfloat) nounwind readnone -declare bfloat @llvm.trunc.bf16(bfloat) nounwind readnone -declare bfloat @llvm.round.bf16(bfloat) nounwind readnone -declare half @llvm.floor.f16(half) nounwind readnone -declare half @llvm.ceil.f16(half) nounwind readnone -declare half @llvm.trunc.f16(half) nounwind readnone -declare half @llvm.round.f16(half) nounwind readnone -declare float @llvm.floor.f32(float) nounwind readnone -declare float @llvm.ceil.f32(float) nounwind readnone -declare float @llvm.trunc.f32(float) nounwind readnone -declare float @llvm.round.f32(float) nounwind readnone -declare double @llvm.floor.f64(double) nounwind readnone -declare double @llvm.ceil.f64(double) nounwind readnone -declare double @llvm.trunc.f64(double) nounwind readnone -declare double @llvm.round.f64(double) nounwind readnone +; Round to nearest, ties to even (fcvtns). 
+ +define i32 @testnswh(half %a) { +; CHECK-CVT-LABEL: testnswh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnswh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtns w0, h0 +; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnswh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtns w0, h0 +; CHECK-GI-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + ret i32 %i +} + +define i64 @testnsxh(half %a) { +; CHECK-CVT-LABEL: testnsxh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnsxh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtns x0, h0 +; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnsxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtns x0, h0 +; CHECK-GI-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + ret i64 %i +} + +define i32 @testnsws(float %a) { +; CHECK-LABEL: testnsws: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns w0, s0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnsws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtns w0, s0 +; CHECK-GI-NEXT: ret +entry: + %r = call float @llvm.roundeven.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + ret i32 %i +} + +define i64 @testnsxs(float %a) { +; CHECK-LABEL: testnsxs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns x0, s0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnsxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtns x0, s0 +; CHECK-GI-NEXT: ret +entry: + %r = call float @llvm.roundeven.f32(float %a) + %i = 
call i64 @llvm.fptosi.sat.i64.f32(float %r) + ret i64 %i +} + +define i32 @testnswd(double %a) { +; CHECK-LABEL: testnswd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns w0, d0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnswd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtns w0, d0 +; CHECK-GI-NEXT: ret +entry: + %r = call double @llvm.roundeven.f64(double %a) + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + ret i32 %i +} + +define i64 @testnsxd(double %a) { +; CHECK-LABEL: testnsxd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtns x0, d0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnsxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtns x0, d0 +; CHECK-GI-NEXT: ret +entry: + %r = call double @llvm.roundeven.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + ret i64 %i +} diff --git a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll index 308ed13d0b8e2..b409658e1cd33 100644 --- a/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/round-fptoui-sat-scalar.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK-GI ; Round towards minus infinity (fcvtmu). 
@@ -18,8 +19,13 @@ define i32 @testmuwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtmu w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtmu w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -38,8 +44,13 @@ define i64 @testmuxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtmu x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testmuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtmu x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.floor.f16(half %a) nounwind readnone + %r = call half @llvm.floor.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -49,8 +60,13 @@ define i32 @testmuws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtmu w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.floor.f32(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -60,8 +76,13 @@ define i64 @testmuxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtmu x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.floor.f32(float %a) nounwind readnone + %r = call float @llvm.floor.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -71,8 +92,13 @@ define i32 @testmuwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtmu w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.floor.f64(double 
%a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -82,8 +108,13 @@ define i64 @testmuxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtmu x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testmuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtmu x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.floor.f64(double %a) nounwind readnone + %r = call double @llvm.floor.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -104,8 +135,13 @@ define i32 @testpuwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtpu w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtpu w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -124,8 +160,13 @@ define i64 @testpuxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtpu x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testpuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtpu x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %r = call half @llvm.ceil.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -135,8 +176,13 @@ define i32 @testpuws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtpu w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtpu w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.ceil.f32(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -146,8 +192,13 @@ define i64 @testpuxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtpu x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: 
testpuxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtpu x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.ceil.f32(float %a) nounwind readnone + %r = call float @llvm.ceil.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -157,8 +208,13 @@ define i32 @testpuwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtpu w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtpu w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.ceil.f64(double %a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -168,8 +224,13 @@ define i64 @testpuxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtpu x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testpuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtpu x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.ceil.f64(double %a) nounwind readnone + %r = call double @llvm.ceil.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -190,8 +251,13 @@ define i32 @testzuwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -210,8 +276,13 @@ define i64 @testzuxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtzu x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testzuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %r = call half @llvm.trunc.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -221,8 
+292,13 @@ define i32 @testzuws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.trunc.f32(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -232,8 +308,13 @@ define i64 @testzuxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.trunc.f32(float %a) nounwind readnone + %r = call float @llvm.trunc.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -243,8 +324,13 @@ define i32 @testzuwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.trunc.f64(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -254,8 +340,13 @@ define i64 @testzuxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzu x0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testzuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.trunc.f64(double %a) nounwind readnone + %r = call double @llvm.trunc.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } @@ -276,8 +367,13 @@ define i32 @testauwh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtau w0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testauwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtau w0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half 
@llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) ret i32 %i } @@ -296,8 +392,13 @@ define i64 @testauxh(half %a) { ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: fcvtau x0, h0 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testauxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtau x0, h0 +; CHECK-GI-NEXT: ret entry: - %r = call half @llvm.round.f16(half %a) nounwind readnone + %r = call half @llvm.round.f16(half %a) %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) ret i64 %i } @@ -307,8 +408,13 @@ define i32 @testauws(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau w0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testauws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtau w0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.round.f32(float %a) nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) ret i32 %i } @@ -318,8 +424,13 @@ define i64 @testauxs(float %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau x0, s0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testauxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtau x0, s0 +; CHECK-GI-NEXT: ret entry: - %r = call float @llvm.round.f32(float %a) nounwind readnone + %r = call float @llvm.round.f32(float %a) %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) ret i64 %i } @@ -329,8 +440,13 @@ define i32 @testauwd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau w0, d0 ; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testauwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtau w0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.round.f64(double %a) nounwind readnone + %r = call double @llvm.round.f64(double %a) %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) ret i32 %i } @@ -340,28 +456,130 @@ define i64 @testauxd(double %a) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtau x0, d0 ; CHECK-NEXT: ret +; +; 
CHECK-GI-LABEL: testauxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtau x0, d0 +; CHECK-GI-NEXT: ret +entry: + %r = call double @llvm.round.f64(double %a) + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + ret i64 %i +} + +; Round to nearest, ties to even (fcvtnu). + +define i32 @testnuwh(half %a) { +; CHECK-CVT-LABEL: testnuwh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu w0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnuwh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtnu w0, h0 +; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnuwh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtnu w0, h0 +; CHECK-GI-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + ret i32 %i +} + +define i64 @testnuxh(half %a) { +; CHECK-CVT-LABEL: testnuxh: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: testnuxh: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtnu x0, h0 +; CHECK-FP16-NEXT: ret +; +; CHECK-GI-LABEL: testnuxh: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtnu x0, h0 +; CHECK-GI-NEXT: ret +entry: + %r = call half @llvm.roundeven.f16(half %a) + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + ret i64 %i +} + +define i32 @testnuws(float %a) { +; CHECK-LABEL: testnuws: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu w0, s0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuws: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtnu w0, s0 +; CHECK-GI-NEXT: ret +entry: + %r = call float @llvm.roundeven.f32(float %a) + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + ret i32 %i +} + +define i64 @testnuxs(float %a) { +; 
CHECK-LABEL: testnuxs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu x0, s0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuxs: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtnu x0, s0 +; CHECK-GI-NEXT: ret +entry: + %r = call float @llvm.roundeven.f32(float %a) + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + ret i64 %i +} + +define i32 @testnuwd(double %a) { +; CHECK-LABEL: testnuwd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu w0, d0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuwd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtnu w0, d0 +; CHECK-GI-NEXT: ret +entry: + %r = call double @llvm.roundeven.f64(double %a) + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + ret i32 %i +} + +define i64 @testnuxd(double %a) { +; CHECK-LABEL: testnuxd: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtnu x0, d0 +; CHECK-NEXT: ret +; +; CHECK-GI-LABEL: testnuxd: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fcvtnu x0, d0 +; CHECK-GI-NEXT: ret entry: - %r = call double @llvm.round.f64(double %a) nounwind readnone + %r = call double @llvm.roundeven.f64(double %a) %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) ret i64 %i } -declare i32 @llvm.fptoui.sat.i32.f16 (half) -declare i64 @llvm.fptoui.sat.i64.f16 (half) -declare i32 @llvm.fptoui.sat.i32.f32 (float) -declare i64 @llvm.fptoui.sat.i64.f32 (float) -declare i32 @llvm.fptoui.sat.i32.f64 (double) -declare i64 @llvm.fptoui.sat.i64.f64 (double) - -declare half @llvm.floor.f16(half) nounwind readnone -declare half @llvm.ceil.f16(half) nounwind readnone -declare half @llvm.trunc.f16(half) nounwind readnone -declare half @llvm.round.f16(half) nounwind readnone -declare float @llvm.floor.f32(float) nounwind readnone -declare float @llvm.ceil.f32(float) nounwind readnone -declare float @llvm.trunc.f32(float) nounwind readnone -declare float @llvm.round.f32(float) nounwind readnone -declare double @llvm.floor.f64(double) nounwind readnone -declare double @llvm.ceil.f64(double) nounwind 
readnone -declare double @llvm.trunc.f64(double) nounwind readnone -declare double @llvm.round.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index fb571eff39fe5..9f4f00fda7cdf 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64 < %s | FileCheck %s ; CHECK: .LCPI0_0: @@ -700,23 +700,15 @@ define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8 define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l249, <2 x double> %l267, <2 x double> %l285, <2 x double> %l303, <2 x double> %l321, <2 x double> %l339) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: -; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: frintm v4.2d, v4.2d +; CHECK-NEXT: fcvtms v0.2d, v0.2d +; CHECK-NEXT: fcvtms v4.2d, v4.2d ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: frintm v1.2d, v1.2d -; CHECK-NEXT: frintm v5.2d, v5.2d -; CHECK-NEXT: frintm v2.2d, v2.2d -; CHECK-NEXT: frintm v6.2d, v6.2d -; CHECK-NEXT: frintm v3.2d, v3.2d -; CHECK-NEXT: frintm v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtms v1.2d, v1.2d +; CHECK-NEXT: fcvtms v5.2d, v5.2d +; CHECK-NEXT: fcvtms v2.2d, v2.2d +; CHECK-NEXT: fcvtms v6.2d, v6.2d +; CHECK-NEXT: fcvtms v3.2d, v3.2d +; CHECK-NEXT: fcvtms v7.2d, v7.2d ; CHECK-NEXT: xtn v16.2s, v0.2d ; CHECK-NEXT: xtn v20.2s, v4.2d ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]