diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 212a57bc7cde5..0b782d79237da 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -31,6 +31,10 @@ static cl::opt HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors")); +static cl::opt + EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), + cl::desc("Enable FP fast conversion routine.")); + static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; @@ -2970,6 +2974,32 @@ HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); assert(InpTy.changeTypeToInteger() == ResTy); + // At this point this is an experiment under a flag. + // In arch before V81 the rounding mode is towards nearest value. + // The C/C++ standard requires rounding towards zero: + // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a + // finite value of real floating type is converted to an integer type, the + // fractional part is discarded (i.e., the value is truncated toward zero)." + // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a + // floating-point type can be converted to a prvalue of an integer type. The + // conversion truncates; that is, the fractional part is discarded." 
+ if (InpTy == MVT::v64f16) { + if (Subtarget.useHVXV81Ops()) { + // This is C/C++ compliant + SDValue ConvVec = + getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG); + return ConvVec; + } else if (EnableFpFastConvert) { + // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf + SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG); + return ConvVec; + } + } else if (EnableFpFastConvert && InpTy == MVT::v32f32) { + // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf + SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG); + return ConvVec; + } + + // int32_t conv_f32_to_i32(uint32_t inp) { + // // s | exp8 | frac23 + // diff --git a/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int_2.ll b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int_2.ll new file mode 100644 index 0000000000000..03e484a6721a7 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int_2.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=hexagon -hexagon-hvx-widen=32 -hexagon-fp-fast-convert=true -mattr=+hvxv68,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s --check-prefix=CHECK-V68 +; RUN: llc -mtriple=hexagon -hexagon-hvx-widen=32 -hexagon-fp-fast-convert=true -mattr=+hvxv81,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s --check-prefix=CHECK-V81 + +; ---------------------------- +; V68 Tests +; ---------------------------- + +; f16 -> s16 (No widening) +define void @f16s16_0(ptr %a0, ptr %a1) #0 { +; CHECK-V68-LABEL: f16s16_0: +; CHECK-V68: { +; CHECK-V68: [[DST:v[0-9]+]].h = [[SRC:v[0-9]+]].hf +; CHECK-V68-NEXT: jumpr r31 +; CHECK-V68: vmem(r1+#0) = [[DST]].new +; CHECK-V68-NEXT: } + %v0 = load <64 x half>, ptr %a0, align 128 + %v1 = fptosi <64 x half> %v0 to <64 x i16> + store <64 x i16> %v1, ptr %a1, align 128 + ret void +} + +; f32 -> s8 (Triggers V6_vconv_w_sf) +define void @f32s8_2(ptr %a0, ptr %a1) { +; CHECK-V68-LABEL: f32s8_2: +; CHECK-V68: { +; CHECK-V68: [[SRC:v[0-9]+]] = vmem(r0+#0) +; CHECK-V68: 
[[SRC]].w = [[SRC]].sf +; CHECK-V68: vpack +; CHECK-V68: vpack +; CHECK-V68: vpack +; CHECK-V68: jumpr r31 +; CHECK-V68: vmem(r1+#0) = [[DST:v[0-9]+]] +; CHECK-V68-NEXT: } + %v0 = load <32 x float>, ptr %a0, align 128 + %v1 = fptosi <32 x float> %v0 to <32 x i8> + store <32 x i8> %v1, ptr %a1, align 128 + ret void +} + +; ---------------------------- +; V81 Tests +; ---------------------------- + +; f16 -> s16 with rounding (V6_vconv_h_hf_rnd) +define void @f16s16_v81(ptr %a0, ptr %a1) { +; CHECK-V81-LABEL: f16s16_v81: +; CHECK-V81: { +; CHECK-V81: [[DST:v[0-9]+]].h = [[SRC:v[0-9]+]].hf:rnd +; CHECK-V81-NEXT: jumpr r31 +; CHECK-V81: vmem(r1+#0) = [[DST]].new +; CHECK-V81-NEXT: } + %v0 = load <64 x half>, ptr %a0, align 128 + %v1 = fptosi <64 x half> %v0 to <64 x i16> + store <64 x i16> %v1, ptr %a1, align 128 + ret void +} + +; f32 -> s8 with V81 (still uses V6_vconv_w_sf) +define void @f32s8_v81(ptr %a0, ptr %a1) { +; CHECK-V81-LABEL: f32s8_v81: +; CHECK-V81: { +; CHECK-V81: [[SRC:v[0-9]+]] = vmem(r0+#0) +; CHECK-V81: [[SRC]].w = [[SRC]].sf +; CHECK-V81: vpack +; CHECK-V81: vpack +; CHECK-V81: vpack +; CHECK-V81: jumpr r31 +; CHECK-V81: vmem(r1+#0) = [[DST:v[0-9]+]] +; CHECK-V81-NEXT: } + %v0 = load <32 x float>, ptr %a0, align 128 + %v1 = fptosi <32 x float> %v0 to <32 x i8> + store <32 x i8> %v1, ptr %a1, align 128 + ret void +}