From 91e103d17860836a2a52640b6da27ffcb3bad02d Mon Sep 17 00:00:00 2001 From: quic-santdas Date: Fri, 17 Jan 2025 13:51:29 -0800 Subject: [PATCH] [Hexagon] Added v32i1/v64i1 to v32f32/v64f16 lowering This patch introduces uint_to_fp conversions from v32i1 and v64i1 predicate vectors to v32f32 and v64f16 floating-point vectors. Patch-by: Santanu Das Change-Id: I4616238ffc29161971cdae5010ade99ac916c82e --- .../Target/Hexagon/HexagonISelLowering.cpp | 1 - llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2 + .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 128 ++++++++++++++++++ .../Hexagon/isel-uinttofp-v32i1tov32f32.ll | 25 ++++ .../Hexagon/isel-uinttofp-v64i1tov64f16.ll | 27 ++++ 5 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll create mode 100644 llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index c54b67ccd8843..9f7f434b66fa1 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3352,7 +3352,6 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); - // Handle INLINEASM first. 
if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR) return LowerINLINEASM(Op, DAG); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 9ebbbc6399b42..8d04edbea5b43 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -577,6 +577,8 @@ class HexagonTargetLowering : public TargetLowering { SDValue LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxPred32ToFp(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxPred64ToFp(SDValue Op, SelectionDAG &DAG) const; SDValue ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const; SDValue ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index ff02a67d54363..d0dfa47468705 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -446,6 +446,10 @@ HexagonTargetLowering::initializeHVXLowering() { } } + // Include cases which are not handled earlier + setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom); + setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT}); } @@ -2333,6 +2337,123 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { return ExpandHvxFpToInt(Op, DAG); } +// For vector type v32i1 uint_to_fp to v32f32: +// R1 = #1, R2 holds the v32i1 param +// V1 = vsplat(R1) +// V2 = vsplat(R2) +// Q0 = vand(V1,R1) +// V0.w=prefixsum(Q0) +// V0.w=vsub(V0.w,V1.w) +// V2.w = vlsr(V2.w,V0.w) +// V2 = vand(V2,V1) +// V2.sf = V2.w +SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp, + SelectionDAG &DAG) const { + + MVT ResTy = ty(PredOp); + const SDLoc &dl(PredOp); 
+ + SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32); + SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const); + SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32, + SDValue(RegConst, 0)); + SDNode *PredTransfer = + DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1, + SDValue(SplatConst, 0), SDValue(RegConst, 0)); + SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32, + SDValue(PredTransfer, 0)); + SDNode *SplatParam = DAG.getMachineNode( + Hexagon::V6_lvsplatw, dl, MVT::v32i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0))); + SDNode *Vsub = + DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32, + SDValue(PrefixSum, 0), SDValue(SplatConst, 0)); + SDNode *IndexShift = + DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32, + SDValue(SplatParam, 0), SDValue(Vsub, 0)); + SDNode *MaskOff = + DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32, + SDValue(IndexShift, 0), SDValue(SplatConst, 0)); + SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy, + SDValue(MaskOff, 0)); + return SDValue(Convert, 0); +} + +// For vector type v64i1 uint_to_fp to v64f16: +// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1) +// R3 = subreg_high (R32) +// R2 = subreg_low (R32) +// R1 = #1 +// V1 = vsplat(R1) +// V2 = vsplat(R2) +// V3 = vsplat(R3) +// Q0 = vand(V1,R1) +// V0.w=prefixsum(Q0) +// V0.w=vsub(V0.w,V1.w) +// V2.w = vlsr(V2.w,V0.w) +// V3.w = vlsr(V3.w,V0.w) +// V2 = vand(V2,V1) +// V3 = vand(V3,V1) +// V2.h = vpacke(V3.w,V2.w) +// V2.hf = V2.h +SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp, + SelectionDAG &DAG) const { + + MVT ResTy = ty(PredOp); + const SDLoc &dl(PredOp); + + SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0)); + // Get the hi and lo regs + SDValue HiReg = + DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp); + SDValue LoReg = + DAG.getTargetExtractSubreg(Hexagon::isub_lo, 
dl, MVT::i32, Inp); + // Get constant #1 and splat into vector V1 + SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32); + SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const); + SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32, + SDValue(RegConst, 0)); + // Splat the hi and lo args + SDNode *SplatHi = + DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg)); + SDNode *SplatLo = + DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg)); + // vand between splatted const and const + SDNode *PredTransfer = + DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1, + SDValue(SplatConst, 0), SDValue(RegConst, 0)); + // Get the prefixsum + SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32, + SDValue(PredTransfer, 0)); + // Get the vsub + SDNode *Vsub = + DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32, + SDValue(PrefixSum, 0), SDValue(SplatConst, 0)); + // Get vlsr for hi and lo + SDNode *IndexShift_hi = + DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32, + SDValue(SplatHi, 0), SDValue(Vsub, 0)); + SDNode *IndexShift_lo = + DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32, + SDValue(SplatLo, 0), SDValue(Vsub, 0)); + // Get vand of hi and lo + SDNode *MaskOff_hi = + DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32, + SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0)); + SDNode *MaskOff_lo = + DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32, + SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0)); + // Pack them + SDNode *Pack = + DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16, + SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0)); + SDNode *Convert = + DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0)); + return SDValue(Convert, 0); +} + SDValue HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { // Catch 
invalid conversion ops (just in case). @@ -2343,6 +2464,13 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { MVT IntTy = ty(Op.getOperand(0)).getVectorElementType(); MVT FpTy = ResTy.getVectorElementType(); + if (Op.getOpcode() == ISD::UINT_TO_FP) { + if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1) + return LowerHvxPred32ToFp(Op, DAG); + if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1) + return LowerHvxPred64ToFp(Op, DAG); + } + if (Subtarget.useHVXIEEEFPOps()) { // There are only conversions to f16. if (FpTy == MVT::f16) { diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll new file mode 100644 index 0000000000000..dfb2bc83537dc --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll @@ -0,0 +1,25 @@ +; Tests lowering of v32i1 to v32f32 + +; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b,+hvx-ieee-fp \ +; RUN: -stop-after=hexagon-isel %s -o - | FileCheck %s + +; CHECK: [[R0:%[0-9]+]]:hvxvr = V6_lvsplatw killed %{{[0-9]+}} +; CHECK-NEXT: [[R1:%[0-9]+]]:intregs = A2_tfrsi 1 +; CHECK-NEXT: [[R2:%[0-9]+]]:hvxvr = V6_lvsplatw [[R1]] +; CHECK-NEXT: [[R3:%[0-9]+]]:hvxqr = V6_vandvrt [[R2]], [[R1]] +; CHECK-NEXT: [[R4:%[0-9]+]]:hvxvr = V6_vprefixqw killed [[R3]] +; CHECK-NEXT: [[R5:%[0-9]+]]:hvxvr = V6_vsubw killed [[R4]], [[R2]] +; CHECK-NEXT: [[R6:%[0-9]+]]:hvxvr = V6_vlsrwv killed [[R0]], killed [[R5]] +; CHECK-NEXT: [[R7:%[0-9]+]]:hvxvr = V6_vand killed [[R6]], [[R2]] +; CHECK-NEXT: [[R8:%[0-9]+]]:hvxvr = V6_vconv_sf_w killed [[R7]] +; CHECK-NEXT: hvxvr = V6_vadd_sf_sf [[R8]], [[R8]] + +define <32 x float> @uitofp_i1(<32 x i16> %in0, <32 x i16> %in1) #0 +{ + %q1 = icmp eq <32 x i16> %in0, %in1 + %fp0 = uitofp <32 x i1> %q1 to <32 x float> + %out = fadd <32 x float> %fp0, %fp0 + ret <32 x float> %out +} + +attributes #0 = { nounwind readnone "target-cpu"="hexagonv79" 
"target-features"="+hvxv79,+hvx-length128b" } diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll new file mode 100644 index 0000000000000..8769e345655e9 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll @@ -0,0 +1,27 @@ +; Tests the conversion pattern for v64i1 to v64f16 +; r0, r3 and r9 registers are i32 types converted from +; v32i1 via a bitcasting sequence. + +; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b \ +; RUN: %s -verify-machineinstrs -o - | FileCheck %s + +; CHECK: [[V3:v[0-9]+]] = vsplat([[R0:r[0-9]+]]) +; CHECK: [[Q0:q[0-9]+]] = vand([[V3]],[[R0]]) +; CHECK: [[V4:v[0-9]+]].w = prefixsum([[Q0]]) +; CHECK: [[V5:v[0-9]+]].w = vsub([[V4]].w,[[V3]].w) +; CHECK: [[V1:v[0-9]+]] = vsplat(r +; CHECK: [[V2:v[0-9]+]] = vsplat(r +; CHECK: [[V6:v[0-9]+]].w = vlsr([[V1]].w,[[V5]].w) +; CHECK: [[V7:v[0-9]+]].w = vlsr([[V2]].w,[[V5]].w) +; CHECK: [[V8:v[0-9]+]] = vand([[V6]],[[V3]]) +; CHECK: [[V9:v[0-9]+]] = vand([[V7]],[[V3]]) +; CHECK: [[V10:v[0-9]+]].h = vpacke([[V9]].w,[[V8]].w) +; CHECK: .hf = [[V10]].h + +define <64 x half> @uitofp_i1(<64 x i16> %in0, <64 x i16> %in1) +{ + %in = icmp eq <64 x i16> %in0, %in1 + %fp0 = uitofp <64 x i1> %in to <64 x half> + %out = fadd <64 x half> %fp0, %fp0 + ret <64 x half> %out +}