From 91e103d17860836a2a52640b6da27ffcb3bad02d Mon Sep 17 00:00:00 2001
From: quic-santdas <quic_santdas@quicinc.com>
Date: Fri, 17 Jan 2025 13:51:29 -0800
Subject: [PATCH] [Hexagon] Added v32i1/v64i1 to v32f32/v64f16 lowering

This patch introduces uint_to_fp conversions from v32i1 and v64i1
predicate vectors to v32f32 and v64f16 floating-point vectors.

Patch-by: Santanu Das
Change-Id: I4616238ffc29161971cdae5010ade99ac916c82e
---
 .../Target/Hexagon/HexagonISelLowering.cpp    |   1 -
 llvm/lib/Target/Hexagon/HexagonISelLowering.h |   2 +
 .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 128 ++++++++++++++++++
 .../Hexagon/isel-uinttofp-v32i1tov32f32.ll    |  25 ++++
 .../Hexagon/isel-uinttofp-v64i1tov64f16.ll    |  27 ++++
 5 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
 create mode 100644 llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index c54b67ccd8843..9f7f434b66fa1 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3352,7 +3352,6 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
 SDValue
 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   unsigned Opc = Op.getOpcode();
-
   // Handle INLINEASM first.
   if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
     return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 9ebbbc6399b42..8d04edbea5b43 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -577,6 +577,8 @@ class HexagonTargetLowering : public TargetLowering {
   SDValue LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerHvxPred32ToFp(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerHvxPred64ToFp(SDValue Op, SelectionDAG &DAG) const;
   SDValue ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
   SDValue ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index ff02a67d54363..d0dfa47468705 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -446,6 +446,10 @@ HexagonTargetLowering::initializeHVXLowering() {
     }
   }
 
+  // Include cases which are not hander earlier
+  setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom);
+  setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom);
+
   setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
 }
 
@@ -2333,6 +2337,123 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
   return ExpandHvxFpToInt(Op, DAG);
 }
 
+// For vector type v32i1 uint_to_fp to v32f32:
+// R1 = #1, R2 holds the v32i1 param
+// V1 = vsplat(R1)
+// V2 = vsplat(R2)
+// Q0 = vand(V1,R1)
+// V0.w=prefixsum(Q0)
+// V0.w=vsub(V0.w,V1.w)
+// V2.w = vlsr(V2.w,V0.w)
+// V2 = vand(V2,V1)
+// V2.sf = V2.w
+SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
+                                                  SelectionDAG &DAG) const {
+
+  MVT ResTy = ty(PredOp);
+  const SDLoc &dl(PredOp);
+
+  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
+  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
+  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+                                          SDValue(RegConst, 0));
+  SDNode *PredTransfer =
+      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
+                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
+  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
+                                         SDValue(PredTransfer, 0));
+  SDNode *SplatParam = DAG.getMachineNode(
+      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
+  SDNode *Vsub =
+      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
+                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
+  SDNode *IndexShift =
+      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
+  SDNode *MaskOff =
+      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
+  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
+                                       SDValue(MaskOff, 0));
+  return SDValue(Convert, 0);
+}
+
+// For vector type v64i1 uint_to_fo to v64f16:
+// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
+// R3 = subreg_high (R32)
+// R2 = subreg_low (R32)
+// R1 = #1
+// V1 = vsplat(R1)
+// V2 = vsplat(R2)
+// V3 = vsplat(R3)
+// Q0 = vand(V1,R1)
+// V0.w=prefixsum(Q0)
+// V0.w=vsub(V0.w,V1.w)
+// V2.w = vlsr(V2.w,V0.w)
+// V3.w = vlsr(V3.w,V0.w)
+// V2 = vand(V2,V1)
+// V3 = vand(V3,V1)
+// V2.h = vpacke(V3.w,V2.w)
+// V2.hf = V2.h
+SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
+                                                  SelectionDAG &DAG) const {
+
+  MVT ResTy = ty(PredOp);
+  const SDLoc &dl(PredOp);
+
+  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
+  // Get the hi and lo regs
+  SDValue HiReg =
+      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
+  SDValue LoReg =
+      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
+  // Get constant #1 and splat into vector V1
+  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
+  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
+  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+                                          SDValue(RegConst, 0));
+  // Splat the hi and lo args
+  SDNode *SplatHi =
+      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
+  SDNode *SplatLo =
+      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
+  // vand between splatted const and const
+  SDNode *PredTransfer =
+      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
+                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
+  // Get the prefixsum
+  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
+                                         SDValue(PredTransfer, 0));
+  // Get the vsub
+  SDNode *Vsub =
+      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
+                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
+  // Get vlsr for hi and lo
+  SDNode *IndexShift_hi =
+      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
+  SDNode *IndexShift_lo =
+      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
+  // Get vand of hi and lo
+  SDNode *MaskOff_hi =
+      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
+  SDNode *MaskOff_lo =
+      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
+  // Pack them
+  SDNode *Pack =
+      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
+                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
+  SDNode *Convert =
+      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
+  return SDValue(Convert, 0);
+}
+
 SDValue
 HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
   // Catch invalid conversion ops (just in case).
@@ -2343,6 +2464,13 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
   MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
   MVT FpTy = ResTy.getVectorElementType();
 
+  if (Op.getOpcode() == ISD::UINT_TO_FP) {
+    if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
+      return LowerHvxPred32ToFp(Op, DAG);
+    if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
+      return LowerHvxPred64ToFp(Op, DAG);
+  }
+
   if (Subtarget.useHVXIEEEFPOps()) {
     // There are only conversions to f16.
     if (FpTy == MVT::f16) {
diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
new file mode 100644
index 0000000000000..dfb2bc83537dc
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
@@ -0,0 +1,25 @@
+; Tests lowering of v32i1 to v32f32
+
+; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b,+hvx-ieee-fp \
+; RUN: -stop-after=hexagon-isel %s -o - | FileCheck %s
+
+; CHECK: [[R0:%[0-9]+]]:hvxvr = V6_lvsplatw killed %{{[0-9]+}}
+; CHECK-NEXT: [[R1:%[0-9]+]]:intregs = A2_tfrsi 1
+; CHECK-NEXT: [[R2:%[0-9]+]]:hvxvr = V6_lvsplatw [[R1]]
+; CHECK-NEXT: [[R3:%[0-9]+]]:hvxqr = V6_vandvrt [[R2]], [[R1]]
+; CHECK-NEXT: [[R4:%[0-9]+]]:hvxvr = V6_vprefixqw killed [[R3]]
+; CHECK-NEXT: [[R5:%[0-9]+]]:hvxvr = V6_vsubw killed [[R4]], [[R2]]
+; CHECK-NEXT: [[R6:%[0-9]+]]:hvxvr = V6_vlsrwv killed [[R0]], killed [[R5]]
+; CHECK-NEXT: [[R7:%[0-9]+]]:hvxvr = V6_vand killed [[R6]], [[R2]]
+; CHECK-NEXT: [[R8:%[0-9]+]]:hvxvr = V6_vconv_sf_w killed [[R7]]
+; CHECK-NEXT: hvxvr = V6_vadd_sf_sf [[R8]], [[R8]]
+
+define <32 x float> @uitofp_i1(<32 x i16> %in0, <32 x i16> %in1) #0
+{
+   %q1 = icmp eq <32 x i16> %in0, %in1
+   %fp0 = uitofp <32 x i1> %q1 to <32 x float>
+   %out = fadd <32 x float> %fp0, %fp0
+   ret <32 x float> %out
+}
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv79" "target-features"="+hvxv79,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll
new file mode 100644
index 0000000000000..8769e345655e9
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll
@@ -0,0 +1,27 @@
+; Tests the conversion pattern for v64i1 to v64f16
+; r0, r3 and r9 registers are i32 types converted from
+; v32i1 via a bitcasting sequence.
+
+; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b \
+; RUN: %s -verify-machineinstrs -o - | FileCheck %s
+
+; CHECK: [[V3:v[0-9]+]] = vsplat([[R0:r[0-9]+]])
+; CHECK: [[Q0:q[0-9]+]] = vand([[V3]],[[R0]])
+; CHECK: [[V4:v[0-9]+]].w = prefixsum([[Q0]])
+; CHECK: [[V5:v[0-9]+]].w = vsub([[V4]].w,[[V3]].w)
+; CHECK: [[V1:v[0-9]+]] = vsplat(r
+; CHECK: [[V2:v[0-9]+]] = vsplat(r
+; CHECK: [[V6:v[0-9]+]].w = vlsr([[V1]].w,[[V5]].w)
+; CHECK: [[V7:v[0-9]+]].w = vlsr([[V2]].w,[[V5]].w)
+; CHECK: [[V8:v[0-9]+]] = vand([[V6]],[[V3]])
+; CHECK: [[V9:v[0-9]+]] = vand([[V7]],[[V3]])
+; CHECK: [[V10:v[0-9]+]].h = vpacke([[V9]].w,[[V8]].w)
+; CHECK: .hf = [[V10]].h
+
+define <64 x half> @uitofp_i1(<64 x i16> %in0, <64 x i16> %in1)
+{
+   %in = icmp eq <64 x i16> %in0, %in1
+   %fp0 = uitofp <64 x i1> %in to <64 x half>
+   %out = fadd <64 x half> %fp0, %fp0
+   ret <64 x half> %out
+}