diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index a94e131dd7214..54c89721bc1f0 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -117,8 +117,10 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - if (Subtarget.useHVX128BOps()) + if (Subtarget.useHVX128BOps()) { setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); + setOperationAction(ISD::BITCAST, MVT::v64i1, Custom); + } if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { @@ -2024,13 +2026,9 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { // Handle bitcast from i32, v2i16, and v4i8 to v32i1. // Splat the input into a 32-element i32 vector, then AND each element // with a unique bitmask to isolate individual bits. 
- if (ResTy == MVT::v32i1 && - (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) && - Subtarget.useHVX128BOps()) { - SDValue Val32 = Val; - if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8) - Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val); - + auto bitcastI32ToV32I1 = [&](SDValue Val32) { + assert(Val32.getValueType().getSizeInBits() == 32 && + "Input must be 32 bits"); MVT VecTy = MVT::getVectorVT(MVT::i32, 32); SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32); SmallVector<SDValue, 32> Mask; @@ -2039,7 +2037,31 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask); SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec); - return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded); + return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded); + }; + // === Case: v32i1 === + if (ResTy == MVT::v32i1 && + (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) && + Subtarget.useHVX128BOps()) { + SDValue Val32 = Val; + if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8) + Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val); + return bitcastI32ToV32I1(Val32); + } + // === Case: v64i1 === + if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) { + // Split i64 into lo/hi 32-bit halves. + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val); + SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val, + DAG.getConstant(32, dl, MVT::i64)); + SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted); + + // Reuse the same 32-bit logic twice. + SDValue LoRes = bitcastI32ToV32I1(Lo); + SDValue HiRes = bitcastI32ToV32I1(Hi); + + // Concatenate into a v64i1 predicate. 
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes); } if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) { diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll new file mode 100644 index 0000000000000..f7e5cdbaecee5 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bitcast-i64-to-v64i1.ll @@ -0,0 +1,33 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s +; CHECK-DAG: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = combine(##.LCPI0_0,#-1) +; CHECK-DAG: [[VREG1:v([0-9]+)]] = vmem(r[[REGH]]+#0) +; CHECK-DAG: [[REG1:(r[0-9]+)]] = memw(r{{[0-9]+}}+#4) +; CHECK-DAG: [[VREG2:v([0-9]+)]] = vsplat([[REG1]]) +; CHECK-DAG: [[REG2:(r[0-9]+)]] = memw(r{{[0-9]+}}+#0) +; CHECK-DAG: [[VREG3:v([0-9]+)]] = vsplat([[REG2]]) +; CHECK-DAG: [[VREG4:v([0-9]+)]] = vand([[VREG2]],[[VREG1]]) +; CHECK-DAG: [[VREG5:v([0-9]+)]] = vand([[VREG3]],[[VREG1]]) +; CHECK-DAG: [[QREG:q[0-9]+]] = vand([[VREG4]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG6:v([0-9]+)]] = vand([[QREG]],r{{[0-9]+}}) +; CHECK-DAG: [[QREG1:q[0-9]+]] = vand([[VREG5]],r{{[0-9]+}}) +; CHECK-DAG: [[VREG7:v([0-9]+)]] = vand([[QREG1]],r{{[0-9]+}}) +; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK-DAG: [[VREG8:v([0-9]+)]] = vror(v{{[0-9]+}},r{{[0-9]+}}) +; CHECK-DAG: [[VREG9:v([0-9]+)]] = vor([[VREG8]],v{{[0-9]+}}) +; CHECK-DAG: q{{[0-9]+}} = vand([[VREG9]],r{{[0-9]+}}) +define void @bitcast_i64_to_v64i1_full(ptr %in, ptr %out) { +entry: + %load = load i64, ptr %in, align 4 + %bitcast = bitcast i64 %load to <64 x i1> + %e0 = extractelement <64 x i1> %bitcast, i32 0 + %e1 = extractelement <64 x i1> %bitcast, i32 1 + %z0 = zext i1 %e0 to i8 + %z1 = zext i1 %e1 to i8 + %ptr0 = getelementptr i8, ptr %out, i32 0 + %ptr1 = getelementptr i8, ptr %out, i32 1 + store i8 %z0, ptr %ptr0, align 1 + store i8 %z1, ptr %ptr1, align 1 + ret void +} +