From 41085357df2b2926bbe4494d8572395f891fdd02 Mon Sep 17 00:00:00 2001 From: Shubham Pawar Date: Wed, 19 Jun 2019 11:38:06 -0500 Subject: [PATCH] [Hexagon] Extend OptAddrMode pass to vgather This change extends the addressing mode optimization pass to HVX vgather. This is specifically intended to resolve the compiler not generating indexed addresses for vgather stores to vtcm. Changed the vgather pseudo instructions to accept an immediate operand and handled addition of the appropriate immediate operand in the addressing mode optimization pass. --- .../Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 7 +- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 44 ++++---- .../lib/Target/Hexagon/HexagonOptAddrMode.cpp | 101 ++++++++++++++--- llvm/lib/Target/Hexagon/HexagonPatternsV65.td | 27 +++-- .../Hexagon/packetize-vgather-slot01.mir | 4 +- llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll | 103 ++++++++++++++++++ .../CodeGen/Hexagon/vgather-packetize.mir | 2 +- 7 files changed, 234 insertions(+), 54 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 95e202647246e7..0a6dd727eb82d3 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -2393,6 +2393,7 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { SDValue Base = N->getOperand(4); SDValue Modifier = N->getOperand(5); SDValue Offset = N->getOperand(6); + SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32); unsigned Opcode; unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -2414,7 +2415,8 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { } SDVTList VTs = CurDAG->getVTList(MVT::Other); - SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain }; + SDValue Ops[] = { Address, ImmOperand, + Predicate, Base, Modifier, Offset, Chain }; SDNode *Result = 
CurDAG->getMachineNode(Opcode, dl, VTs, Ops); MachineMemOperand *MemOp = cast(N)->getMemOperand(); @@ -2430,6 +2432,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { SDValue Base = N->getOperand(3); SDValue Modifier = N->getOperand(4); SDValue Offset = N->getOperand(5); + SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32); unsigned Opcode; unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); @@ -2451,7 +2454,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { } SDVTList VTs = CurDAG->getVTList(MVT::Other); - SDValue Ops[] = { Address, Base, Modifier, Offset, Chain }; + SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain }; SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); MachineMemOperand *MemOp = cast(N)->getMemOperand(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index a38e4370913292..a36be6b02caa81 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1506,75 +1506,75 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const { switch (Opc) { case Hexagon::V6_vgathermh_pseudo: First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) - .add(MI.getOperand(1)) .add(MI.getOperand(2)) - .add(MI.getOperand(3)); + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) .add(MI.getOperand(0)) - .addImm(0) + .addImm(MI.getOperand(1).getImm()) .addReg(Hexagon::VTMP); MBB.erase(MI); return First.getInstrIterator(); case Hexagon::V6_vgathermw_pseudo: First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) - .add(MI.getOperand(1)) .add(MI.getOperand(2)) - .add(MI.getOperand(3)); + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) .add(MI.getOperand(0)) - .addImm(0) + .addImm(MI.getOperand(1).getImm()) .addReg(Hexagon::VTMP); MBB.erase(MI); return First.getInstrIterator(); case 
Hexagon::V6_vgathermhw_pseudo: First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) - .add(MI.getOperand(1)) .add(MI.getOperand(2)) - .add(MI.getOperand(3)); + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) .add(MI.getOperand(0)) - .addImm(0) + .addImm(MI.getOperand(1).getImm()) .addReg(Hexagon::VTMP); MBB.erase(MI); return First.getInstrIterator(); case Hexagon::V6_vgathermhq_pseudo: First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) - .add(MI.getOperand(1)) .add(MI.getOperand(2)) .add(MI.getOperand(3)) - .add(MI.getOperand(4)); + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) .add(MI.getOperand(0)) - .addImm(0) + .addImm(MI.getOperand(1).getImm()) .addReg(Hexagon::VTMP); MBB.erase(MI); return First.getInstrIterator(); case Hexagon::V6_vgathermwq_pseudo: First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq)) - .add(MI.getOperand(1)) .add(MI.getOperand(2)) .add(MI.getOperand(3)) - .add(MI.getOperand(4)); + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) .add(MI.getOperand(0)) - .addImm(0) + .addImm(MI.getOperand(1).getImm()) .addReg(Hexagon::VTMP); MBB.erase(MI); return First.getInstrIterator(); case Hexagon::V6_vgathermhwq_pseudo: First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) - .add(MI.getOperand(1)) .add(MI.getOperand(2)) .add(MI.getOperand(3)) - .add(MI.getOperand(4)); + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) .add(MI.getOperand(0)) - .addImm(0) + .addImm(MI.getOperand(1).getImm()) .addReg(Hexagon::VTMP); MBB.erase(MI); return First.getInstrIterator(); @@ -2767,7 +2767,13 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::V6_vL32b_nt_ai: case Hexagon::V6_vS32b_nt_ai: case Hexagon::V6_vL32Ub_ai: - case Hexagon::V6_vS32Ub_ai: { + case Hexagon::V6_vS32Ub_ai: + case 
Hexagon::V6_vgathermh_pseudo: + case Hexagon::V6_vgathermw_pseudo: + case Hexagon::V6_vgathermhw_pseudo: + case Hexagon::V6_vgathermhq_pseudo: + case Hexagon::V6_vgathermwq_pseudo: + case Hexagon::V6_vgathermhwq_pseudo: { unsigned VectorSize = TRI->getSpillSize(Hexagon::HvxVRRegClass); assert(isPowerOf2_32(VectorSize)); if (Offset & (VectorSize-1)) diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 2cdfbe7845b632..988b88a0f0ae41 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -110,6 +110,8 @@ class HexagonOptAddrMode : public MachineFunctionPass { bool changeAddAsl(NodeAddr AddAslUN, MachineInstr *AddAslMI, const MachineOperand &ImmOp, unsigned ImmOpNum); bool isValidOffset(MachineInstr *MI, int Offset); + unsigned getBaseOpPosition(MachineInstr *MI); + unsigned getOffsetOpPosition(MachineInstr *MI); }; } // end anonymous namespace @@ -322,6 +324,25 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr SN, } bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) { + if (HII->isHVXVec(*MI)) { + // only HVX vgather instructions handled + // TODO: extend the pass to other vector load/store operations + switch (MI->getOpcode()) { + case Hexagon::V6_vgathermh_pseudo: + case Hexagon::V6_vgathermw_pseudo: + case Hexagon::V6_vgathermhw_pseudo: + case Hexagon::V6_vgathermhq_pseudo: + case Hexagon::V6_vgathermwq_pseudo: + case Hexagon::V6_vgathermhwq_pseudo: + return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false); + default: + return false; + } + } + + if (HII->getAddrMode(*MI) != HexagonII::BaseImmOffset) + return false; + unsigned AlignMask = 0; switch (HII->getMemAccessSize(*MI)) { case HexagonII::MemAccessSize::DoubleWordAccess: @@ -345,29 +366,62 @@ bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) { return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false); } +unsigned 
HexagonOptAddrMode::getBaseOpPosition(MachineInstr *MI) { + const MCInstrDesc &MID = MI->getDesc(); + switch (MI->getOpcode()) { + // vgather pseudos are mayLoad and mayStore + // hence need to explicitly specify Base and + // Offset operand positions + case Hexagon::V6_vgathermh_pseudo: + case Hexagon::V6_vgathermw_pseudo: + case Hexagon::V6_vgathermhw_pseudo: + case Hexagon::V6_vgathermhq_pseudo: + case Hexagon::V6_vgathermwq_pseudo: + case Hexagon::V6_vgathermhwq_pseudo: + return 0; + default: + return MID.mayLoad() ? 1 : 0; + } +} + +unsigned HexagonOptAddrMode::getOffsetOpPosition(MachineInstr *MI) { + const MCInstrDesc &MID = MI->getDesc(); + switch (MI->getOpcode()) { + // vgather pseudos are mayLoad and mayStore + // hence need to explicitly specify Base and + // Offset operand positions + case Hexagon::V6_vgathermh_pseudo: + case Hexagon::V6_vgathermw_pseudo: + case Hexagon::V6_vgathermhw_pseudo: + case Hexagon::V6_vgathermhq_pseudo: + case Hexagon::V6_vgathermwq_pseudo: + case Hexagon::V6_vgathermhwq_pseudo: + return 1; + default: + return MID.mayLoad() ? 2 : 1; + } +} + bool HexagonOptAddrMode::processAddUses(NodeAddr AddSN, MachineInstr *AddMI, const NodeList &UNodeList) { Register AddDefR = AddMI->getOperand(0).getReg(); + Register BaseReg = AddMI->getOperand(1).getReg(); for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr UN = *I; NodeAddr SN = UN.Addr->getOwner(*DFG); MachineInstr *MI = SN.Addr->getCode(); const MCInstrDesc &MID = MI->getDesc(); - if ((!MID.mayLoad() && !MID.mayStore()) || - HII->getAddrMode(*MI) != HexagonII::BaseImmOffset || - HII->isHVXVec(*MI)) - return false; + if ((!MID.mayLoad() && !MID.mayStore())) + return false; - MachineOperand BaseOp = MID.mayLoad() ? MI->getOperand(1) - : MI->getOperand(0); + MachineOperand BaseOp = MI->getOperand(getBaseOpPosition(MI)); if (!BaseOp.isReg() || BaseOp.getReg() != AddDefR) return false; - MachineOperand OffsetOp = MID.mayLoad() ? 
MI->getOperand(2) - : MI->getOperand(1); + MachineOperand OffsetOp = MI->getOperand(getOffsetOpPosition(MI)); if (!OffsetOp.isImm()) return false; @@ -382,11 +436,19 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN, // Ex: Rx= add(Rt,#10) // memw(Rx+#0) = Rs // will be replaced with => memw(Rt+#10) = Rs - Register BaseReg = AddMI->getOperand(1).getReg(); if (!isSafeToExtLR(AddSN, AddMI, BaseReg, UNodeList)) return false; } + NodeId LRExtRegRD = 0; + // Iterate through all the UseNodes in AddSN and find the reaching def + // for the LRExtReg. + for (NodeAddr<UseNode *> UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) { + RegisterRef RR = UA.Addr->getRegRef(*DFG); + if (BaseReg == RR.Reg) + LRExtRegRD = UA.Addr->getReachingDef(); + } + // Update all the uses of 'add' with the appropriate base and offset // values. bool Changed = false; @@ -400,6 +462,12 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN, LLVM_DEBUG(dbgs() << "\t\t[MI <BB#" << UseMI->getParent()->getNumber() << ">]: " << *UseMI << "\n"); Changed |= updateAddUses(AddMI, UseMI); + + // Set the reachingDef for UseNode under consideration + // after updating the Add use. This local change is + // to avoid rebuilding of the RDF graph after update. + NodeAddr<DefNode *> LRExtRegDN = DFG->addr<DefNode *>(LRExtRegRD); + UseN.Addr->linkToDef(UseN.Id, LRExtRegDN); } if (Changed) @@ -409,21 +477,18 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN, } bool HexagonOptAddrMode::updateAddUses(MachineInstr *AddMI, - MachineInstr *UseMI) { + MachineInstr *UseMI) { const MachineOperand ImmOp = AddMI->getOperand(2); const MachineOperand AddRegOp = AddMI->getOperand(1); - Register newReg = AddRegOp.getReg(); - const MCInstrDesc &MID = UseMI->getDesc(); + Register NewReg = AddRegOp.getReg(); - MachineOperand &BaseOp = MID.mayLoad() ? UseMI->getOperand(1) - : UseMI->getOperand(0); - MachineOperand &OffsetOp = MID.mayLoad() ? 
UseMI->getOperand(2) - : UseMI->getOperand(1); - BaseOp.setReg(newReg); + MachineOperand &BaseOp = UseMI->getOperand(getBaseOpPosition(UseMI)); + MachineOperand &OffsetOp = UseMI->getOperand(getOffsetOpPosition(UseMI)); + BaseOp.setReg(NewReg); BaseOp.setIsUndef(AddRegOp.isUndef()); BaseOp.setImplicit(AddRegOp.isImplicit()); OffsetOp.setImm(ImmOp.getImm() + OffsetOp.getImm()); - MRI->clearKillFlags(newReg); + MRI->clearKillFlags(NewReg); return true; } diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td index 4cd45ecbe1a1f4..3f5d05d1c0e133 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td @@ -9,8 +9,8 @@ multiclass vgathermh { let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), - (ins IntRegs:$_dst_, IntRegs:$Rt, - ModRegs:$Mu, RC:$Vv), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + IntRegs:$Rt, ModRegs:$Mu, RC:$Vv), ".error \"should not emit\" ", []>; } @@ -18,8 +18,8 @@ multiclass vgathermh { multiclass vgathermw { let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), - (ins IntRegs:$_dst_, IntRegs:$Rt, - ModRegs:$Mu, RC:$Vv), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + IntRegs:$Rt, ModRegs:$Mu, RC:$Vv), ".error \"should not emit\" ", []>; } @@ -27,8 +27,8 @@ multiclass vgathermw { multiclass vgathermhw { let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), - (ins IntRegs:$_dst_, IntRegs:$Rt, - ModRegs:$Mu, RC:$Vv), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + IntRegs:$Rt, ModRegs:$Mu, RC:$Vv), ".error \"should not emit\" ", []>; } @@ -40,8 +40,9 @@ defm V6_vgathermhw_pseudo : vgathermhw; multiclass vgathermhq { let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), - (ins IntRegs:$_dst_, RC2:$Vq, 
IntRegs:$Rt, - ModRegs:$Mu, RC1:$Vv), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + RC2:$Vq, IntRegs:$Rt, ModRegs:$Mu, + RC1:$Vv), ".error \"should not emit\" ", []>; } @@ -49,8 +50,9 @@ multiclass vgathermhq { multiclass vgathermwq { let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), - (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt, - ModRegs:$Mu, RC1:$Vv), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + RC2:$Vq, IntRegs:$Rt, ModRegs:$Mu, + RC1:$Vv), ".error \"should not emit\" ", []>; } @@ -58,8 +60,9 @@ multiclass vgathermwq { multiclass vgathermhwq { let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), - (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt, - ModRegs:$Mu, RC1:$Vv), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + RC2:$Vq, IntRegs:$Rt, ModRegs:$Mu, + RC1:$Vv), ".error \"should not emit\" ", []>; } diff --git a/llvm/test/CodeGen/Hexagon/packetize-vgather-slot01.mir b/llvm/test/CodeGen/Hexagon/packetize-vgather-slot01.mir index 5271b8535b219c..fd9df82e795987 100644 --- a/llvm/test/CodeGen/Hexagon/packetize-vgather-slot01.mir +++ b/llvm/test/CodeGen/Hexagon/packetize-vgather-slot01.mir @@ -15,8 +15,8 @@ body: | bb.0: liveins: $r0, $r1, $r2, $r3, $m0, $v0, $v1, $v2 $v0 = V6_vaslw_acc $v0, killed $v1, $r0 - V6_vgathermw_pseudo $r1, $r2, $m0, killed $v2 + V6_vgathermw_pseudo $r1, 0, $r2, $m0, killed $v2 $r4 = A2_addi $r1, 512 $r5 = A2_addi $r1, 640 - V6_vgathermw_pseudo killed $r3, $r2, $m0, killed $v0 + V6_vgathermw_pseudo killed $r3, 0, $r2, $m0, killed $v0 ... 
diff --git a/llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll b/llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll new file mode 100644 index 00000000000000..b43f79a3b4152d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=hexagon -O3 -disable-hexagon-amodeopt < %s | FileCheck %s --check-prefix=CHECK-NO-AMODE +; RUN: llc -march=hexagon -O3 < %s | FileCheck %s --check-prefix=CHECK-AMODE + +; CHECK-NO-AMODE: [[REG1:(r[0-9]+)]] = add({{r[0-9]+}},#0) + +; CHECK-NO-AMODE: [[REG2:(r[0-9]+)]] = add([[REG1]],#128) +; CHECK-NO-AMODE: [[REG3:(r[0-9]+)]] = add([[REG1]],#256) +; CHECK-NO-AMODE: [[REG4:(r[0-9]+)]] = add([[REG1]],#384) +; CHECK-NO-AMODE: [[REG5:(r[0-9]+)]] = add([[REG1]],#512) +; CHECK-NO-AMODE: [[REG6:(r[0-9]+)]] = add([[REG1]],#640) +; CHECK-NO-AMODE: vmem([[REG1]]+#0) = vtmp.new +; CHECK-NO-AMODE: vmem([[REG2]]+#0) = vtmp.new +; CHECK-NO-AMODE: vmem([[REG3]]+#0) = vtmp.new +; CHECK-NO-AMODE: vmem([[REG4]]+#0) = vtmp.new +; CHECK-NO-AMODE: vmem([[REG5]]+#0) = vtmp.new +; CHECK-NO-AMODE: vmem([[REG6]]+#0) = vtmp.new + + +; CHECK-AMODE: [[REG1:(r[0-9]+)]] = add({{r[0-9]+}},#0) +; CHECK-AMODE-NOT: {{r[0-9]+}} = add([[REG1]],{{[0-9]+}}) +; CHECK-AMODE: vmem([[REG1]]+#0) = vtmp.new +; CHECK-AMODE: vmem([[REG1]]+#1) = vtmp.new +; CHECK-AMODE: vmem([[REG1]]+#2) = vtmp.new +; CHECK-AMODE: vmem([[REG1]]+#3) = vtmp.new +; CHECK-AMODE: vmem([[REG1]]+#4) = vtmp.new +; CHECK-AMODE: vmem([[REG1]]+#5) = vtmp.new + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +; Function Attrs: nounwind readnone +define dso_local void @contiguos_vgather_test(i32 %Rb, i32 %mu, i32 %nloops, <32 x i32> %Vv, <64 x i32> %Vvv, <32 x i32> %Qs) local_unnamed_addr #0 { +entry: + %Vout1 = alloca <32 x i32>, align 128 + %0 = bitcast <32 x i32>* %Vout1 to i8* + call void @llvm.lifetime.start.p0i8(i64 128, 
i8* nonnull %0) #2 + %cmp23 = icmp sgt i32 %nloops, 0 + br i1 %cmp23, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %add.ptr = getelementptr inbounds <32 x i32>, <32 x i32>* %Vout1, i32 1 + %1 = bitcast <32 x i32>* %add.ptr to i8* + %add.ptr1 = getelementptr inbounds <32 x i32>, <32 x i32>* %Vout1, i32 2 + %2 = bitcast <32 x i32>* %add.ptr1 to i8* + %add.ptr2 = getelementptr inbounds <32 x i32>, <32 x i32>* %Vout1, i32 3 + %3 = bitcast <32 x i32>* %add.ptr2 to i8* + %4 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %Qs, i32 -1) + %add.ptr3 = getelementptr inbounds <32 x i32>, <32 x i32>* %Vout1, i32 4 + %5 = bitcast <32 x i32>* %add.ptr3 to i8* + %add.ptr4 = getelementptr inbounds <32 x i32>, <32 x i32>* %Vout1, i32 5 + %6 = bitcast <32 x i32>* %add.ptr4 to i8* + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %0) #2 + ret void + +for.body: ; preds = %for.body, %for.body.lr.ph + %i.024 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + call void @llvm.hexagon.V6.vgathermh.128B(i8* nonnull %0, i32 %Rb, i32 %mu, <32 x i32> %Vv) + call void @llvm.hexagon.V6.vgathermw.128B(i8* nonnull %1, i32 %Rb, i32 %mu, <32 x i32> %Vv) + call void @llvm.hexagon.V6.vgathermhw.128B(i8* nonnull %2, i32 %Rb, i32 %mu, <64 x i32> %Vvv) + call void @llvm.hexagon.V6.vgathermhq.128B(i8* nonnull %3, <128 x i1> %4, i32 %Rb, i32 %mu, <32 x i32> %Vv) + call void @llvm.hexagon.V6.vgathermwq.128B(i8* nonnull %5, <128 x i1> %4, i32 %Rb, i32 %mu, <32 x i32> %Vv) + call void @llvm.hexagon.V6.vgathermhwq.128B(i8* nonnull %6, <128 x i1> %4, i32 %Rb, i32 %mu, <64 x i32> %Vvv) + %inc = add nuw nsw i32 %i.024, 1 + %exitcond = icmp eq i32 %inc, %nloops + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void 
@llvm.hexagon.V6.vgathermh.128B(i8*, i32, i32, <32 x i32>) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.hexagon.V6.vgathermw.128B(i8*, i32, i32, <32 x i32>) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.hexagon.V6.vgathermhw.128B(i8*, i32, i32, <64 x i32>) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <128 x i1>, i32, i32, <64 x i32>) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 + +declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv65" "target-features"="+hvx-length128b,+hvxv65,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/CodeGen/Hexagon/vgather-packetize.mir b/llvm/test/CodeGen/Hexagon/vgather-packetize.mir index 688a770ad2bf93..7e3eb21c446e70 100644 --- a/llvm/test/CodeGen/Hexagon/vgather-packetize.mir +++ b/llvm/test/CodeGen/Hexagon/vgather-packetize.mir @@ -21,7 +21,7 @@ body: | bb.1: liveins: $r0, $r1, $r2, $m0, $w0 $r1 = A2_addi $r1, 1 - V6_vgathermhw_pseudo $r0, $r2, $m0, $w0, implicit-def $vtmp + V6_vgathermhw_pseudo killed $r0, 0, killed $r2, killed $m0, killed 
$w0, implicit-def $vtmp ENDLOOP0 %bb.1, implicit $lc0, implicit $sa0, implicit-def $lc0, implicit-def $p3, implicit-def $pc, implicit-def $usr bb.2: