diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 47726d6447ad8..55bafdea234fd 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -4753,6 +4753,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const { return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0); } +bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const { + return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf || + MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 || + MI->getOpcode() == Hexagon::V6_vmpy_qf32); +} + // Addressing mode relations. short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const { return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index c17e5277ae2e7..48adf82833f51 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -532,6 +532,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { } MCInst getNop() const override; + bool isQFPMul(const MachineInstr *MF) const; }; /// \brief Create RegSubRegPair from a register MachineOperand diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp index f29a739cb5c07..8801f698effe5 100644 --- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -58,7 +58,7 @@ // are PHI inst. 
// //===----------------------------------------------------------------------===// -#include + #define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass" #include "Hexagon.h" @@ -86,6 +86,9 @@ using namespace llvm; cl::opt DisableQFOptimizer("disable-qfp-opt", cl::init(false), cl::desc("Disable optimization of Qfloat operations.")); +cl::opt DisableQFOptForMul( + "disable-qfp-opt-mul", cl::init(true), + cl::desc("Disable optimization of Qfloat operations for multiply.")); namespace { const std::map QFPInstMap{ @@ -101,11 +104,21 @@ const std::map QFPInstMap{ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16}, {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf}, {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, - {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; + {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}, + {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32}, + {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16}, + {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32}, + {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16}, + {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32}, + {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}}; } // namespace -namespace { +namespace llvm { +FunctionPass *createHexagonQFPOptimizer(); +void initializeHexagonQFPOptimizerPass(PassRegistry &); +} // namespace llvm +namespace { struct HexagonQFPOptimizer : public MachineFunctionPass { public: static char ID; @@ -116,6 +129,10 @@ struct HexagonQFPOptimizer : public MachineFunctionPass { bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB); + + bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB); + StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -142,19 +159,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() { bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB) { 
- // Early exit: - // - if instruction is invalid or has too few operands (QFP ops need 2 sources - // + 1 dest), - // - or does not have a transformation mapping. - if (MI->getNumOperands() < 3) + if (MI->getNumOperands() == 2) + return optimizeQfpOneOp(MI, MBB); + else if (MI->getNumOperands() == 3) + return optimizeQfpTwoOp(MI, MBB); + else return false; +} + +bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI, + MachineBasicBlock *MBB) { + + unsigned Op0F = 0; auto It = QFPInstMap.find(MI->getOpcode()); if (It == QFPInstMap.end()) return false; + unsigned short InstTy = It->second; + // Get the reachind defs of MI + MachineInstr *DefMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + + LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump()); + MachineInstr *ReachDefDef = nullptr; + + // Get the reaching def of the reaching def to check for W reg def + if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() && + DefMI->getOperand(1).getReg().isVirtual()) + ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg()); + unsigned ReachDefOp = DefMI->getOpcode(); + MachineInstrBuilder MIB; + + // Check if the reaching def is a conversion + if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 || + ReachDefOp == Hexagon::V6_vconv_hf_qf16) { + + // Return if the reaching def of reaching def is W type + if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) == + &Hexagon::HvxWRRegClass) + return false; + + // Analyze the use operands of the conversion to get their KILL status + MachineOperand &SrcOp = DefMI->getOperand(1); + Op0F = getKillRegState(SrcOp.isKill()); + SrcOp.setIsKill(false); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg()); + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); + return true; + } + return false; +} + +bool 
HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI, + MachineBasicBlock *MBB) { unsigned Op0F = 0; unsigned Op1F = 0; + auto It = QFPInstMap.find(MI->getOpcode()); + if (It == QFPInstMap.end()) + return false; + unsigned short InstTy = It->second; // Get the reaching defs of MI, DefMI1 and DefMI2 MachineInstr *DefMI1 = nullptr; MachineInstr *DefMI2 = nullptr; @@ -167,6 +234,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, return false; MachineOperand &Res = MI->getOperand(0); + if (!Res.isReg()) + return false; + MachineInstr *Inst1 = nullptr; MachineInstr *Inst2 = nullptr; LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump(); @@ -185,7 +255,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, unsigned Def2OP = DefMI2->getOpcode(); MachineInstrBuilder MIB; - // Case 1: Both reaching defs of MI are qf to sf/hf conversions + + // Check if the both the reaching defs of MI are qf to sf/hf conversions if ((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && @@ -226,7 +297,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if left operand's reaching def is a conversion to sf/hf } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 && Def2OP != Hexagon::V6_vconv_sf_qf32) || (Def1OP == Hexagon::V6_vconv_hf_qf16 && @@ -250,7 +321,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; - // Case 2: Left operand is conversion to sf/hf + // Check if right operand's reaching def is a conversion to sf/hf } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 && Def2OP == Hexagon::V6_vconv_sf_qf32) || (Def1OP != Hexagon::V6_vconv_hf_qf16 && @@ -258,13 +329,6 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, !DefMI1->isPHI() && (MI->getOpcode() != 
Hexagon::V6_vmpy_qf32_sf)) { // The second operand of original instruction is converted. - // In "mix" instructions, "qf" operand is always the first operand. - - // Caveat: vsub is not commutative w.r.t operands. - if (InstTy == Hexagon::V6_vsub_qf16_mix || - InstTy == Hexagon::V6_vsub_qf32_mix) - return false; - if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) == &Hexagon::HvxWRRegClass) return false; @@ -275,10 +339,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, Op1F = getKillRegState(Src2.isKill()); Src2.setIsKill(false); Op0F = getKillRegState(Src1.isKill()); - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) - .addReg(Src2.getReg(), Op1F, - Src2.getSubReg()) // Notice the operands are flipped. - .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + if (InstTy == Hexagon::V6_vsub_qf16_mix || + InstTy == Hexagon::V6_vsub_qf32_mix) { + if (!HST->useHVXV81Ops()) + // vsub_(hf|sf)_mix insts are only avlbl on hvx81+ + return false; + // vsub is not commutative w.r.t. operands -> treat it as a special case + // to choose the correct mix instruction. + if (Def2OP == Hexagon::V6_vconv_sf_qf32) + InstTy = Hexagon::V6_vsub_sf_mix; + else if (Def2OP == Hexagon::V6_vconv_hf_qf16) + InstTy = Hexagon::V6_vsub_hf_mix; + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); + } else { + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) + .addReg(Src2.getReg(), Op1F, + Src2.getSubReg()) // Notice the operands are flipped. + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); + } LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); return true; } @@ -309,15 +389,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) { while (MII != MBBI->instr_end()) { MachineInstr *MI = &*MII; ++MII; // As MI might be removed. 
- - if (QFPInstMap.count(MI->getOpcode()) && - MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 && - MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) { - LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); - if (optimizeQfp(MI, MBB)) { - MI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "\t....Removing...."); - Changed = true; + if (QFPInstMap.count(MI->getOpcode())) { + auto OpC = MI->getOpcode(); + if (DisableQFOptForMul && HII->isQFPMul(MI)) + continue; + if (OpC != Hexagon::V6_vconv_sf_qf32 && + OpC != Hexagon::V6_vconv_hf_qf16) { + LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); + if (optimizeQfp(MI, MBB)) { + MI->eraseFromParent(); + LLVM_DEBUG(dbgs() << "\t....Removing...."); + Changed = true; + } } } } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll b/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll new file mode 100644 index 0000000000000..9625a605910c2 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll @@ -0,0 +1,372 @@ +; REQUIRES: hexagon-registered-target, silver +; This tests correct handling of register spills and fills of +; qf operands during register allocation. 
+ +; RUN: llc -mcpu=hexagonv79 -mattr=+hvx-length128b,+hvxv79,+hvx-ieee-fp,+hvx-qfloat,-long-calls -debug-only=handle-qfp %s 2>&1 -o - | FileCheck %s --check-prefixes V79-81,V79 +; RUN: llc -mcpu=hexagonv81 -mattr=+hvx-length128b,+hvxv81,+hvx-ieee-fp,+hvx-qfloat,-long-calls -debug-only=handle-qfp %s 2>&1 -o - | FileCheck %s --check-prefixes V79-81,V81 + +; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf +; V79-81: Inserting after conv: [[VREG0:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG0]] +; V79-81-NEXT: Inserting after conv: [[VREG1:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG1]] +; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf +; V79-81: Inserting after conv: [[VREG2:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG2]] +; V79-81-NEXT: Inserting after conv: [[VREG3:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG3]] +; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf +; V79-81-DAG: Inserting after conv: [[VREG4:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG4]] +; V79-81-DAG: Inserting after conv: [[VREG5:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG5]] +; V79-81-DAG: Inserting new instruction: $v{{[0-9]+}} = V6_vadd_sf killed renamable [[VREG2]], killed renamable [[VREG0]] +; V79-81-DAG: Inserting new instruction: $v{{[0-9]+}} = V6_vsub_sf killed renamable $v{{[0-9]+}}, killed renamable $v{{[0-9]+}} +; +; V79-81: Analyzing convert instruction: renamable [[VREG6:\$v[0-9]+]] = V6_vconv_hf_qf32 killed renamable $w{{[0-9]+}} +; V79: Inserting new instruction: [[VREG30:\$v[0-9]+]] = V6_vd0 +; V79-NEXT: Inserting new instruction: [[VREG7:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG7]], killed [[VREG30]] +; V79: Inserting new instruction: [[VREG30]] = V6_vd0 +; V79-NEXT: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG8]], killed [[VREG30]] +; V81: Inserting new instruction: [[VREG7:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable 
[[VREG7]] +; V81: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG8]] + +; V79-81: Analyzing convert instruction: renamable [[VREG9:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable $v{{[0-9]+}} +; V79: Inserting new instruction: [[VREG30]] = V6_vd0 +; V79-NEXT: Inserting new instruction: [[VREG10:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG10]], killed [[VREG30]] +; V81: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG8]] + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +@.str.1 = private unnamed_addr constant [9 x i8] c"0x%08lx \00", align 1 +@.str.3 = private unnamed_addr constant [173 x i8] c"/prj/qct/llvm/devops/aether/hexbuild/test_trees/MASTER/test/regress/features/hexagon/arch_v68/hvx_ieee_fp/hvx_ieee_fp_test.c:126 0 && \22ERROR: Failed to acquire HVX unit.\\n\22\00", align 1 +@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1 +@.str.5 = private unnamed_addr constant [33 x i8] c"half -3 converted to vhf = %.2f\0A\00", align 1 +@.str.6 = private unnamed_addr constant [35 x i8] c"uhalf 32k converted to vhf = %.2f\0A\00", align 1 +@.str.7 = private unnamed_addr constant [32 x i8] c"sf 0.5 converted to vhf = %.2f\0A\00", align 1 +@.str.8 = private unnamed_addr constant [32 x i8] c"vhf 4.0 conveted to ubyte = %d\0A\00", align 1 +@.str.9 = private unnamed_addr constant [32 x i8] c"vhf 2.0 conveted to uhalf = %d\0A\00", align 1 +@.str.10 = private unnamed_addr constant [30 x i8] c"byte 4 conveted to hf = %.2f\0A\00", align 1 +@.str.11 = private unnamed_addr constant [31 x i8] c"ubyte 4 conveted to hf = %.2f\0A\00", align 1 +@.str.12 = private unnamed_addr constant [27 x i8] c"hf -3 conveted to sf = %f\0A\00", align 1 +@.str.13 = private unnamed_addr constant [31 x i8] c"vhf 4.0 conveted to byte = 
%d\0A\00", align 1 +@.str.14 = private unnamed_addr constant [31 x i8] c"vhf 4.0 conveted to half = %d\0A\00", align 1 +@.str.16 = private unnamed_addr constant [33 x i8] c"max of hf 2.0 and hf 4.0 = %.2f\0A\00", align 1 +@.str.17 = private unnamed_addr constant [33 x i8] c"min of hf 2.0 and hf 4.0 = %.2f\0A\00", align 1 +@.str.18 = private unnamed_addr constant [32 x i8] c"max of sf 0.5 and sf 0.25 = %f\0A\00", align 1 +@.str.19 = private unnamed_addr constant [32 x i8] c"min of sf 0.5 and sf 0.25 = %f\0A\00", align 1 +@.str.21 = private unnamed_addr constant [25 x i8] c"negate of hf 4.0 = %.2f\0A\00", align 1 +@.str.22 = private unnamed_addr constant [23 x i8] c"abs of hf -6.0 = %.2f\0A\00", align 1 +@.str.23 = private unnamed_addr constant [23 x i8] c"negate of sf 0.5 = %f\0A\00", align 1 +@.str.24 = private unnamed_addr constant [22 x i8] c"abs of sf -0.25 = %f\0A\00", align 1 +@.str.26 = private unnamed_addr constant [32 x i8] c"hf add of 4.0 and -6.0 = %.2f\0A\00", align 1 +@.str.27 = private unnamed_addr constant [32 x i8] c"hf sub of 4.0 and -6.0 = %.2f\0A\00", align 1 +@.str.28 = private unnamed_addr constant [31 x i8] c"sf add of 0.5 and -0.25 = %f\0A\00", align 1 +@.str.29 = private unnamed_addr constant [31 x i8] c"sf sub of 0.5 and -0.25 = %f\0A\00", align 1 +@.str.30 = private unnamed_addr constant [36 x i8] c"sf add of hf 4.0 and hf -6.0 = %f\0A\00", align 1 +@.str.31 = private unnamed_addr constant [36 x i8] c"sf sub of hf 4.0 and hf -6.0 = %f\0A\00", align 1 +@.str.33 = private unnamed_addr constant [32 x i8] c"hf mpy of 4.0 and -6.0 = %.2f\0A\00", align 1 +@.str.34 = private unnamed_addr constant [35 x i8] c"hf accmpy of 4.0 and -6.0 = %.2f\0A\00", align 1 +@.str.35 = private unnamed_addr constant [36 x i8] c"sf mpy of hf 4.0 and hf -6.0 = %f\0A\00", align 1 +@.str.36 = private unnamed_addr constant [39 x i8] c"sf accmpy of hf 4.0 and hf -6.0 = %f\0A\00", align 1 +@.str.37 = private unnamed_addr constant [31 x i8] c"sf mpy of 0.5 and -0.25 = 
%f\0A\00", align 1 +@.str.39 = private unnamed_addr constant [25 x i8] c"w copy from sf 0.5 = %f\0A\00", align 1 +@str = private unnamed_addr constant [35 x i8] c"ERROR: Failed to acquire HVX unit.\00", align 1 +@str.40 = private unnamed_addr constant [25 x i8] c"\0AConversion intructions\0A\00", align 1 +@str.41 = private unnamed_addr constant [23 x i8] c"\0AMin/Max instructions\0A\00", align 1 +@str.42 = private unnamed_addr constant [23 x i8] c"\0Aabs/neg instructions\0A\00", align 1 +@str.43 = private unnamed_addr constant [23 x i8] c"\0Aadd/sub instructions\0A\00", align 1 +@str.44 = private unnamed_addr constant [24 x i8] c"\0Amultiply instructions\0A\00", align 1 +@str.45 = private unnamed_addr constant [19 x i8] c"\0Acopy instruction\0A\00", align 1 + +declare dso_local void @print_vector_words(<32 x i32> noundef %x) local_unnamed_addr #0 + +; Function Attrs: nofree nounwind optsize +declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #0 + +; Function Attrs: nounwind optsize +define dso_local i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #1 { +entry: + %call = tail call i32 @acquire_vector_unit(i8 noundef zeroext 0) #6 + %tobool.not = icmp eq i32 %call, 0 + br i1 %tobool.not, label %if.then, label %if.end + +if.then: ; preds = %entry + %puts = tail call i32 @puts(ptr nonnull dereferenceable(1) @str) + tail call void @_Assert(ptr noundef nonnull @.str.3, ptr noundef nonnull @__func__.main) #7 + unreachable + +if.end: ; preds = %entry + tail call void @set_double_vector_mode() #6 + %0 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 16384) + %1 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 17408) + %2 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 -14848) + %3 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608) + %4 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1048576000) + %5 = tail call <32 x i32> 
@llvm.hexagon.V6.lvsplatw.128B(i32 -1098907648) + %6 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 -3) + %7 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 32768) + %puts147 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.40) + %8 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.h.128B(<32 x i32> %6) + %bc.i = bitcast <32 x i32> %8 to <64 x half> + %9 = extractelement <64 x half> %bc.i, i64 0 + %conv = fpext half %9 to double + %call12 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.5, double noundef %conv) #6 + %10 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.uh.128B(<32 x i32> %7) + %bc.i153 = bitcast <32 x i32> %10 to <64 x half> + %11 = extractelement <64 x half> %bc.i153, i64 0 + %conv14 = fpext half %11 to double + %call15 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.6, double noundef %conv14) #6 + %12 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.sf.128B(<32 x i32> %3, <32 x i32> %3) + %bc.i155 = bitcast <32 x i32> %12 to <64 x half> + %13 = extractelement <64 x half> %bc.i155, i64 0 + %conv17 = fpext half %13 to double + %call18 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.7, double noundef %conv17) #6 + %14 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.ub.hf.128B(<32 x i32> %1, <32 x i32> %1) + %15 = bitcast <32 x i32> %14 to <128 x i8> + %conv.i = extractelement <128 x i8> %15, i64 0 + %conv20 = zext i8 %conv.i to i32 + %call21 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.8, i32 noundef %conv20) #6 + %16 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.uh.hf.128B(<32 x i32> %0) + %17 = bitcast <32 x i32> %16 to <64 x i16> + %conv.i157 = extractelement <64 x i16> %17, i64 0 + %conv23 = sext i16 %conv.i157 to i32 + %call24 = tail call i32 (ptr, ...) 
@printf(ptr noundef nonnull dereferenceable(1) @.str.9, i32 noundef %conv23) #6 + %18 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.hf.b.128B(<32 x i32> %14) + %bc.i158 = bitcast <64 x i32> %18 to <128 x half> + %19 = extractelement <128 x half> %bc.i158, i64 0 + %conv26 = fpext half %19 to double + %call27 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.10, double noundef %conv26) #6 + %20 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.hf.ub.128B(<32 x i32> %14) + %bc.i159 = bitcast <64 x i32> %20 to <128 x half> + %21 = extractelement <128 x half> %bc.i159, i64 0 + %conv29 = fpext half %21 to double + %call30 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.11, double noundef %conv29) #6 + %22 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.sf.hf.128B(<32 x i32> %8) + %bc.i161 = bitcast <64 x i32> %22 to <64 x float> + %23 = extractelement <64 x float> %bc.i161, i64 0 + %conv32 = fpext float %23 to double + %call33 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.12, double noundef %conv32) #6 + %24 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.b.hf.128B(<32 x i32> %1, <32 x i32> %1) + %25 = bitcast <32 x i32> %24 to <128 x i8> + %conv.i162 = extractelement <128 x i8> %25, i64 0 + %conv35 = zext i8 %conv.i162 to i32 + %call36 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.13, i32 noundef %conv35) #6 + %26 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.h.hf.128B(<32 x i32> %1) + %27 = bitcast <32 x i32> %26 to <64 x i16> + %conv.i163 = extractelement <64 x i16> %27, i64 0 + %conv38 = sext i16 %conv.i163 to i32 + %call39 = tail call i32 (ptr, ...) 
@printf(ptr noundef nonnull dereferenceable(1) @.str.14, i32 noundef %conv38) #6 + %28 = tail call <32 x i32> @llvm.hexagon.V6.vfmax.hf.128B(<32 x i32> %0, <32 x i32> %1) + %puts148 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.41) + %bc.i164 = bitcast <32 x i32> %28 to <64 x half> + %29 = extractelement <64 x half> %bc.i164, i64 0 + %conv42 = fpext half %29 to double + %call43 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.16, double noundef %conv42) #6 + %30 = tail call <32 x i32> @llvm.hexagon.V6.vfmin.hf.128B(<32 x i32> %0, <32 x i32> %1) + %bc.i166 = bitcast <32 x i32> %30 to <64 x half> + %31 = extractelement <64 x half> %bc.i166, i64 0 + %conv45 = fpext half %31 to double + %call46 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.17, double noundef %conv45) #6 + %32 = tail call <32 x i32> @llvm.hexagon.V6.vfmax.sf.128B(<32 x i32> %3, <32 x i32> %4) + %bc.i168 = bitcast <32 x i32> %32 to <32 x float> + %33 = extractelement <32 x float> %bc.i168, i64 0 + %conv48 = fpext float %33 to double + %call49 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.18, double noundef %conv48) #6 + %34 = tail call <32 x i32> @llvm.hexagon.V6.vfmin.sf.128B(<32 x i32> %3, <32 x i32> %4) + %bc.i169 = bitcast <32 x i32> %34 to <32 x float> + %35 = extractelement <32 x float> %bc.i169, i64 0 + %conv51 = fpext float %35 to double + %call52 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.19, double noundef %conv51) #6 + %puts149 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.42) + %36 = tail call <32 x i32> @llvm.hexagon.V6.vfneg.hf.128B(<32 x i32> %1) + %bc.i170 = bitcast <32 x i32> %36 to <64 x half> + %37 = extractelement <64 x half> %bc.i170, i64 0 + %conv55 = fpext half %37 to double + %call56 = tail call i32 (ptr, ...) 
@printf(ptr noundef nonnull dereferenceable(1) @.str.21, double noundef %conv55) #6 + %38 = tail call <32 x i32> @llvm.hexagon.V6.vabs.hf.128B(<32 x i32> %2) + %bc.i172 = bitcast <32 x i32> %38 to <64 x half> + %39 = extractelement <64 x half> %bc.i172, i64 0 + %conv58 = fpext half %39 to double + %call59 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.22, double noundef %conv58) #6 + %40 = tail call <32 x i32> @llvm.hexagon.V6.vfneg.sf.128B(<32 x i32> %3) + %bc.i174 = bitcast <32 x i32> %40 to <32 x float> + %41 = extractelement <32 x float> %bc.i174, i64 0 + %conv61 = fpext float %41 to double + %call62 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.23, double noundef %conv61) #6 + %42 = tail call <32 x i32> @llvm.hexagon.V6.vabs.sf.128B(<32 x i32> %5) + %bc.i175 = bitcast <32 x i32> %42 to <32 x float> + %43 = extractelement <32 x float> %bc.i175, i64 0 + %conv64 = fpext float %43 to double + %call65 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.24, double noundef %conv64) #6 + %puts150 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.43) + %44 = tail call <32 x i32> @llvm.hexagon.V6.vadd.hf.hf.128B(<32 x i32> %1, <32 x i32> %2) + %bc.i176 = bitcast <32 x i32> %44 to <64 x half> + %45 = extractelement <64 x half> %bc.i176, i64 0 + %conv68 = fpext half %45 to double + %call69 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.26, double noundef %conv68) #6 + %46 = tail call <32 x i32> @llvm.hexagon.V6.vsub.hf.hf.128B(<32 x i32> %1, <32 x i32> %2) + %bc.i178 = bitcast <32 x i32> %46 to <64 x half> + %47 = extractelement <64 x half> %bc.i178, i64 0 + %conv71 = fpext half %47 to double + %call72 = tail call i32 (ptr, ...) 
@printf(ptr noundef nonnull dereferenceable(1) @.str.27, double noundef %conv71) #6 + %48 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.sf.128B(<32 x i32> %3, <32 x i32> %5) + %bc.i180 = bitcast <32 x i32> %48 to <32 x float> + %49 = extractelement <32 x float> %bc.i180, i64 0 + %conv74 = fpext float %49 to double + %call75 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.28, double noundef %conv74) #6 + %50 = tail call <32 x i32> @llvm.hexagon.V6.vsub.sf.sf.128B(<32 x i32> %3, <32 x i32> %5) + %bc.i181 = bitcast <32 x i32> %50 to <32 x float> + %51 = extractelement <32 x float> %bc.i181, i64 0 + %conv77 = fpext float %51 to double + %call78 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.29, double noundef %conv77) #6 + %52 = tail call <64 x i32> @llvm.hexagon.V6.vadd.sf.hf.128B(<32 x i32> %1, <32 x i32> %2) + %bc.i182 = bitcast <64 x i32> %52 to <64 x float> + %53 = extractelement <64 x float> %bc.i182, i64 0 + %conv80 = fpext float %53 to double + %call81 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.30, double noundef %conv80) #6 + %54 = tail call <64 x i32> @llvm.hexagon.V6.vsub.sf.hf.128B(<32 x i32> %1, <32 x i32> %2) + %bc.i183 = bitcast <64 x i32> %54 to <64 x float> + %55 = extractelement <64 x float> %bc.i183, i64 0 + %conv83 = fpext float %55 to double + %call84 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.31, double noundef %conv83) #6 + %puts151 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.44) + %56 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.128B(<32 x i32> %1, <32 x i32> %2) + %bc.i184 = bitcast <32 x i32> %56 to <64 x half> + %57 = extractelement <64 x half> %bc.i184, i64 0 + %conv87 = fpext half %57 to double + %call88 = tail call i32 (ptr, ...) 
@printf(ptr noundef nonnull dereferenceable(1) @.str.33, double noundef %conv87) #6 + %58 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.acc.128B(<32 x i32> %56, <32 x i32> %1, <32 x i32> %2) + %bc.i186 = bitcast <32 x i32> %58 to <64 x half> + %59 = extractelement <64 x half> %bc.i186, i64 0 + %conv90 = fpext half %59 to double + %call91 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.34, double noundef %conv90) #6 + %60 = tail call <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.128B(<32 x i32> %1, <32 x i32> %2) + %bc.i188 = bitcast <64 x i32> %60 to <64 x float> + %61 = extractelement <64 x float> %bc.i188, i64 0 + %conv93 = fpext float %61 to double + %call94 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.35, double noundef %conv93) #6 + %62 = tail call <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.acc.128B(<64 x i32> %60, <32 x i32> %1, <32 x i32> %2) + %bc.i189 = bitcast <64 x i32> %62 to <64 x float> + %63 = extractelement <64 x float> %bc.i189, i64 0 + %conv96 = fpext float %63 to double + %call97 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.36, double noundef %conv96) #6 + %64 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.sf.sf.128B(<32 x i32> %3, <32 x i32> %5) + %bc.i190 = bitcast <32 x i32> %64 to <32 x float> + %65 = extractelement <32 x float> %bc.i190, i64 0 + %conv99 = fpext float %65 to double + %call100 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.37, double noundef %conv99) #6 + %puts152 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.45) + %66 = tail call <32 x i32> @llvm.hexagon.V6.vassign.fp.128B(<32 x i32> %3) + %bc.i191 = bitcast <32 x i32> %66 to <32 x float> + %67 = extractelement <32 x float> %bc.i191, i64 0 + %conv103 = fpext float %67 to double + %call104 = tail call i32 (ptr, ...) 
@printf(ptr noundef nonnull dereferenceable(1) @.str.39, double noundef %conv103) #6 + ret i32 0 +} + +; Function Attrs: optsize +declare dso_local i32 @acquire_vector_unit(i8 noundef zeroext) local_unnamed_addr #2 + +; Function Attrs: noreturn nounwind optsize +declare dso_local void @_Assert(ptr noundef, ptr noundef) local_unnamed_addr #3 + +; Function Attrs: optsize +declare dso_local void @set_double_vector_mode(...) local_unnamed_addr #2 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.h.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.uh.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.sf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vcvt.ub.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vcvt.uh.hf.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <64 x i32> @llvm.hexagon.V6.vcvt.hf.b.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <64 x i32> @llvm.hexagon.V6.vcvt.hf.ub.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <64 x i32> @llvm.hexagon.V6.vcvt.sf.hf.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vcvt.b.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn 
memory(none) +declare <32 x i32> @llvm.hexagon.V6.vcvt.h.hf.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vfmax.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vfmin.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vfmax.sf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vfmin.sf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vfneg.hf.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vabs.hf.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vfneg.sf.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vabs.sf.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vadd.hf.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vsub.hf.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vadd.sf.sf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) 
+declare <32 x i32> @llvm.hexagon.V6.vsub.sf.sf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <64 x i32> @llvm.hexagon.V6.vadd.sf.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <64 x i32> @llvm.hexagon.V6.vsub.sf.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.acc.128B(<32 x i32>, <32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.acc.128B(<64 x i32>, <32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vmpy.sf.sf.128B(<32 x i32>, <32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vassign.fp.128B(<32 x i32>) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32) #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) #4 + +; Function Attrs: nofree nounwind +declare noundef i32 @putchar(i32 noundef) local_unnamed_addr #5 + +; Function Attrs: nofree nounwind +declare noundef i32 @puts(ptr nocapture noundef readonly) local_unnamed_addr #5 
diff --git a/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll b/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll
new file mode 100644
index 0000000000000..cdb779f5c4e7d
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll
@@ -0,0 +1,64 @@
+;; RUN: llc --mtriple=hexagon --mcpu=hexagonv81 --mattr=+hvxv81,+hvx-length128b %s -o - | FileCheck %s
+
+;; Each function multiplies two loaded vectors and then subtracts with a third
+;; loaded vector. The checks expect a vsub with one qf32 (qf16) operand and one
+;; sf (hf) operand, with the qf operand in the position of the product.
+
+define void @mul_and_sub_1(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+  %AVec = load <32 x float>, ptr %A, align 4
+  %BVec = load <32 x float>, ptr %B, align 4
+  %CVec = load <32 x float>, ptr %C, align 4
+  %AtBVec = fmul <32 x float> %AVec, %BVec
+
+  %DVec = fsub <32 x float> %CVec, %AtBVec
+  store <32 x float> %DVec, ptr %D, align 4
+  ret void
+}
+;; CHECK-LABEL: mul_and_sub_1:
+;; CHECK: vsub(v{{[0-9]+}}.sf,v{{[0-9]+}}.qf32)
+
+
+define void @mul_and_sub_2(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+  %AVec = load <32 x float>, ptr %A, align 4
+  %BVec = load <32 x float>, ptr %B, align 4
+  %CVec = load <32 x float>, ptr %C, align 4
+  %AtBVec = fmul <32 x float> %AVec, %BVec
+
+  %DVec = fsub <32 x float> %AtBVec, %CVec
+  store <32 x float> %DVec, ptr %D, align 4
+  ret void
+}
+;; CHECK-LABEL: mul_and_sub_2:
+;; CHECK: vsub(v{{[0-9]+}}.qf32,v{{[0-9]+}}.sf)
+
+
+define void @mul_and_sub_3(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+  %AVec = load <64 x half>, ptr %A, align 4
+  %BVec = load <64 x half>, ptr %B, align 4
+  %CVec = load <64 x half>, ptr %C, align 4
+  %AtBVec = fmul <64 x half> %AVec, %BVec
+
+  %DVec = fsub <64 x half> %CVec, %AtBVec
+  store <64 x half> %DVec, ptr %D, align 4
+  ret void
+}
+;; CHECK-LABEL: mul_and_sub_3:
+;; CHECK: vsub(v{{[0-9]+}}.hf,v{{[0-9]+}}.qf16)
+
+
+define void @mul_and_sub_4(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+  %AVec = load <64 x half>, ptr %A, align 4
+  %BVec = load <64 x half>, ptr %B, align 4
+  %CVec = load <64 x half>, ptr %C, align 4
+  %AtBVec = fmul <64 x half> %AVec, %BVec
+
+  %DVec = fsub <64 x half> %AtBVec, %CVec
+  store <64 x half> %DVec, ptr %D, align 4
+  ret void
+}
+;; CHECK-LABEL: mul_and_sub_4:
+;; CHECK: vsub(v{{[0-9]+}}.qf16,v{{[0-9]+}}.hf)
diff --git a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
index c16370c3b907d..527f27e56c334 100644
--- a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
+++ b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
@@ -2,7 +2,7 @@
 ; type as first parameter instead of a sf type without
 ; any conversion instruction of type sf = qf32
 
-; RUN: llc -mtriple=hexagon < %s -o - | FileCheck %s
+; RUN: llc -mtriple=hexagon -mattr=+hvx-length128b,+hvxv75,+v75 < %s -o - | FileCheck %s
 
 ; CHECK: [[V2:v[0-9]+]] = vxor([[V2]],[[V2]])
 ; CHECK: [[V0:v[0-9]+]].qf32 = vmpy([[V0]].sf,[[V2]].sf)
@@ -17,5 +17,3 @@ entry:
   store <64 x half> %conv17.ripple.vectorized, ptr %out_ptr, align 2
   ret void
 }
-
-attributes #0 = { "target-features"="+hvx-length128b,+hvxv75,+v75,-long-calls,-small-data" }
diff --git a/llvm/test/CodeGen/Hexagon/vect-qfp.mir b/llvm/test/CodeGen/Hexagon/vect-qfp.mir
new file mode 100644
index 0000000000000..6909591ffddf0
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect-qfp.mir
@@ -0,0 +1,204 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN:   -run-pass hexagon-qfp-optimizer -disable-qfp-opt-mul=false %s -o - | FileCheck %s --check-prefix=MUL-ENABLED
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN:   -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s --check-prefix=DEFAULT
+# The first invocation passes -disable-qfp-opt-mul=false; the second leaves
+# that option at its default, so expectations may differ for multiply tests.
+# MUL-ENABLED-LABEL: name: qfpAdd32
+# MUL-ENABLED: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vadd_qf32_mix
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vadd_qf32
+# DEFAULT-LABEL: name: qfpAdd32
+# DEFAULT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vadd_qf32_mix
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vadd_qf32
+---
+name: qfpAdd32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    %0:intregs = COPY $r0
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r2
+    %3:intregs = COPY $r3
+    %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+    %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+    %6:hvxvr = V6_vadd_sf %4:hvxvr, %5:hvxvr
+    %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+    %8:hvxvr = V6_vadd_sf %5:hvxvr, %7:hvxvr
+    %9:hvxvr = V6_vconv_sf_qf32 %8:hvxvr
+    V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+    %10:hvxvr = V6_vadd_sf %7:hvxvr, %9:hvxvr
+    %11:hvxvr = V6_vconv_sf_qf32 %10:hvxvr
+    V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpAdd16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vadd_qf16_mix
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vadd_qf16
+# DEFAULT-LABEL: name: qfpAdd16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vadd_qf16_mix
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vadd_qf16
+---
+name: qfpAdd16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    %0:intregs = COPY $r0
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r2
+    %3:intregs = COPY $r3
+    %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+    %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+    %6:hvxvr = V6_vadd_hf %4:hvxvr, %5:hvxvr
+    %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+    %8:hvxvr = V6_vadd_hf %5:hvxvr, %7:hvxvr
+    %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+    V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+    %10:hvxvr = V6_vadd_hf %7:hvxvr, %9:hvxvr
+    %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+    V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpSub32
+# MUL-ENABLED: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vsub_qf32_mix
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vsub_qf32
+# DEFAULT-LABEL: name: qfpSub32
+# DEFAULT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vsub_qf32_mix
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vsub_qf32
+---
+name: qfpSub32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    %0:intregs = COPY $r0
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r2
+    %3:intregs = COPY $r3
+    %4:hvxvr = V6_vL32Ub_ai %0, 0
+    %5:hvxvr = V6_vL32Ub_ai %1, 0
+    %6:hvxvr = V6_vsub_sf %4, %5 ; both operands are loaded vectors
+    %7:hvxvr = V6_vconv_sf_qf32 %6
+    %8:hvxvr = V6_vsub_sf %7, %5 ; first operand is a conversion result
+    %9:hvxvr = V6_vconv_sf_qf32 %8
+    V6_vS32Ub_ai %2, 0, %9
+    %10:hvxvr = V6_vsub_sf %7, %9 ; both operands are conversion results
+    %11:hvxvr = V6_vconv_sf_qf32 %10
+    V6_vS32Ub_ai %3, 0, %11
+...
+# MUL-ENABLED-LABEL: name: qfpSub16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vsub_qf16_mix
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vsub_qf16
+# DEFAULT-LABEL: name: qfpSub16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vsub_qf16_mix
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vsub_qf16
+---
+name: qfpSub16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    %0:intregs = COPY $r0
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r2
+    %3:intregs = COPY $r3
+    %4:hvxvr = V6_vL32Ub_ai %0, 0
+    %5:hvxvr = V6_vL32Ub_ai %1, 0
+    %6:hvxvr = V6_vsub_hf %4, %5 ; both operands are loaded vectors
+    %7:hvxvr = V6_vconv_hf_qf16 %6
+    %8:hvxvr = V6_vsub_hf %7, %5 ; first operand is a conversion result
+    %9:hvxvr = V6_vconv_hf_qf16 %8
+    V6_vS32Ub_ai %2, 0, %9
+    %10:hvxvr = V6_vsub_hf %7, %9 ; both operands are conversion results
+    %11:hvxvr = V6_vconv_hf_qf16 %10
+    V6_vS32Ub_ai %3, 0, %11
+...
+# MUL-ENABLED-LABEL: name: qfpMul32
+# MUL-ENABLED: V6_vmpy_qf32_sf
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vmpy_qf32_sf
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vmpy_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# DEFAULT-LABEL: name: qfpMul32
+# DEFAULT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vS32Ub_ai
+---
+name: qfpMul32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    %0:intregs = COPY $r0
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r2
+    %3:intregs = COPY $r3
+    %4:hvxvr = V6_vL32Ub_ai %0, 0
+    %5:hvxvr = V6_vL32Ub_ai %1, 0
+    %6:hvxvr = V6_vL32Ub_ai %2, 0
+    %7:hvxvr = V6_vmpy_qf32_sf %4, %5 ; both operands are loaded vectors
+    %8:hvxvr = V6_vconv_sf_qf32 %7
+    %9:hvxvr = V6_vmpy_qf32_sf %5, %6 ; both operands are loaded vectors
+    %10:hvxvr = V6_vconv_sf_qf32 %9
+    %11:hvxvr = V6_vmpy_qf32_sf %8, %10 ; both operands are conversion results
+    V6_vS32Ub_ai %3, 0, %11
+...
+# MUL-ENABLED-LABEL: name: qfpMul16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vmpy_qf16_mix_hf
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vmpy_qf16
+# DEFAULT-LABEL: name: qfpMul16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vmpy_qf16_hf
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vmpy_qf16_hf
+---
+name: qfpMul16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    %0:intregs = COPY $r0
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r2
+    %3:intregs = COPY $r3
+    %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+    %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+    %6:hvxvr = V6_vmpy_qf16_hf %4:hvxvr, %5:hvxvr
+    %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+    %8:hvxvr = V6_vmpy_qf16_hf %5:hvxvr, %7:hvxvr
+    %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+    V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+    %10:hvxvr = V6_vmpy_qf16_hf %7:hvxvr, %9:hvxvr
+    %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+    V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir b/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir
new file mode 100644
index 0000000000000..482edc8dc242b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir
@@ -0,0 +1,99 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN:   -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s
+
+# Each function feeds the result of a V6_vconv_* instruction into a unary
+# vilog2/vneg/vabs instruction; the checks expect the *_qf32 / *_qf16 opcode.
+
+# CHECK-LABEL: name: qfp_vilog32
+# CHECK: V6_vilog2_qf32
+---
+name: qfp_vilog32
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    $v0 = V6_vL32Ub_ai $r0, 0
+    $v1 = V6_vconv_sf_qf32 $v0
+    $v2 = V6_vilog2_sf $v1
+    V6_vS32Ub_ai $r2, 0, $v2
+...
+
+# CHECK-LABEL: name: qfp_vilog16
+# CHECK: V6_vilog2_qf16
+---
+name: qfp_vilog16
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    $v0 = V6_vL32Ub_ai $r0, 0
+    $v1 = V6_vconv_hf_qf16 $v0
+    $v2 = V6_vilog2_hf $v1
+    V6_vS32Ub_ai $r2, 0, $v2
+...
+
+# CHECK-LABEL: name: qfp_vneg32
+# CHECK: V6_vneg_qf32_qf32
+---
+name: qfp_vneg32
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    $v0 = V6_vL32Ub_ai $r0, 0
+    $v1 = V6_vconv_sf_qf32 $v0
+    $v2 = V6_vneg_qf32_sf $v1
+    $v3 = V6_vconv_sf_qf32 $v2
+    V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK-LABEL: name: qfp_vneg16
+# CHECK: V6_vneg_qf16_qf16
+---
+name: qfp_vneg16
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    $v0 = V6_vL32Ub_ai $r0, 0
+    $v1 = V6_vconv_hf_qf16 $v0
+    $v2 = V6_vneg_qf16_hf $v1
+    $v3 = V6_vconv_hf_qf16 $v2
+    V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK-LABEL: name: qfp_vabs32
+# CHECK: V6_vabs_qf32_qf32
+---
+name: qfp_vabs32
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    $v0 = V6_vL32Ub_ai $r0, 0
+    $v1 = V6_vconv_sf_qf32 $v0
+    $v2 = V6_vabs_qf32_sf $v1
+    $v3 = V6_vconv_sf_qf32 $v2
+    V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK-LABEL: name: qfp_vabs16
+# CHECK: V6_vabs_qf16_qf16
+---
+name: qfp_vabs16
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $r0, $r1, $r2, $r3
+    $v0 = V6_vL32Ub_ai $r0, 0
+    $v1 = V6_vconv_hf_qf16 $v0
+    $v2 = V6_vabs_qf16_hf $v1
+    $v3 = V6_vconv_hf_qf16 $v2
+    V6_vS32Ub_ai $r2, 0, $v3
+...