diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 30fe69de6ed00e..35916d11585494 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3008,6 +3008,8 @@ def mv66 : Flag<["-"], "mv66">, Group, Alias, AliasArgs<["hexagonv66"]>; def mv67 : Flag<["-"], "mv67">, Group, Alias, AliasArgs<["hexagonv67"]>; +def mv67t : Flag<["-"], "mv67t">, Group, + Alias, AliasArgs<["hexagonv67t"]>; def mhexagon_hvx : Flag<["-"], "mhvx">, Group, HelpText<"Enable Hexagon Vector eXtensions">; def mhexagon_hvx_EQ : Joined<["-"], "mhvx=">, diff --git a/clang/lib/Basic/Targets/Hexagon.cpp b/clang/lib/Basic/Targets/Hexagon.cpp index 2f31c3512a1a59..fcf9e9cff31da6 100644 --- a/clang/lib/Basic/Targets/Hexagon.cpp +++ b/clang/lib/Basic/Targets/Hexagon.cpp @@ -60,6 +60,9 @@ void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts, } else if (CPU == "hexagonv67") { Builder.defineMacro("__HEXAGON_V67__"); Builder.defineMacro("__HEXAGON_ARCH__", "67"); + } else if (CPU == "hexagonv67t") { + Builder.defineMacro("__HEXAGON_V67T__"); + Builder.defineMacro("__HEXAGON_ARCH__", "67"); } if (hasFeature("hvx-length64b")) { @@ -79,13 +82,20 @@ void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts, if (hasFeature("audio")) { Builder.defineMacro("__HEXAGON_AUDIO__"); } + + std::string NumPhySlots = isTinyCore() ? "3" : "4"; + Builder.defineMacro("__HEXAGON_PHYSICAL_SLOTS__", NumPhySlots); } bool HexagonTargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const { + if (isTinyCore()) + Features["audio"] = true; + StringRef CPUFeature = CPU; CPUFeature.consume_front("hexagon"); + CPUFeature.consume_back("t"); Features[CPUFeature] = true; Features["long-calls"] = false; @@ -174,7 +184,7 @@ static constexpr CPUSuffix Suffixes[] = { {{"hexagonv5"}, {"5"}}, {{"hexagonv55"}, {"55"}}, {{"hexagonv60"}, {"60"}}, {{"hexagonv62"}, {"62"}}, {{"hexagonv65"}, {"65"}}, {{"hexagonv66"}, {"66"}}, - {{"hexagonv67"}, {"67"}}, + {{"hexagonv67"}, {"67"}}, {{"hexagonv67t"}, {"67t"}}, }; const char *HexagonTargetInfo::getHexagonCPUSuffix(StringRef Name) { diff --git a/clang/lib/Basic/Targets/Hexagon.h b/clang/lib/Basic/Targets/Hexagon.h index c94bc58be35c65..2a72825e3c5a31 100644 --- a/clang/lib/Basic/Targets/Hexagon.h +++ b/clang/lib/Basic/Targets/Hexagon.h @@ -124,6 +124,11 @@ class LLVM_LIBRARY_VISIBILITY HexagonTargetInfo : public TargetInfo { int getEHDataRegisterNumber(unsigned RegNo) const override { return RegNo < 2 ? RegNo : -1; } + + bool isTinyCore() const { + // We can write more stricter checks later. + return CPU.find('t') != std::string::npos; + } }; } // namespace targets } // namespace clang diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index d2400472c63eab..2b9046712a2614 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -47,13 +47,12 @@ static void handleHVXWarnings(const Driver &D, const ArgList &Args) { // Handle hvx target features explicitly. static void handleHVXTargetFeatures(const Driver &D, const ArgList &Args, std::vector &Features, - bool &HasHVX) { + StringRef Cpu, bool &HasHVX) { // Handle HVX warnings. handleHVXWarnings(D, Args); // Add the +hvx* features based on commandline flags. StringRef HVXFeature, HVXLength; - StringRef Cpu(toolchains::HexagonToolChain::GetTargetCPUVersion(Args)); // Handle -mhvx, -mhvx=, -mno-hvx. 
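Taken together, the clang hunks above make hexagonv67t a thin variant of v67: -mv67t aliases -mcpu=hexagonv67t, the preprocessor gains __HEXAGON_V67T__ while __HEXAGON_ARCH__ stays 67, __HEXAGON_PHYSICAL_SLOTS__ is 3 on tiny core and 4 otherwise, isTinyCore() is for now just "the CPU name contains a t", and the feature map drops the "hexagon" prefix and the trailing "t" so both CPUs select the same v67 arch feature. A standalone sketch of that string handling, not from the patch; plain std::string stands in for llvm::StringRef and the helper names are invented:

// Illustrative sketch only (not part of the patch): how a CPU string such as
// "hexagonv67t" is interpreted on the clang side.  describeCpu/CpuInfo are
// invented names.
#include <cassert>
#include <string>

struct CpuInfo {
  bool TinyCore;           // 't' suffix marks the tiny-core micro-architecture
  std::string ArchFeature; // subtarget feature, e.g. "v67"
  int PhysicalSlots;       // value given to __HEXAGON_PHYSICAL_SLOTS__
};

static CpuInfo describeCpu(const std::string &CPU) {
  CpuInfo Info;
  // Same heuristic as HexagonTargetInfo::isTinyCore(): a 't' in the name.
  Info.TinyCore = CPU.find('t') != std::string::npos;

  // Mirror of initFeatureMap(): drop the "hexagon" prefix and a trailing 't',
  // so "hexagonv67t" and "hexagonv67" both map to the "v67" feature.
  std::string Feature = CPU;
  if (Feature.rfind("hexagon", 0) == 0)
    Feature.erase(0, 7);
  if (!Feature.empty() && Feature.back() == 't')
    Feature.pop_back();
  Info.ArchFeature = Feature;

  // Tiny core exposes three packet slots, big cores four.
  Info.PhysicalSlots = Info.TinyCore ? 3 : 4;
  return Info;
}

int main() {
  CpuInfo I = describeCpu("hexagonv67t");
  assert(I.TinyCore && I.ArchFeature == "v67" && I.PhysicalSlots == 3);
}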
if (Arg *A = Args.getLastArg(options::OPT_mno_hexagon_hvx, @@ -107,7 +106,15 @@ void hexagon::getHexagonTargetFeatures(const Driver &D, const ArgList &Args, Features.push_back(UseLongCalls ? "+long-calls" : "-long-calls"); bool HasHVX = false; - handleHVXTargetFeatures(D, Args, Features, HasHVX); + StringRef Cpu(toolchains::HexagonToolChain::GetTargetCPUVersion(Args)); + // 't' in Cpu denotes tiny-core micro-architecture. For now, the co-processors + // have no dependency on micro-architecture. + const bool TinyCore = Cpu.contains('t'); + + if (TinyCore) + Cpu = Cpu.take_front(Cpu.size() - 1); + + handleHVXTargetFeatures(D, Args, Features, Cpu, HasHVX); if (HexagonToolChain::isAutoHVXEnabled(Args) && !HasHVX) D.Diag(diag::warn_drv_vectorize_needs_hvx); diff --git a/clang/test/CodeGen/builtins-hexagon-v67-audio.c b/clang/test/CodeGen/builtins-hexagon-v67-audio.c index 7506cad462766d..c120f86564d8ce 100644 --- a/clang/test/CodeGen/builtins-hexagon-v67-audio.c +++ b/clang/test/CodeGen/builtins-hexagon-v67-audio.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple hexagon -target-cpu hexagonv67 -target-feature +audio -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple hexagon -target-cpu hexagonv67t -target-feature +audio -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: @test1 // CHECK: call i64 @llvm.hexagon.M7.dcmpyrw(i64 %0, i64 %1) diff --git a/clang/test/Driver/hexagon-toolchain-elf.c b/clang/test/Driver/hexagon-toolchain-elf.c index f1bf0b4acc1801..d7f450135a3478 100644 --- a/clang/test/Driver/hexagon-toolchain-elf.c +++ b/clang/test/Driver/hexagon-toolchain-elf.c @@ -142,6 +142,14 @@ // CHECK02A: "-cc1" {{.*}} "-target-cpu" "hexagonv67" // CHECK02A: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v67/crt0 +// RUN: %clang -### -target hexagon-unknown-elf \ +// RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ +// RUN: -mcpu=hexagonv67t \ +// RUN: %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK02B %s +// CHECK02B: "-cc1" {{.*}} "-target-cpu" "hexagonv67t" +// CHECK02B: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v67t/crt0 + // ----------------------------------------------------------------------------- // Test Linker related args // ----------------------------------------------------------------------------- diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index cca79bd37ee1c8..a943f553e18cda 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -583,6 +583,7 @@ enum { EF_HEXAGON_MACH_V65 = 0x00000065, // Hexagon V65 EF_HEXAGON_MACH_V66 = 0x00000066, // Hexagon V66 EF_HEXAGON_MACH_V67 = 0x00000067, // Hexagon V67 + EF_HEXAGON_MACH_V67T = 0x00008067, // Hexagon V67T // Highest ISA version flags EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0] diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 366eece985a297..2de6aaa78b0c64 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -351,6 +351,7 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(EF_HEXAGON_MACH_V65); BCase(EF_HEXAGON_MACH_V66); BCase(EF_HEXAGON_MACH_V67); + BCase(EF_HEXAGON_MACH_V67T); BCase(EF_HEXAGON_ISA_V2); BCase(EF_HEXAGON_ISA_V3); BCase(EF_HEXAGON_ISA_V4); diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 108303b4475d0b..2f12ea6a016abf 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ 
b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -469,13 +469,16 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { LLVM_DEBUG(dbgs() << "--\n"); MCB.setLoc(IDLoc); + // Check the bundle for errors. const MCRegisterInfo *RI = getContext().getRegisterInfo(); - HexagonMCChecker Check(getContext(), MII, getSTI(), MCB, *RI); + MCSubtargetInfo const &STI = getSTI(); + + MCInst OrigBundle = MCB; + HexagonMCChecker Check(getContext(), MII, STI, MCB, *RI, true); - bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MII, getSTI(), - getContext(), MCB, - &Check); + bool CheckOk = HexagonMCInstrInfo::canonicalizePacket( + MII, STI, getContext(), MCB, &Check, true); if (CheckOk) { if (HexagonMCInstrInfo::bundleSize(MCB) == 0) { @@ -484,15 +487,12 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { // Empty packets are valid yet aren't emitted return false; } - Out.EmitInstruction(MCB, getSTI()); - } else { - // If compounding and duplexing didn't reduce the size below - // 4 or less we have a packet that is too big. - if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) { - Error(IDLoc, "invalid instruction packet: out of slots"); - } + + assert(HexagonMCInstrInfo::isBundle(MCB)); + + Out.EmitInstruction(MCB, STI); + } else return true; // Error - } return false; // No error } @@ -520,6 +520,8 @@ bool HexagonAsmParser::matchBundleOptions() { HexagonMCInstrInfo::setMemReorderDisabled(MCB); else return getParser().Error(IDLoc, MemNoShuffMsg); + } else if (Option.compare_lower("mem_no_order") == 0) { + // Nothing. } else return getParser().Error(IDLoc, llvm::Twine("'") + Option + "' is not a valid bundle option"); diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 7a90d585eb9a84..d71409de5e3564 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -185,7 +185,10 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; if (Size > HEXAGON_MAX_PACKET_SIZE) return MCDisassembler::Fail; - HexagonMCChecker Checker(getContext(), *MCII, STI, MI, + + const auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI); + const auto STI_ = (ArchSTI != nullptr) ? 
*ArchSTI : STI; + HexagonMCChecker Checker(getContext(), *MCII, STI_, MI, *getContext().getRegisterInfo(), false); if (!Checker.check()) return MCDisassembler::Fail; diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index a8f93a5c63be4e..04c8b23898ddef 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" // Hexagon Architectures include "HexagonDepArch.td" +def ProcTinyCore: SubtargetFeature<"tinycore", "HexagonProcFamily", + "TinyCore", "Hexagon Tiny Core">; + // Hexagon ISA Extensions def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true", "Hexagon ZReg extension instructions">; @@ -114,6 +117,11 @@ def HasPreV65 : Predicate<"HST->hasPreV65()">, def HasMemNoShuf : Predicate<"HST->hasMemNoShuf()">, AssemblerPredicate<"FeatureMemNoShuf">; def UseUnsafeMath : Predicate<"HST->useUnsafeMath()">; +def NotOptTinyCore : Predicate<"!HST->isTinyCore() ||" + "MF->getFunction().hasOptSize()"> { + let RecomputePerFunction = 1; +} +def UseSmallData : Predicate<"HST->useSmallData()">; def Hvx64: HwMode<"+hvx-length64b">; def Hvx128: HwMode<"+hvx-length128b">; @@ -386,6 +394,14 @@ def : Proc<"hexagonv67", HexagonModelV67, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; +// Need to update the correct features for tiny core. +// Disable NewValueJumps since the packetizer is unable to handle a packet with +// a new value jump and another SLOT0 instruction. +def : Proc<"hexagonv67t", HexagonModelV67T, + [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, + ProcTinyCore, ExtensionAudio, + FeatureCompound, FeatureMemNoShuf, FeatureMemops, + FeatureNVS, FeaturePackets, FeatureSmallData]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 799b85ed48b4c6..49edb0d9949287 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1433,10 +1433,16 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, .addImm(int32_t(Lo)); return Reg; } + MachineFunction *MF = B.getParent(); + auto &HST = MF->getSubtarget(); - BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg) - .addImm(C); - return Reg; + // Disable CONST64 for tiny core since it takes a LD resource. + if (!HST.isTinyCore() || + MF->getFunction().hasOptSize()) { + BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg) + .addImm(C); + return Reg; + } } if (RC == &Hexagon::PredRegsRegClass) { diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp index 5b61d1084e08ae..5821e72227bcbe 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2836,6 +2836,9 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, if (MI.isCopy()) return false; + MachineFunction *MF = MI.getParent()->getParent(); + auto &HST = MF->getSubtarget(); + // Collect all virtual register-def operands. 
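The HexagonBitSimplify change above, and the matching HexagonConstPropagation and HexagonCopyToCombine changes that follow, apply one rule: CONST64 ties up a load (LD) resource, so on tiny core it is only formed when the function is optimized for size; otherwise a 64-bit immediate is built from two 32-bit transfers combined into a register pair (the Imm64Lo/Imm64Hi transforms added to HexagonPatterns.td later in this patch). A small sketch of that rule, not from the patch, with an invented helper name:

// Illustrative sketch only (not part of the patch): the tiny-core CONST64
// policy plus the lo/hi split used by the Imm64Lo/Imm64Hi transforms.
#include <cstdint>
#include <cstdio>

// CONST64 occupies a load (LD) resource, so on tiny core it is only worth
// forming when the function is optimized for size.
static bool shouldUseConst64(bool IsTinyCore, bool OptForSize) {
  return !IsTinyCore || OptForSize;
}

int main() {
  const int64_t C = 0x123456789abcdef0LL;
  if (!shouldUseConst64(/*IsTinyCore=*/true, /*OptForSize=*/false)) {
    // Fallback used by the new i64 immediate pattern: two 32-bit halves
    // transferred separately and combined into a pair.
    int32_t Lo = static_cast<int32_t>(C);       // low word, like Imm64Lo
    int32_t Hi = static_cast<int32_t>(C >> 32); // high word, like Imm64Hi
    std::printf("combine(#%d, #%d)\n", Hi, Lo);
  }
}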
SmallVector DefRegs; for (const MachineOperand &MO : MI.operands()) { @@ -2923,11 +2926,13 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, NewMI = BuildMI(B, At, DL, *NewD, NewR) .addImm(Hi) .addImm(Lo); - } else { + } else if (MF->getFunction().hasOptSize() || !HST.isTinyCore()) { + // Disable CONST64 for tiny core since it takes a LD resource. NewD = &HII.get(Hexagon::CONST64); NewMI = BuildMI(B, At, DL, *NewD, NewR) .addImm(V); - } + } else + return false; } } (void)NewMI; diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 394a329ac44766..ae45d8ddf4a996 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -212,7 +212,7 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, // There is a combine of two constant extended values into CONST64, // provided both constants are true immediates. if (isGreaterThanNBitTFRI<16>(HighRegInst) && - isGreaterThanNBitTFRI<16>(LowRegInst)) + isGreaterThanNBitTFRI<16>(LowRegInst) && !IsConst64Disabled) return (HighRegInst.getOperand(1).isImm() && LowRegInst.getOperand(1).isImm()); @@ -279,11 +279,11 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, // A reverse_iterator instantiated like below starts before I2, and I1 // respectively. // Look at instructions I in between I2 and (excluding) I1. - MachineBasicBlock::reverse_iterator I(I2), - End = --(MachineBasicBlock::reverse_iterator(I1)); + MachineBasicBlock::reverse_iterator I = ++I2.getIterator().getReverse(); + MachineBasicBlock::reverse_iterator End = I1.getIterator().getReverse(); // At 03 we got better results (dhrystone!) by being more conservative. if (!ShouldCombineAggressively) - End = MachineBasicBlock::reverse_iterator(I1); + End = ++I1.getIterator().getReverse(); // If I2 kills its operand and we move I2 over an instruction that also // uses I2's use reg we need to modify that (first) instruction to now kill // this reg. @@ -477,6 +477,10 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { ShouldCombineAggressively = MF.getTarget().getOptLevel() <= CodeGenOpt::Default; + // Disable CONST64 for tiny core since it takes a LD resource. + if (!OptForSize && ST->isTinyCore()) + IsConst64Disabled = true; + // Traverse basic blocks. 
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h index ad879e77caab41..45b4cf042443a8 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -26,10 +26,10 @@ static constexpr ArrayRef ArchValsNum(ArchValsNumArray); static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67" }; static constexpr ArrayRef ArchValsText(ArchValsTextArray); -static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67" }; +static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t" }; static constexpr ArrayRef CpuValsText(CpuValsTextArray); -static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67" }; +static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t" }; static constexpr ArrayRef CpuNickText(CpuNickTextArray); static const std::map CpuTable{ @@ -41,6 +41,7 @@ static const std::map CpuTable{ {"hexagonv65", Hexagon::ArchEnum::V65}, {"hexagonv66", Hexagon::ArchEnum::V66}, {"hexagonv67", Hexagon::ArchEnum::V67}, + {"hexagonv67t", Hexagon::ArchEnum::V67}, }; } // namespace Hexagon } // namespace llvm; diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td index 9d9f70fa10c084..fecccb250198d5 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -6253,3 +6253,831 @@ class DepScalarItinV67 { [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> ]; } + +class DepScalarItinV67T { + list DepScalarItinV67T_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 
2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + 
[Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 
2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 9c424bad13931e..1fa32852e8e1ff 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -118,6 +118,12 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), Subtarget(ST) {} +namespace llvm { +namespace HexagonFUnits { + bool isSlot0Only(unsigned units); +} +} + static bool isIntRegForSubInst(unsigned Reg) { return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); @@ -3403,6 +3409,64 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, : Hexagon::J4_cmpeqi_tp1_jump_nt; } +// Returns -1 if there is no opcode found. +int HexagonInstrInfo::getDuplexOpcode(const MachineInstr &MI, + bool ForBigCore) const { + // Static table to switch the opcodes across Tiny Core and Big Core. + // dup_ opcodes are Big core opcodes. + // NOTE: There are special instructions that need to handled later. + // L4_return* instructions, they will only occupy SLOT0 (on big core too). + // PS_jmpret - This pseudo translates to J2_jumpr which occupies only SLOT2. + // The compiler need to base the root instruction to L6_return_map_to_raw + // which can go any slot. + static const std::map DupMap = { + {Hexagon::A2_add, Hexagon::dup_A2_add}, + {Hexagon::A2_addi, Hexagon::dup_A2_addi}, + {Hexagon::A2_andir, Hexagon::dup_A2_andir}, + {Hexagon::A2_combineii, Hexagon::dup_A2_combineii}, + {Hexagon::A2_sxtb, Hexagon::dup_A2_sxtb}, + {Hexagon::A2_sxth, Hexagon::dup_A2_sxth}, + {Hexagon::A2_tfr, Hexagon::dup_A2_tfr}, + {Hexagon::A2_tfrsi, Hexagon::dup_A2_tfrsi}, + {Hexagon::A2_zxtb, Hexagon::dup_A2_zxtb}, + {Hexagon::A2_zxth, Hexagon::dup_A2_zxth}, + {Hexagon::A4_combineii, Hexagon::dup_A4_combineii}, + {Hexagon::A4_combineir, Hexagon::dup_A4_combineir}, + {Hexagon::A4_combineri, Hexagon::dup_A4_combineri}, + {Hexagon::C2_cmoveif, Hexagon::dup_C2_cmoveif}, + {Hexagon::C2_cmoveit, Hexagon::dup_C2_cmoveit}, + {Hexagon::C2_cmovenewif, Hexagon::dup_C2_cmovenewif}, + {Hexagon::C2_cmovenewit, Hexagon::dup_C2_cmovenewit}, + {Hexagon::C2_cmpeqi, Hexagon::dup_C2_cmpeqi}, + {Hexagon::L2_deallocframe, Hexagon::dup_L2_deallocframe}, + {Hexagon::L2_loadrb_io, Hexagon::dup_L2_loadrb_io}, + {Hexagon::L2_loadrd_io, Hexagon::dup_L2_loadrd_io}, + {Hexagon::L2_loadrh_io, Hexagon::dup_L2_loadrh_io}, + {Hexagon::L2_loadri_io, Hexagon::dup_L2_loadri_io}, + {Hexagon::L2_loadrub_io, Hexagon::dup_L2_loadrub_io}, + {Hexagon::L2_loadruh_io, Hexagon::dup_L2_loadruh_io}, + {Hexagon::S2_allocframe, Hexagon::dup_S2_allocframe}, + {Hexagon::S2_storerb_io, Hexagon::dup_S2_storerb_io}, + {Hexagon::S2_storerd_io, Hexagon::dup_S2_storerd_io}, + {Hexagon::S2_storerh_io, Hexagon::dup_S2_storerh_io}, + {Hexagon::S2_storeri_io, Hexagon::dup_S2_storeri_io}, + {Hexagon::S4_storeirb_io, Hexagon::dup_S4_storeirb_io}, + {Hexagon::S4_storeiri_io, Hexagon::dup_S4_storeiri_io}, + }; + unsigned OpNum = MI.getOpcode(); + // Conversion to Big core. 
+ if (ForBigCore) { + auto Iter = DupMap.find(OpNum); + if (Iter != DupMap.end()) + return Iter->second; + } else { // Conversion to Tiny core. + for (auto Iter = DupMap.begin(), End = DupMap.end(); Iter != End; ++Iter) + if (Iter->second == OpNum) + return Iter->first; + } + return -1; +} + int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : @@ -3735,6 +3799,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // Rd = memw(Rs+#u4:2) // Rd = memub(Rs+#u4:0) case Hexagon::L2_loadri_io: + case Hexagon::dup_L2_loadri_io: DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); // Special case this one from Group L2. @@ -3753,6 +3818,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( } break; case Hexagon::L2_loadrub_io: + case Hexagon::dup_L2_loadrub_io: // Rd = memub(Rs+#u4:0) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -3772,6 +3838,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // [if ([!]p0[.new])] jumpr r31 case Hexagon::L2_loadrh_io: case Hexagon::L2_loadruh_io: + case Hexagon::dup_L2_loadrh_io: + case Hexagon::dup_L2_loadruh_io: // Rd = memh/memuh(Rs+#u3:1) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -3781,6 +3849,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_L2; break; case Hexagon::L2_loadrb_io: + case Hexagon::dup_L2_loadrb_io: // Rd = memb(Rs+#u3:0) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -3790,6 +3859,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_L2; break; case Hexagon::L2_loadrd_io: + case Hexagon::dup_L2_loadrd_io: // Rdd = memd(r29+#u5:3) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -3806,6 +3876,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: case Hexagon::L4_return: case Hexagon::L2_deallocframe: + case Hexagon::dup_L2_deallocframe: return HexagonII::HSIG_L2; case Hexagon::EH_RETURN_JMPR: case Hexagon::PS_jmpret: @@ -3825,6 +3896,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::SL2_jumpr31_t: case Hexagon::SL2_jumpr31_f: case Hexagon::SL2_jumpr31_tnew: + case Hexagon::SL2_jumpr31_fnew: DstReg = MI.getOperand(1).getReg(); SrcReg = MI.getOperand(0).getReg(); // [if ([!]p0[.new])] jumpr r31 @@ -3850,6 +3922,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // memw(Rs+#u4:2) = Rt // memb(Rs+#u4:0) = Rt case Hexagon::S2_storeri_io: + case Hexagon::dup_S2_storeri_io: // Special case this one from Group S2. 
// memw(r29+#u5:2) = Rt Src1Reg = MI.getOperand(0).getReg(); @@ -3866,6 +3939,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_S1; break; case Hexagon::S2_storerb_io: + case Hexagon::dup_S2_storerb_io: // memb(Rs+#u4:0) = Rt Src1Reg = MI.getOperand(0).getReg(); Src2Reg = MI.getOperand(2).getReg(); @@ -3883,6 +3957,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // memb(Rs+#u4) = #U1 // allocframe(#u5:3) case Hexagon::S2_storerh_io: + case Hexagon::dup_S2_storerh_io: // memh(Rs+#u3:1) = Rt Src1Reg = MI.getOperand(0).getReg(); Src2Reg = MI.getOperand(2).getReg(); @@ -3892,6 +3967,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_S1; break; case Hexagon::S2_storerd_io: + case Hexagon::dup_S2_storerd_io: // memd(r29+#s6:3) = Rtt Src1Reg = MI.getOperand(0).getReg(); Src2Reg = MI.getOperand(2).getReg(); @@ -3902,6 +3978,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_S2; break; case Hexagon::S4_storeiri_io: + case Hexagon::dup_S4_storeiri_io: // memw(Rs+#u4:2) = #U1 Src1Reg = MI.getOperand(0).getReg(); if (isIntRegForSubInst(Src1Reg) && MI.getOperand(1).isImm() && @@ -3910,6 +3987,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_S2; break; case Hexagon::S4_storeirb_io: + case Hexagon::dup_S4_storeirb_io: // memb(Rs+#u4) = #U1 Src1Reg = MI.getOperand(0).getReg(); if (isIntRegForSubInst(Src1Reg) && @@ -3918,6 +3996,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_S2; break; case Hexagon::S2_allocframe: + case Hexagon::dup_S2_allocframe: if (MI.getOperand(2).isImm() && isShiftedUInt<5,3>(MI.getOperand(2).getImm())) return HexagonII::HSIG_S1; @@ -3941,6 +4020,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( // Rd = sxth/sxtb/zxtb/zxth(Rs) // Rd = and(Rs,#1) case Hexagon::A2_addi: + case Hexagon::dup_A2_addi: DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg)) { @@ -3962,6 +4042,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( } break; case Hexagon::A2_add: + case Hexagon::dup_A2_add: // Rx = add(Rx,Rs) DstReg = MI.getOperand(0).getReg(); Src1Reg = MI.getOperand(1).getReg(); @@ -3971,6 +4052,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_A; break; case Hexagon::A2_andir: + case Hexagon::dup_A2_andir: // Same as zxtb. // Rd16=and(Rs16,#255) // Rd16=and(Rs16,#1) @@ -3983,6 +4065,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_A; break; case Hexagon::A2_tfr: + case Hexagon::dup_A2_tfr: // Rd = Rs DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -3990,6 +4073,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_A; break; case Hexagon::A2_tfrsi: + case Hexagon::dup_A2_tfrsi: // Rd = #u6 // Do not test for #u6 size since the const is getting extended // regardless and compound could be formed. 
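The dup_* cases folded into getDuplexCandidateGroup() above reuse the existing sub-instruction checks: registers must come from the duplex-addressable set (r0-r7, r16-r23) and immediates must fit the shifted unsigned ranges of the compressed encodings, for example isShiftedUInt<5,3> for allocframe. A self-contained restatement of those two checks, not from the patch, assuming the documented semantics of llvm::isShiftedUInt and using raw register numbers in place of the Hexagon::R* enumerators:

// Illustrative sketch only (not part of the patch): the operand checks the
// duplex candidate classification relies on, restated without LLVM types.
#include <cassert>
#include <cstdint>

// Mirrors isIntRegForSubInst(): only r0-r7 and r16-r23 are encodable in a
// sub-instruction.
static bool isIntRegForSubInst(unsigned R) {
  return (R <= 7) || (R >= 16 && R <= 23);
}

// Same shape as llvm::isShiftedUInt<N, S>: an unsigned N-bit value shifted
// left by S, i.e. a multiple of 2^S that fits in N+S bits.
template <unsigned N, unsigned S> static bool isShiftedUInt(uint64_t V) {
  return (V % (1u << S)) == 0 && V < (1ull << (N + S));
}

int main() {
  assert(isIntRegForSubInst(3) && isIntRegForSubInst(17));
  assert(!isIntRegForSubInst(8) && !isIntRegForSubInst(28));
  // allocframe(#u5:3): offsets 0..248 in steps of 8.
  assert(isShiftedUInt<5, 3>(248) && !isShiftedUInt<5, 3>(12) &&
         !isShiftedUInt<5, 3>(256));
}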
@@ -4002,6 +4086,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::C2_cmovenewit: case Hexagon::C2_cmoveif: case Hexagon::C2_cmovenewif: + case Hexagon::dup_C2_cmoveit: + case Hexagon::dup_C2_cmovenewit: + case Hexagon::dup_C2_cmoveif: + case Hexagon::dup_C2_cmovenewif: // if ([!]P0[.new]) Rd = #0 // Actual form: // %r16 = C2_cmovenewit internal %p0, 0, implicit undef %r16; @@ -4013,6 +4101,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_A; break; case Hexagon::C2_cmpeqi: + case Hexagon::dup_C2_cmpeqi: // P0 = cmp.eq(Rs,#u2) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -4023,6 +4112,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( break; case Hexagon::A2_combineii: case Hexagon::A4_combineii: + case Hexagon::dup_A2_combineii: + case Hexagon::dup_A4_combineii: // Rdd = combine(#u2,#U2) DstReg = MI.getOperand(0).getReg(); if (isDblRegForSubInst(DstReg, HRI) && @@ -4035,6 +4126,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_A; break; case Hexagon::A4_combineri: + case Hexagon::dup_A4_combineri: + // Rdd = combine(Rs,#0) // Rdd = combine(Rs,#0) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -4044,6 +4137,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_A; break; case Hexagon::A4_combineir: + case Hexagon::dup_A4_combineir: // Rdd = combine(#0,Rs) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); @@ -4056,6 +4150,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::A2_sxth: case Hexagon::A2_zxtb: case Hexagon::A2_zxth: + case Hexagon::dup_A2_sxtb: + case Hexagon::dup_A2_sxth: + case Hexagon::dup_A2_zxtb: + case Hexagon::dup_A2_zxth: // Rd = sxth/sxtb/zxtb/zxth(Rs) DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); @@ -4199,6 +4297,61 @@ bool HexagonInstrInfo::isAddrModeWithOffset(const MachineInstr &MI) const { addrMode == HexagonII::BaseLongOffset); } +bool HexagonInstrInfo::isPureSlot0(const MachineInstr &MI) const { + // Workaround for the Global Scheduler. Sometimes, it creates + // A4_ext as a Pseudo instruction and calls this function to see if + // it can be added to an existing bundle. Since the instruction doesn't + // belong to any BB yet, we can't use getUnits API. + if (MI.getOpcode() == Hexagon::A4_ext) + return false; + + unsigned FuncUnits = getUnits(MI); + return HexagonFUnits::isSlot0Only(FuncUnits); +} + +bool HexagonInstrInfo::isRestrictNoSlot1Store(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; + return ((F >> HexagonII::RestrictNoSlot1StorePos) & + HexagonII::RestrictNoSlot1StoreMask); +} + +void HexagonInstrInfo::changeDuplexOpcode(MachineBasicBlock::instr_iterator MII, + bool ToBigInstrs) const { + int Opcode = -1; + if (ToBigInstrs) { // To BigCore Instr. + // Check if the instruction can form a Duplex. + if (getDuplexCandidateGroup(*MII)) + // Get the opcode marked "dup_*" tag. + Opcode = getDuplexOpcode(*MII, ToBigInstrs); + } else // To TinyCore Instr. + Opcode = getDuplexOpcode(*MII, ToBigInstrs); + + // Change the opcode of the instruction. + if (Opcode >= 0) + MII->setDesc(get(Opcode)); +} + +// This function is used to translate instructions to facilitate generating +// Duplexes on TinyCore. 
+void HexagonInstrInfo::translateInstrsForDup(MachineFunction &MF, + bool ToBigInstrs) const { + for (auto &MB : MF) + for (MachineBasicBlock::instr_iterator Instr = MB.instr_begin(), + End = MB.instr_end(); + Instr != End; ++Instr) + changeDuplexOpcode(Instr, ToBigInstrs); +} + +// This is a specialized form of above function. +void HexagonInstrInfo::translateInstrsForDup( + MachineBasicBlock::instr_iterator MII, bool ToBigInstrs) const { + MachineBasicBlock *MBB = MII->getParent(); + while ((MII != MBB->instr_end()) && MII->isInsideBundle()) { + changeDuplexOpcode(MII, ToBigInstrs); + ++MII; + } +} + unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { using namespace HexagonII; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index f54cf6a6a1b19d..e529d86ee9aaf5 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -341,10 +341,10 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { MachineBasicBlock *TargetBB, SmallPtrSet &Visited) const; - bool isBaseImmOffset(const MachineInstr &MI) const; bool isAbsoluteSet(const MachineInstr &MI) const; bool isAccumulator(const MachineInstr &MI) const; bool isAddrModeWithOffset(const MachineInstr &MI) const; + bool isBaseImmOffset(const MachineInstr &MI) const; bool isComplex(const MachineInstr &MI) const; bool isCompoundBranchInstr(const MachineInstr &MI) const; bool isConstExtended(const MachineInstr &MI) const; @@ -387,6 +387,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool isPredicated(unsigned Opcode) const; bool isPredicateLate(unsigned Opcode) const; bool isPredictedTaken(unsigned Opcode) const; + bool isPureSlot0(const MachineInstr &MI) const; + bool isRestrictNoSlot1Store(const MachineInstr &MI) const; bool isSaveCalleeSavedRegsCall(const MachineInstr &MI) const; bool isSignExtendingLoad(const MachineInstr &MI) const; bool isSolo(const MachineInstr &MI) const; @@ -435,6 +437,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { getCompoundCandidateGroup(const MachineInstr &MI) const; unsigned getCompoundOpcode(const MachineInstr &GA, const MachineInstr &GB) const; + int getDuplexOpcode(const MachineInstr &MI, bool ForBigCore = true) const; int getCondOpcode(int Opc, bool sense) const; int getDotCurOp(const MachineInstr &MI) const; int getNonDotCurOp(const MachineInstr &MI) const; @@ -480,6 +483,17 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { void setBundleNoShuf(MachineBasicBlock::instr_iterator MIB) const; bool getBundleNoShuf(const MachineInstr &MIB) const; + + // When TinyCore with Duplexes is enabled, this function is used to translate + // tiny-instructions to big-instructions and vice versa to get the slot + // consumption. + void changeDuplexOpcode(MachineBasicBlock::instr_iterator MII, + bool ToBigInstrs) const; + void translateInstrsForDup(MachineFunction &MF, + bool ToBigInstrs = true) const; + void translateInstrsForDup(MachineBasicBlock::instr_iterator MII, + bool ToBigInstrs) const; + // Addressing mode relations. 
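translateInstrsForDup() and changeDuplexOpcode() above give the packetizer a way to see tiny-core slot consumption: before packetizing, instructions that qualify as duplex candidates are rewritten to their big-core dup_* twins, and after packetizing everything is rewritten back (the HexagonVLIWPacketizer hunk later in this patch calls the two directions around its main loop). A compact sketch of that round trip, not from the patch, with invented integer opcodes and a stub packetizer:

// Illustrative sketch only (not part of the patch): the translate ->
// packetize -> translate-back flow enabled for tiny core with duplexes.
#include <cassert>
#include <map>
#include <vector>

static const std::map<int, int> DupMap = {{10, 110}, {20, 120}}; // core -> dup_*

// Stands in for the getDuplexCandidateGroup() gate on the forward direction.
static bool canFormDuplex(int Opc) { return DupMap.count(Opc) != 0; }

static void translateForDup(std::vector<int> &Func, bool ToBigInstrs) {
  for (int &Opc : Func) {
    if (ToBigInstrs) {
      // Only rewrite instructions that really qualify as duplex candidates.
      if (canFormDuplex(Opc))
        Opc = DupMap.at(Opc);
    } else {
      for (const auto &E : DupMap)
        if (E.second == Opc) {
          Opc = E.first; // back to the original opcode
          break;
        }
    }
  }
}

static void packetize(std::vector<int> &) { /* slot/DFA decisions happen here */ }

int main() {
  std::vector<int> Func = {10, 5, 20}, Orig = Func;
  translateForDup(Func, /*ToBigInstrs=*/true);  // 10 -> 110, 20 -> 120
  packetize(Func);
  translateForDup(Func, /*ToBigInstrs=*/false); // restore core opcodes
  assert(Func == Orig);
}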
short changeAddrMode_abs_io(short Opc) const; short changeAddrMode_io_abs(short Opc) const; diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 6bbde51d641596..2bfd8d7cd4d643 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -362,6 +362,16 @@ def Rol: pf2; // --(1) Immediate ------------------------------------------------------- // +def Imm64Lo: SDNodeXFormgetTargetConstant(int32_t (N->getSExtValue()), + SDLoc(N), MVT::i32); +}]>; +def Imm64Hi: SDNodeXFormgetTargetConstant(int32_t (N->getSExtValue()>>32), + SDLoc(N), MVT::i32); +}]>; + + def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<0>]>; @@ -389,7 +399,10 @@ def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>; def: Pat<(i1 0), (PS_false)>; def: Pat<(i1 1), (PS_true)>; -def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; +def: Pat<(i64 imm:$v), (CONST64 imm:$v)>, + Requires<[UseSmallData,NotOptTinyCore]>; +def: Pat<(i64 imm:$v), + (Combinew (A2_tfrsi (Imm64Hi $v)), (A2_tfrsi (Imm64Lo $v)))>; def ftoi : SDNodeXFormgetValueAPF().bitcastToAPInt(); diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td index 4060dd38aa5215..5efd02ada54cf5 100644 --- a/llvm/lib/Target/Hexagon/HexagonSchedule.td +++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -67,3 +67,4 @@ include "HexagonScheduleV62.td" include "HexagonScheduleV65.td" include "HexagonScheduleV66.td" include "HexagonScheduleV67.td" +include "HexagonScheduleV67T.td" diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV67T.td b/llvm/lib/Target/Hexagon/HexagonScheduleV67T.td new file mode 100644 index 00000000000000..f2bcb1e7256c37 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonScheduleV67T.td @@ -0,0 +1,61 @@ +//=- HexagonScheduleV67T.td - Hexagon V67 Tiny Core Scheduling Definitions --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +class HexagonV67TPseudoItin { + list V67TPseudoItin_list = [ + InstrItinData], [2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>], + [2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], [2]> + ]; +} + +// V67TItin_list and HVXItin contain some old itineraries +// still used by a handful of instructions. Hopefully, we will be able to +// get rid of them soon. 
+def HexagonV67TItinList : DepScalarItinV67T, + DepHVXItinV67, HVXItin, HexagonV67TPseudoItin { + list V67TItin_list = [ + InstrItinData], + [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData], + [1, 1, 3, 3], + [Hex_FWD, Hex_FWD]> + ]; + + list ItinList = + !listconcat(DepScalarItinV67T_list, + DepHVXItinV67_list, V67TItin_list, + HVXItin_list, V67TPseudoItin_list); +} + +def HexagonItinerariesV67T : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV67TItinList.ItinList>; + + +def HexagonModelV67T : SchedMachineModel { + let IssueWidth = 3; + let Itineraries = HexagonItinerariesV67T; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V67 Tiny Core Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 54616878657133..adeb3e0c8dee1d 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -83,6 +83,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, InstrInfo(initializeSubtargetDependencies(CPU, FS)), RegInfo(getHwMode()), TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) { + Hexagon_MC::addArchSubtarget(this, FS); // Beware of the default constructor of InstrItineraryData: it will // reset all members to 0. assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized"); @@ -109,6 +110,13 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { if (OverrideLongCalls.getPosition()) UseLongCalls = OverrideLongCalls; + if (isTinyCore()) { + // Tiny core has a single thread, so back-to-back scheduling is enabled by + // default. + if (!EnableBSBSched.getPosition()) + UseBSBScheduling = false; + } + FeatureBitset Features = getFeatureBits(); if (HexagonDisableDuplex) setFeatureBits(Features.reset(Hexagon::FeatureDuplex)); diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index a2ec66589cf7da..2c6d489f53e4f8 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -87,8 +87,14 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { }; private: + enum HexagonProcFamilyEnum { Others, TinyCore }; + std::string CPUString; Triple TargetTriple; + + // The following objects can use the TargetTriple, so they must be + // declared after it. 
+ HexagonProcFamilyEnum HexagonProcFamily = Others; HexagonInstrInfo InstrInfo; HexagonRegisterInfo RegInfo; HexagonTargetLowering TLInfo; @@ -185,6 +191,9 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool useUnsafeMath() const { return UseUnsafeMath; } bool useZRegOps() const { return UseZRegOps; } + bool isTinyCore() const { return HexagonProcFamily == TinyCore; } + bool isTinyCoreWithDuplex() const { return isTinyCore() && EnableDuplex; } + bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::NoArch; } diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 36d71c41da543a..b9c7be777bb4bd 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -242,6 +242,10 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { } } + // TinyCore with Duplexes: Translate to big-instructions. + if (HST.isTinyCoreWithDuplex()) + HII->translateInstrsForDup(MF, true); + // Loop over all of the basic blocks. for (auto &MB : MF) { auto Begin = MB.begin(), End = MB.end(); @@ -267,6 +271,10 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { } } + // TinyCore with Duplexes: Translate to tiny-instructions. + if (HST.isTinyCoreWithDuplex()) + HII->translateInstrsForDup(MF, false); + Packetizer.unpacketizeSoloInstrs(MF); return true; } @@ -1802,6 +1810,8 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, setmemShufDisabled(false); } + PacketHasDuplex = false; + PacketHasSLOT0OnlyInsn = false; ResourceTracker->clearResources(); LLVM_DEBUG(dbgs() << "End packet\n"); } @@ -1809,7 +1819,64 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { if (Minimal) return false; - return !producesStall(MI); + + // Constrainst for not packetizing this MI with existing instructions in a + // packet. + // MI is a store instruction. + // CurrentPacketMIs has a SLOT0 only instruction with constraint + // A_RESTRICT_NOSLOT1_STORE/isRestrictNoSlot1Store. + if (MI.mayStore() && isPureSlot0InsnWithNoSlot1Store(MI)) + return false; + + if (producesStall(MI)) + return false; + + // If TinyCore with Duplexes is enabled, check if this MI can form a Duplex + // with any other instruction in the existing packet. + auto &HST = MI.getParent()->getParent()->getSubtarget(); + // Constraint 1: Only one duplex allowed per packet. + // Constraint 2: Consider duplex checks only if there is atleast one + // instruction in a packet. + // Constraint 3: If one of the existing instructions in the packet has a + // SLOT0 only instruction that can not be duplexed, do not attempt to form + // duplexes. (TODO: This will invalidate the L4_return* instructions to form a + // duplex) + if (HST.isTinyCoreWithDuplex() && CurrentPacketMIs.size() > 0 && + !PacketHasDuplex) { + // Check for SLOT0 only non-duplexable instruction in packet. + for (auto &MJ : CurrentPacketMIs) + PacketHasSLOT0OnlyInsn |= HII->isPureSlot0(*MJ); + // Get the Big Core Opcode (dup_*). + int Opcode = HII->getDuplexOpcode(MI, false); + if (Opcode >= 0) { + // We now have an instruction that can be duplexed. + for (auto &MJ : CurrentPacketMIs) { + if (HII->isDuplexPair(MI, *MJ) && !PacketHasSLOT0OnlyInsn) { + PacketHasDuplex = true; + return true; + } + } + // If it can not be duplexed, check if there is a valid transition in DFA + // with the original opcode. 
+ MachineInstr &MIRef = const_cast(MI); + MIRef.setDesc(HII->get(Opcode)); + return ResourceTracker->canReserveResources(MIRef); + } + } + + return true; +} + +bool HexagonPacketizerList::isPureSlot0InsnWithNoSlot1Store( + const MachineInstr &MI) { + bool noSlot1Store = false; + bool isSlot0Only = false; + for (auto J : CurrentPacketMIs) { + noSlot1Store |= HII->isRestrictNoSlot1Store(*J); + isSlot0Only |= HII->isPureSlot0(*J); + } + + return (noSlot1Store && isSlot0Only); } // V60 forward scheduling. diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 943b9ac7ecc49f..27a47220570a15 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -57,6 +57,13 @@ class HexagonPacketizerList : public VLIWPacketizerList { // instruction from the previous packet. bool PacketStalls = false; + // Set to true if the packet has a duplex pair of sub-instructions. + bool PacketHasDuplex = false; + + // Set to true if the packet has a instruction that can only be executed + // in SLOT0. + bool PacketHasSLOT0OnlyInsn = false; + protected: /// A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; @@ -149,6 +156,7 @@ class HexagonPacketizerList : public VLIWPacketizerList { bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J); bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J); bool producesStall(const MachineInstr &MI); + bool isPureSlot0InsnWithNoSlot1Store(const MachineInstr &MI); }; } // end namespace llvm diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 8f1e5c1c3a9797..ef7db3933e3659 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -43,6 +43,7 @@ class HexagonAsmBackend : public MCAsmBackend { std::unique_ptr MCII; std::unique_ptr RelaxTarget; MCInst * Extender; + unsigned MaxPacketSize; void ReplaceInstruction(MCCodeEmitter &E, MCRelaxableFragment &RF, MCInst &HMB) const { @@ -62,7 +63,8 @@ class HexagonAsmBackend : public MCAsmBackend { StringRef CPU) : MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU), relaxedCnt(0), MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *), - Extender(nullptr) {} + Extender(nullptr), MaxPacketSize(HexagonMCInstrInfo::packetSize(CPU)) + {} std::unique_ptr createObjectTargetWriter() const override { @@ -685,7 +687,7 @@ class HexagonAsmBackend : public MCAsmBackend { ParseIn = 0x00004000, // In packet parse-bits. ParseEnd = 0x0000c000; // End of packet parse-bits. - while(Count % HEXAGON_INSTR_SIZE) { + while (Count % HEXAGON_INSTR_SIZE) { LLVM_DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" << Count % HEXAGON_INSTR_SIZE << "/" << HEXAGON_INSTR_SIZE << "\n"); @@ -693,11 +695,11 @@ class HexagonAsmBackend : public MCAsmBackend { OS << '\0'; } - while(Count) { + while (Count) { Count -= HEXAGON_INSTR_SIZE; // Close the packet whenever a multiple of the maximum packet size remains - uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))? - ParseIn: ParseEnd; + uint32_t ParseBits = (Count % (MaxPacketSize * HEXAGON_INSTR_SIZE)) ? 
+ ParseIn : ParseEnd; support::endian::write(OS, Nopcode | ParseBits, Endian); } return true; @@ -728,7 +730,8 @@ class HexagonAsmBackend : public MCAsmBackend { MCContext &Context = Asm.getContext(); auto &RF = cast<MCRelaxableFragment>(*K); auto &Inst = const_cast<MCInst &>(RF.getInst()); - while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < 4) { + while (Size > 0 && + HexagonMCInstrInfo::bundleSize(Inst) < MaxPacketSize) { MCInst *Nop = new (Context) MCInst; Nop->setOpcode(Hexagon::A2_nop); Inst.addOperand(MCOperand::createInst(Nop)); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index 3cbb8600ce7aa8..5154a0a1e46c1e 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -10,12 +10,11 @@ // //===----------------------------------------------------------------------===// +#include "HexagonMCExpr.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -296,8 +295,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { DstReg = MCI.getOperand(1).getReg(); SrcReg = MCI.getOperand(0).getReg(); // [if ([!]p0[.new])] jumpr r31 - if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) && - (Hexagon::R31 == DstReg)) { + if ((Hexagon::P0 == SrcReg) && (Hexagon::R31 == DstReg)) { return HexagonII::HSIG_L2; } break; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 8eb32eb724535b..4f8a4325621969 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -119,10 +120,10 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { return (1); } -bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, - MCSubtargetInfo const &STI, - MCContext &Context, MCInst &MCB, - HexagonMCChecker *Check) { +namespace { +bool canonicalizePacketImpl(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + HexagonMCChecker *Check) { // Check the bundle for errors. bool CheckOk = Check ? Check->check(false) : true; if (!CheckOk) @@ -132,9 +133,9 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, if (!HexagonDisableCompound) HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB); HexagonMCShuffle(Context, false, MCII, STI, MCB); + // Examine the packet and convert pairs of instructions to duplex // instructions when possible. - MCInst InstBundlePreDuplex = MCInst(MCB); if (STI.getFeatureBits() [Hexagon::FeatureDuplex]) { SmallVector<DuplexCandidate, 8> possibleDuplexes; possibleDuplexes = @@ -146,8 +147,11 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, HexagonMCInstrInfo::padEndloop(MCB, Context); // If compounding and duplexing didn't reduce the size below // 4 or less we have a packet that is too big.
- if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) + if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) { + if (Check) + Check->reportError("invalid instruction packet: out of slots"); return false; + } // Check the bundle for errors. CheckOk = Check ? Check->check(true) : true; if (!CheckOk) @@ -155,6 +159,27 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, HexagonMCShuffle(Context, true, MCII, STI, MCB); return true; } +} // namespace + +bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + HexagonMCChecker *Check, + bool AttemptCompatibility) { + auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI); + if (!AttemptCompatibility || ArchSTI == nullptr) + return canonicalizePacketImpl(MCII, STI, Context, MCB, Check); + + const MCRegisterInfo *RI = Context.getRegisterInfo(); + HexagonMCChecker DefaultCheck(Context, MCII, STI, MCB, *RI, false); + HexagonMCChecker *BaseCheck = (Check == nullptr) ? &DefaultCheck : Check; + HexagonMCChecker PerfCheck(*BaseCheck, STI, false); + if (canonicalizePacketImpl(MCII, STI, Context, MCB, &PerfCheck)) + return true; + + HexagonMCChecker ArchCheck(*BaseCheck, *ArchSTI, true); + return canonicalizePacketImpl(MCII, *ArchSTI, Context, MCB, &ArchCheck); +} MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst, @@ -493,7 +518,7 @@ bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII, MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { assert(isBundle(MCB)); - assert(Index < HEXAGON_PACKET_SIZE); + assert(Index < HEXAGON_PRESHUFFLE_PACKET_SIZE); return *MCB.getOperand(bundleInstructionsOffset + Index).getInst(); } @@ -633,6 +658,12 @@ bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII, return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); } +bool HexagonMCInstrInfo::isNewValueStore(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + /// Return whether the operand is extendable. bool HexagonMCInstrInfo::isOpExtendable(MCInstrInfo const &MCII, MCInst const &MCI, unsigned short O) { @@ -675,8 +706,17 @@ bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII, !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask)); } -bool HexagonMCInstrInfo::isPredReg(unsigned Reg) { - return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0); +bool HexagonMCInstrInfo::isPredReg(MCRegisterInfo const &MRI, unsigned Reg) { + auto &PredRegClass = MRI.getRegClass(Hexagon::PredRegsRegClassID); + return PredRegClass.contains(Reg); +} + +bool HexagonMCInstrInfo::isPredRegister(MCInstrInfo const &MCII, + MCInst const &Inst, unsigned I) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst); + + return Inst.getOperand(I).isReg() && + Desc.OpInfo[I].RegClass == Hexagon::PredRegsRegClassID; } /// Return whether the insn can be packaged only with A and X-type insns. 
@@ -773,10 +813,8 @@ bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) { } bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) { - if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) && - (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST)) - return true; - return false; + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::isCVIPos) & HexagonII::isCVIMask; } int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) { @@ -822,6 +860,18 @@ bool HexagonMCInstrInfo::s27_2_reloc(MCExpr const &Expr) { return HExpr->s27_2_reloc(); } +unsigned HexagonMCInstrInfo::packetSizeSlots(MCSubtargetInfo const &STI) { + const bool IsTiny = STI.getFeatureBits()[Hexagon::ProcTinyCore]; + + return IsTiny ? (HEXAGON_PACKET_SIZE - 1) : HEXAGON_PACKET_SIZE; +} + +unsigned HexagonMCInstrInfo::packetSize(StringRef CPU) { + return llvm::StringSwitch<unsigned>(CPU) + .Case("hexagonv67t", 3) + .Default(4); +} + void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) { MCInst Nop; Nop.setOpcode(Hexagon::A2_nop); @@ -856,6 +906,33 @@ bool HexagonMCInstrInfo::hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI) { return (F >> HexagonII::HasTmpDstPos) & HexagonII::HasTmpDstMask; } +bool HexagonMCInstrInfo::requiresSlot(MCSubtargetInfo const &STI, + MCInst const &MCI) { + const unsigned OpCode = MCI.getOpcode(); + const bool IsTiny = STI.getFeatureBits() [Hexagon::ProcTinyCore]; + const bool NoSlotReqd = Hexagon::A4_ext == OpCode || + (IsTiny && Hexagon::A2_nop == OpCode) || + (IsTiny && Hexagon::J4_hintjumpr == OpCode); + + return !NoSlotReqd; +} + +unsigned HexagonMCInstrInfo::slotsConsumed(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCInst const &MCI) { + unsigned slotsUsed = 0; + for (auto HMI : bundleInstructions(MCI)) { + MCInst const &MCI = *HMI.getInst(); + if (!requiresSlot(STI, MCI)) + continue; + if (isDuplex(MCII, MCI)) + slotsUsed += 2; + else + ++slotsUsed; + } + return slotsUsed; +} + void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate) { assert(Candidate.packetIndexI < MCB.size()); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index c92cf346da3fa9..70022aaad7122f 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -28,6 +28,7 @@ class MCContext; class MCExpr; class MCInstrDesc; class MCInstrInfo; +class MCRegisterInfo; class MCSubtargetInfo; class DuplexCandidate { @@ -91,7 +92,8 @@ size_t bundleSize(MCInst const &MCI); // Put the packet in to canonical form, compound, duplex, pad, and shuffle bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCContext &Context, MCInst &MCB, - HexagonMCChecker *Checker); + HexagonMCChecker *Checker, + bool AttemptCompatibility = false); // Create a duplex instruction given the two subinsts MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0, @@ -257,6 +259,8 @@ bool isMemReorderDisabled(MCInst const &MCI); // Return whether the insn is a new-value consumer. bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); +/// Return whether the insn is a new-value store.
+bool isNewValueStore(MCInstrInfo const &MCII, MCInst const &MCI); bool isOpExtendable(MCInstrInfo const &MCII, MCInst const &MCI, unsigned short); // Can these two instructions be duplexed @@ -275,8 +279,11 @@ bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI); // Return whether the predicate sense is true bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI); -// Is this a predicate register -bool isPredReg(unsigned Reg); +// Return true if this is a scalar predicate register. +bool isPredReg(MCRegisterInfo const &MRI, unsigned Reg); + +// Returns true if the Ith operand is a predicate register. +bool isPredRegister(MCInstrInfo const &MCII, MCInst const &Inst, unsigned I); // Return whether the insn is a prefix. bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI); @@ -295,6 +302,21 @@ bool isVector(MCInstrInfo const &MCII, MCInst const &MCI); bool mustExtend(MCExpr const &Expr); bool mustNotExtend(MCExpr const &Expr); +// Returns true if this instruction requires a slot to execute. +bool requiresSlot(MCSubtargetInfo const &STI, MCInst const &MCI); + +unsigned packetSize(StringRef CPU); + +// Returns the maximum number of slots available in the given +// subtarget's packets. +unsigned packetSizeSlots(MCSubtargetInfo const &STI); + +// Returns the number of slots consumed by this packet, considering duplexed +// and compound instructions. +unsigned slotsConsumed(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst const &MCI); + + // Pad the bundle with nops to satisfy endloop requirements void padEndloop(MCInst &MCI, MCContext &Context); class PredicateInfo { diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 9078be34dca818..5d701a1a1585e1 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -75,6 +75,8 @@ cl::opt<bool> MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"), cl::init(false)); cl::opt<bool> MV67("mv67", cl::Hidden, cl::desc("Build for Hexagon V67"), cl::init(false)); +cl::opt<bool> MV67T("mv67t", cl::Hidden, cl::desc("Build for Hexagon V67T"), + cl::init(false)); cl::opt<bool> EnableHVX("mhvx", @@ -113,14 +115,20 @@ static StringRef HexagonGetArchVariant() { return "hexagonv66"; if (MV67) return "hexagonv67"; + if (MV67T) + return "hexagonv67t"; return ""; } StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) { StringRef ArchV = HexagonGetArchVariant(); if (!ArchV.empty() && !CPU.empty()) { - if (ArchV != CPU) - report_fatal_error("conflicting architectures specified."); + // Tiny cores have a "t" suffix that is discarded when creating a secondary + // non-tiny subtarget.
See: addArchSubtarget + std::pair<StringRef, StringRef> ArchP = ArchV.split('t'); + std::pair<StringRef, StringRef> CPUP = CPU.split('t'); + if (!ArchP.first.equals(CPUP.first)) + report_fatal_error("conflicting architectures specified."); return CPU; } if (ArchV.empty()) { @@ -175,6 +183,14 @@ unsigned llvm::HexagonConvertUnits(unsigned ItinUnits, unsigned *Lanes) { } +namespace llvm { +namespace HexagonFUnits { +bool isSlot0Only(unsigned units) { + return HexagonItinerariesV62FU::SLOT0 == units; +} +} // namespace HexagonFUnits +} // namespace llvm + namespace { class HexagonTargetAsmStreamer : public HexagonTargetStreamer { @@ -353,7 +369,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { .Case("hexagonv62", "+hvxv62") .Case("hexagonv65", "+hvxv65") .Case("hexagonv66", "+hvxv66") - .Case("hexagonv67", "+hvxv67")); + .Case("hexagonv67", "+hvxv67") + .Case("hexagonv67t", "+hvxv67")); break; } case Hexagon::ArchEnum::NoArch: @@ -376,6 +393,18 @@ std::pair<std::string, std::string> selectCPUAndFS(StringRef CPU, Result.second = selectHexagonFS(Result.first, FS); return Result; } +std::mutex ArchSubtargetMutex; +std::unordered_map<std::string, std::unique_ptr<MCSubtargetInfo const>> + ArchSubtarget; +} // namespace + +MCSubtargetInfo const * +Hexagon_MC::getArchSubtarget(MCSubtargetInfo const *STI) { + std::lock_guard<std::mutex> Lock(ArchSubtargetMutex); + auto Existing = ArchSubtarget.find(std::string(STI->getCPU())); + if (Existing == ArchSubtarget.end()) + return nullptr; + return Existing->second.get(); } FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { @@ -440,6 +469,8 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, StringRef ArchFS = Features.second; MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS); + if (X != nullptr && (CPUName == "hexagonv67t")) + addArchSubtarget(X, ArchFS); if (CPU.equals("help")) exit(0); @@ -470,6 +501,19 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, return X; } +void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, + StringRef FS) { + assert(STI != nullptr); + if (STI->getCPU().contains("t")) { + auto ArchSTI = createHexagonMCSubtargetInfo( + STI->getTargetTriple(), + STI->getCPU().substr(0, STI->getCPU().size() - 1), FS); + std::lock_guard<std::mutex> Lock(ArchSubtargetMutex); + ArchSubtarget[std::string(STI->getCPU())] = + std::unique_ptr<MCSubtargetInfo const>(ArchSTI); + } +} + unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { static std::map<StringRef, unsigned> ElfFlags = { {"hexagonv5", ELF::EF_HEXAGON_MACH_V5}, @@ -479,6 +523,7 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { {"hexagonv65", ELF::EF_HEXAGON_MACH_V65}, {"hexagonv66", ELF::EF_HEXAGON_MACH_V66}, {"hexagonv67", ELF::EF_HEXAGON_MACH_V67}, + {"hexagonv67t", ELF::EF_HEXAGON_MACH_V67T}, }; auto F = ElfFlags.find(STI.getCPU()); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 4066a43238e9c7..6cc6f51ab12c48 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -78,6 +78,9 @@ namespace Hexagon_MC { /// etc. do not need to go through TargetRegistry.
MCSubtargetInfo *createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS); + MCSubtargetInfo const *getArchSubtarget(MCSubtargetInfo const *STI); + void addArchSubtarget(MCSubtargetInfo const *STI, + StringRef FS); unsigned GetELFFlags(const MCSubtargetInfo &STI); } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index f20c58ac965083..2788b86181e27f 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -184,10 +184,6 @@ void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender, Packet.push_back(PI); } -const static struct { - unsigned first; - unsigned second; -} jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}}; static const unsigned Slot0Mask = 1 << 0; static const unsigned Slot1Mask = 1 << 1; @@ -256,6 +252,8 @@ bool HexagonShuffler::applySlotRestrictions( restrictSlot1AOK(Summary); restrictNoSlot1Store(Summary); + permitNonSlot(); + // These restrictions can modify the slot masks in the instructions // in the Packet member, but they can also detect constraint failures // which are fatal. @@ -279,20 +277,22 @@ void HexagonShuffler::restrictBranchOrder(HexagonPacketSummary const &Summary) { return; } + const static std::pair<unsigned, unsigned> jumpSlots[] = { + {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}}; // try all possible choices - for (unsigned int i = 0; i < array_lengthof(jumpSlots); ++i) { + for (std::pair<unsigned, unsigned> jumpSlot : jumpSlots) { // validate first jump with this slot rule - if (!(jumpSlots[i].first & Summary.branchInsts[0]->Core.getUnits())) + if (!(jumpSlot.first & Summary.branchInsts[0]->Core.getUnits())) continue; // validate second jump with this slot rule - if (!(jumpSlots[i].second & Summary.branchInsts[1]->Core.getUnits())) + if (!(jumpSlot.second & Summary.branchInsts[1]->Core.getUnits())) continue; // both valid for this configuration, set new slot rules const HexagonPacket PacketSave = Packet; - Summary.branchInsts[0]->Core.setUnits(jumpSlots[i].first); - Summary.branchInsts[1]->Core.setUnits(jumpSlots[i].second); + Summary.branchInsts[0]->Core.setUnits(jumpSlot.first); + Summary.branchInsts[1]->Core.setUnits(jumpSlot.second); const bool HasShuffledPacket = tryAuction(Summary).hasValue(); if (HasShuffledPacket) @@ -306,6 +306,15 @@ void HexagonShuffler::restrictBranchOrder(HexagonPacketSummary const &Summary) { reportError("invalid instruction packet: out of slots"); } + +void HexagonShuffler::permitNonSlot() { + for (HexagonInstr &ISJ : insts()) { + const bool RequiresSlot = HexagonMCInstrInfo::requiresSlot(STI, *ISJ.ID); + if (!RequiresSlot) + ISJ.Core.setAllUnits(); + } +} + bool HexagonShuffler::ValidResourceUsage(HexagonPacketSummary const &Summary) { Optional<HexagonPacket> ShuffledPacket = tryAuction(Summary); @@ -394,8 +403,16 @@ bool HexagonShuffler::restrictStoreLoadOrder( // A single store must use slot #0.
if (HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()) { if (!Summary.store0) { - if (Summary.stores == 1 && - (Summary.loads == 0 || !isMemReorderDisabled())) + const bool PacketHasNoOnlySlot0 = + llvm::none_of(insts(), [&](HexagonInstr const &I) { + return I.Core.getUnits() == Slot0Mask && + I.ID->getOpcode() != ID.getOpcode(); + }); + const bool SafeToMoveToSlot0 = + (Summary.loads == 0) || + (!isMemReorderDisabled() && PacketHasNoOnlySlot0); + + if (Summary.stores == 1 && SafeToMoveToSlot0) // Pin the store to slot #0 only if isMemReorderDisabled() == false ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore); else if (Summary.stores >= 1) { @@ -416,12 +433,6 @@ bool HexagonShuffler::restrictStoreLoadOrder( return false; } } - - if (!ISJ->Core.getUnits()) { - // Error if insn may not be executed in any slot. - reportError("invalid instruction packet: out of slots"); - return false; - } } return true; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 5f563ced734397..f7f646c6f5629f 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" @@ -47,6 +48,9 @@ class HexagonResource { setWeight(s); } + void setAllUnits() { + setUnits(((1u << HEXAGON_PACKET_SIZE) - 1)); + } unsigned setWeight(unsigned s); unsigned getUnits() const { return (Slots); } @@ -178,12 +182,13 @@ class HexagonShuffler { void restrictSlot1AOK(HexagonPacketSummary const &Summary); void restrictNoSlot1Store(HexagonPacketSummary const &Summary); void restrictNoSlot1(); - - Optional<HexagonPacket> tryAuction(HexagonPacketSummary const &Summary) const; - bool restrictStoreLoadOrder(HexagonPacketSummary const &Summary); void restrictBranchOrder(HexagonPacketSummary const &Summary); void restrictPreferSlot3(HexagonPacketSummary const &Summary); + void permitNonSlot(); + + Optional<HexagonPacket> tryAuction(HexagonPacketSummary const &Summary) const; + HexagonPacketSummary GetPacketSummary(); bool ValidPacketMemoryOps(HexagonPacketSummary const &Summary) const; bool ValidResourceUsage(HexagonPacketSummary const &Summary); @@ -227,11 +232,10 @@ class HexagonShuffler { using InstPredicate = bool (*)(MCInstrInfo const &, MCInst const &); bool HasInstWith(InstPredicate Pred) const { - return llvm::any_of(make_range(cbegin(), cend()), - [&](HexagonInstr const &I) { - MCInst const &Inst = I.getDesc(); - return (*Pred)(MCII, Inst); - }); + return llvm::any_of(insts(), [&](HexagonInstr const &I) { + MCInst const &Inst = I.getDesc(); + return (*Pred)(MCII, Inst); + }); } // Add insn handle to the bundle .
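A minimal usage sketch of the MC slot-accounting helpers added above (packetSizeSlots, slotsConsumed); the fitsInPacket helper below is illustrative only, and MCII, STI and Bundle are assumed to come from the usual MC plumbing:

  #include "MCTargetDesc/HexagonMCInstrInfo.h"

  // Reject a bundle that needs more slots than the subtarget provides:
  // packetSizeSlots() is 3 for the tiny core and 4 otherwise, while
  // slotsConsumed() counts a duplex as two slots and skips slot-free
  // instructions such as immediate extenders.
  static bool fitsInPacket(llvm::MCInstrInfo const &MCII,
                           llvm::MCSubtargetInfo const &STI,
                           llvm::MCInst const &Bundle) {
    return llvm::HexagonMCInstrInfo::slotsConsumed(MCII, STI, Bundle) <=
           llvm::HexagonMCInstrInfo::packetSizeSlots(STI);
  }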
diff --git a/llvm/test/CodeGen/Hexagon/disable-const64-tinycore.ll b/llvm/test/CodeGen/Hexagon/disable-const64-tinycore.ll new file mode 100644 index 00000000000000..7ed59edc626b64 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/disable-const64-tinycore.ll @@ -0,0 +1,91 @@ +; RUN: llc -mtriple=hexagon-unknown-elf -mcpu=hexagonv67t < %s | FileCheck %s + +;CHECK-NOT: CONST64 + +define dso_local void @analyze(i16* nocapture %analysisBuffer0, i16* nocapture %analysisBuffer1, i32* nocapture %subband) local_unnamed_addr { +entry: + %0 = load i64, i64* undef, align 8 + %1 = tail call i64 @llvm.hexagon.S2.vtrunewh(i64 %0, i64 undef) + %2 = tail call i64 @llvm.hexagon.S2.vtrunowh(i64 %0, i64 undef) + %_HEXAGON_V64_internal_union.sroa.3.0.extract.shift = and i64 %1, -4294967296 + %3 = shl i64 %2, 32 + %conv15 = ashr exact i64 %3, 32 + %arrayidx16 = getelementptr inbounds i16, i16* %analysisBuffer0, i32 4 + %4 = bitcast i16* %arrayidx16 to i64* + store i64 %_HEXAGON_V64_internal_union.sroa.3.0.extract.shift, i64* %4, align 8 + %arrayidx17 = getelementptr inbounds i16, i16* %analysisBuffer1, i32 4 + %5 = bitcast i16* %arrayidx17 to i64* + store i64 %conv15, i64* %5, align 8 + %arrayidx18 = getelementptr inbounds i16, i16* %analysisBuffer1, i32 8 + %6 = bitcast i16* %arrayidx18 to i64* + %7 = load i64, i64* %6, align 8 + %8 = tail call i64 @llvm.hexagon.M2.mmachs.s1(i64 undef, i64 29819854865948160, i64 %7) + store i64 %8, i64* %6, align 8 + %arrayidx34 = getelementptr inbounds i16, i16* %analysisBuffer0, i32 40 + %9 = bitcast i16* %arrayidx34 to i64* + %10 = load i64, i64* %9, align 8 + %11 = tail call i64 @llvm.hexagon.M2.mmachs.s1(i64 undef, i64 282574488406740992, i64 %10) + %arrayidx35 = getelementptr inbounds i16, i16* %analysisBuffer0, i32 56 + %12 = bitcast i16* %arrayidx35 to i64* + %13 = load i64, i64* %12, align 8 + %14 = tail call i64 @llvm.hexagon.M2.mmacls.s1(i64 undef, i64 undef, i64 %13) + %15 = tail call i64 @llvm.hexagon.M2.mmachs.s1(i64 %8, i64 282574488406740992, i64 %7) + %16 = load i64, i64* null, align 8 + %17 = tail call i64 @llvm.hexagon.M2.mmacls.s1(i64 %14, i64 27234903028652032, i64 %16) + %18 = tail call i64 @llvm.hexagon.M2.mmacls.s1(i64 undef, i64 27234903028652032, i64 %7) + %19 = tail call i64 @llvm.hexagon.M2.mmachs.s1(i64 %15, i64 7661056, i64 %7) + %_HEXAGON_V64_internal_union53.sroa.3.0.extract.shift = lshr i64 %17, 32 + %_HEXAGON_V64_internal_union62.sroa.3.0.extract.shift = and i64 %18, -4294967296 + %_HEXAGON_V64_internal_union71.sroa.0.0.insert.insert = or i64 %_HEXAGON_V64_internal_union62.sroa.3.0.extract.shift, %_HEXAGON_V64_internal_union53.sroa.3.0.extract.shift + %_HEXAGON_V64_internal_union79.sroa.4.0.insert.shift = shl i64 %19, 32 + %_HEXAGON_V64_internal_union79.sroa.0.0.insert.ext = and i64 %11, 4294967295 + %_HEXAGON_V64_internal_union79.sroa.0.0.insert.insert = or i64 %_HEXAGON_V64_internal_union79.sroa.4.0.insert.shift, %_HEXAGON_V64_internal_union79.sroa.0.0.insert.ext + %20 = bitcast i32* %subband to i64* + %21 = tail call i64 @llvm.hexagon.M2.mmpyh.s0(i64 %_HEXAGON_V64_internal_union71.sroa.0.0.insert.insert, i64 undef) + %22 = tail call i64 @llvm.hexagon.A2.vsubw(i64 undef, i64 %21) + %23 = tail call i64 @llvm.hexagon.A2.vaddw(i64 undef, i64 undef) + %24 = tail call i64 @llvm.hexagon.S2.asl.i.vw(i64 %23, i32 2) + %25 = tail call i64 @llvm.hexagon.M2.mmpyl.s0(i64 0, i64 undef) + %26 = tail call i64 @llvm.hexagon.S2.asl.i.vw(i64 %25, i32 2) + %27 = tail call i64 @llvm.hexagon.A2.vsubw(i64 undef, i64 %24) + %28 = tail call i64 
@llvm.hexagon.A2.vaddw(i64 %26, i64 %_HEXAGON_V64_internal_union79.sroa.0.0.insert.insert) + %29 = tail call i64 @llvm.hexagon.M2.mmpyh.s0(i64 %28, i64 undef) + %30 = tail call i64 @llvm.hexagon.M2.mmpyl.s0(i64 %27, i64 3998767301) + %31 = tail call i64 @llvm.hexagon.S2.asl.i.vw(i64 %30, i32 2) + %32 = tail call i64 @llvm.hexagon.A2.vaddw(i64 undef, i64 %29) + %33 = tail call i64 @llvm.hexagon.A2.vaddw(i64 0, i64 %31) + %34 = tail call i64 @llvm.hexagon.A2.vaddw(i64 %22, i64 undef) + %_HEXAGON_V64_internal_union8.sroa.0.0.insert.ext.i = and i64 %32, 4294967295 + store i64 %_HEXAGON_V64_internal_union8.sroa.0.0.insert.ext.i, i64* %20, align 8 + %_HEXAGON_V64_internal_union17.sroa.5.0.insert.shift.i = shl i64 %34, 32 + %_HEXAGON_V64_internal_union17.sroa.0.0.insert.ext.i = and i64 %33, 4294967295 + %_HEXAGON_V64_internal_union17.sroa.0.0.insert.insert.i = or i64 %_HEXAGON_V64_internal_union17.sroa.5.0.insert.shift.i, %_HEXAGON_V64_internal_union17.sroa.0.0.insert.ext.i + %arrayidx31.i = getelementptr inbounds i32, i32* %subband, i32 2 + %35 = bitcast i32* %arrayidx31.i to i64* + store i64 %_HEXAGON_V64_internal_union17.sroa.0.0.insert.insert.i, i64* %35, align 8 + %_HEXAGON_V64_internal_union32.sroa.0.0.insert.ext.i = and i64 %23, 4294967295 + %arrayidx46.i = getelementptr inbounds i32, i32* %subband, i32 4 + %36 = bitcast i32* %arrayidx46.i to i64* + store i64 %_HEXAGON_V64_internal_union32.sroa.0.0.insert.ext.i, i64* %36, align 8 + %arrayidx55.i = getelementptr inbounds i32, i32* %subband, i32 6 + %37 = bitcast i32* %arrayidx55.i to i64* + store i64 0, i64* %37, align 8 + %arrayidx64.i = getelementptr inbounds i32, i32* %subband, i32 8 + %38 = bitcast i32* %arrayidx64.i to i64* + store i64 0, i64* %38, align 8 + %arrayidx73.i = getelementptr inbounds i32, i32* %subband, i32 12 + %39 = bitcast i32* %arrayidx73.i to i64* + store i64 0, i64* %39, align 8 + ret void +} + +; Function Attrs: nounwind readnone +declare i64 @llvm.hexagon.S2.vtrunewh(i64, i64) +declare i64 @llvm.hexagon.S2.vtrunowh(i64, i64) +declare i64 @llvm.hexagon.M2.mmachs.s1(i64, i64, i64) +declare i64 @llvm.hexagon.M2.mmacls.s1(i64, i64, i64) +declare i64 @llvm.hexagon.M2.mmpyh.s0(i64, i64) +declare i64 @llvm.hexagon.A2.vsubw(i64, i64) +declare i64 @llvm.hexagon.A2.vaddw(i64, i64) +declare i64 @llvm.hexagon.S2.asl.i.vw(i64, i32) +declare i64 @llvm.hexagon.M2.mmpyl.s0(i64, i64) diff --git a/llvm/test/CodeGen/Hexagon/disable-const64.ll b/llvm/test/CodeGen/Hexagon/disable-const64.ll new file mode 100644 index 00000000000000..df702537163e02 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/disable-const64.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv67t < %s | FileCheck %s + +target triple = "hexagon" + +; Disable CONST64 for tiny core since it is a memory operation and tiny core has +; only one memory resource per packet. 
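+; With CONST64 disabled, the '##' checks below expect the 64-bit constants to
+; be materialized from extended immediates rather than through a CONST64 load.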
+; CHECK: ## +; CHECK: ## + +define void @analyze(i16* nocapture %in) local_unnamed_addr { +entry: + %0 = bitcast i16* %in to i64* + %1 = tail call i64 @llvm.hexagon.M2.mmachs.s1(i64 10230955697128160, i64 10230955697128160, i64 0) + store i64 %1, i64* %0, align 8 + ret void +} + +; CHECK-NOT: CONST64 +define dso_local void @analyze2(i16* nocapture %in) local_unnamed_addr { +entry: + %arrayidx = getelementptr inbounds i16, i16* %in, i32 3 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i64 + %1 = tail call i64 @llvm.hexagon.M2.mmacls.s1(i64 undef, i64 30432282833584128, i64 %conv) + %arrayidx4 = getelementptr inbounds i16, i16* %in, i32 4 + %2 = bitcast i16* %arrayidx4 to i64* + store i64 %1, i64* %2, align 8 + ret void +} + +; Function Attrs: nounwind readnone +declare i64 @llvm.hexagon.M2.mmachs.s1(i64, i64, i64) +declare i64 @llvm.hexagon.M2.mmacls.s1(i64, i64, i64) diff --git a/llvm/test/CodeGen/Hexagon/tc_duplex.ll b/llvm/test/CodeGen/Hexagon/tc_duplex.ll new file mode 100644 index 00000000000000..a98e56293644ee --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/tc_duplex.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=hexagon -mattr=+duplex -mcpu=hexagonv67t < %s | FileCheck %s + +; Check that we generate two memory operations in tiny core if duplexes +; are enabled. + +; CHECK: { +; CHECK: memw +; CHECK-NEXT: memw +; CHECK: } +; CHECK: { +; CHECK: memw +; CHECK-NEXT: memw +; CHECK: } + +define i32 @test(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %n) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx.inc = getelementptr i32, i32* %a, i32 1 + %arrayidx1.inc = getelementptr i32, i32* %b, i32 1 + %2 = load i32, i32* %arrayidx.inc, align 4 + %3 = load i32, i32* %arrayidx1.inc, align 4 + %mul.1 = mul nsw i32 %3, %2 + %add.1 = add nsw i32 %mul.1, %mul + ret i32 %add.1 +} diff --git a/llvm/test/CodeGen/Hexagon/tc_duplex_asm.ll b/llvm/test/CodeGen/Hexagon/tc_duplex_asm.ll new file mode 100644 index 00000000000000..1c6366d1f4a130 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/tc_duplex_asm.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hexagon -mattr=+duplex -mcpu=hexagonv67t -filetype=obj < %s \ +; RUN: -o - | llvm-objdump -d - | FileCheck %s + +; Check that we generate two memory operations in tiny core if duplexes +; are enabled. + +; CHECK: memw{{.*}};{{.*}}memw +; CHECK: memw{{.*}};{{.*}}memw + +define i32 @test(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %n) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx.inc = getelementptr i32, i32* %a, i32 1 + %arrayidx1.inc = getelementptr i32, i32* %b, i32 1 + %2 = load i32, i32* %arrayidx.inc, align 4 + %3 = load i32, i32* %arrayidx1.inc, align 4 + %mul.1 = mul nsw i32 %3, %2 + %add.1 = add nsw i32 %mul.1, %mul + ret i32 %add.1 +} diff --git a/llvm/test/CodeGen/Hexagon/tc_sched.ll b/llvm/test/CodeGen/Hexagon/tc_sched.ll new file mode 100644 index 00000000000000..130f49207be3e5 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/tc_sched.ll @@ -0,0 +1,82 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv67t < %s | FileCheck %s + +; A simple test case for the tiny core instruction latency information. 
+ +; CHECK-LABEL: test +; CHECK-DAG: [[REG1:r([0-9]+)]] = memw([[REG0:r[0-9]+]]+#0) +; CHECK-DAG: [[REG2:r([0-9]+)]] = memw([[REG0]]+#4) +; CHECK-NEXT: } +; CHECK: { +; CHECK: { +; CHECK-NEXT: = add([[REG2]],[[REG1]]) + +define i32 @test(i32* nocapture readonly %p) local_unnamed_addr #0 { +entry: + %incdec.ptr = getelementptr inbounds i32, i32* %p, i32 1 + %0 = load i32, i32* %p, align 4 + %incdec.ptr1 = getelementptr inbounds i32, i32* %p, i32 2 + %1 = load i32, i32* %incdec.ptr, align 4 + %incdec.ptr2 = getelementptr inbounds i32, i32* %p, i32 3 + %2 = load i32, i32* %incdec.ptr1, align 4 + %3 = load i32, i32* %incdec.ptr2, align 4 + %add = add nsw i32 %1, %0 + %add4 = add nsw i32 %3, %2 + %mul = mul nsw i32 %add4, %add + ret i32 %mul +} + +; CHECK-LABEL: test1 +; CHECK-DAG: [[REG4:r([0-9]+)]] = memw([[REG3:r[0-9]+]]+#0) +; CHECK-DAG: [[REG5:r([0-9]+)]] = memw([[REG3]]+#4) +; CHECK-NEXT: } +; CHECK: { +; CHECK: { +; CHECK-NEXT: [[REG7:r([0-9]+)]] = add([[REG5]],[[REG4]]) +; CHECK: } +; CHECK-NEXT: { +; CHECK-NEXT: = sub([[REG7]] + +define i32 @test1(i32* nocapture readonly %p) local_unnamed_addr #0 { +entry: + %incdec.ptr = getelementptr inbounds i32, i32* %p, i32 1 + %0 = load i32, i32* %p, align 4 + %incdec.ptr1 = getelementptr inbounds i32, i32* %p, i32 2 + %1 = load i32, i32* %incdec.ptr, align 4 + %incdec.ptr2 = getelementptr inbounds i32, i32* %p, i32 3 + %2 = load i32, i32* %incdec.ptr1, align 4 + %3 = load i32, i32* %incdec.ptr2, align 4 + %add4.neg = add i32 %1, %0 + %add = sub i32 %add4.neg, %2 + %sub = sub i32 %add, %3 + ret i32 %sub +} + +; Test that multiplies are not placed in the same packet. +; CHECK-LABEL: test2 +; CHECK: = mpyi +; CHECK: } +; CHECK: = mpyi +; CHECK: } +; CHECK: = mpyi +; CHECK: } +; CHECK: = mpyi + +define i32 @test2(i32* nocapture readonly %p) local_unnamed_addr #1 { +entry: + %incdec.ptr = getelementptr inbounds i32, i32* %p, i32 1 + %0 = load i32, i32* %p, align 4 + %incdec.ptr1 = getelementptr inbounds i32, i32* %p, i32 2 + %1 = load i32, i32* %incdec.ptr, align 4 + %incdec.ptr2 = getelementptr inbounds i32, i32* %p, i32 3 + %2 = load i32, i32* %incdec.ptr1, align 4 + %3 = load i32, i32* %incdec.ptr2, align 4 + %mul = mul nsw i32 %1, %0 + %mul4 = mul nsw i32 %3, %2 + %mul5 = mul nsw i32 %3, %0 + %mul6 = mul nsw i32 %2, %1 + %call = tail call i32 @foo(i32 %mul, i32 %mul4, i32 %mul5, i32 %mul6) #3 + ret i32 %call +} + +declare i32 @foo(i32, i32, i32, i32) local_unnamed_addr #2 + diff --git a/llvm/test/CodeGen/Hexagon/tc_sched1.ll b/llvm/test/CodeGen/Hexagon/tc_sched1.ll new file mode 100644 index 00000000000000..7ea71f730d11a2 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/tc_sched1.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv67t < %s | FileCheck %s + +; Another scheduling test for Tiny Core. 
+ +; CHECK: memw +; CHECK: } +; CHECK: memw +; CHECK: } +; CHECK: memw +; CHECK: } +; CHECK: mpyi +; CHECK-NOT: } +; CHECK: memw +; CHECK: } +; CHECK: += mpyi +; CHECK-NOT: } +; CHECK: jumpr +; CHECK: } + +define i32 @test(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %n) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx.inc = getelementptr i32, i32* %a, i32 1 + %arrayidx1.inc = getelementptr i32, i32* %b, i32 1 + %2 = load i32, i32* %arrayidx.inc, align 4 + %3 = load i32, i32* %arrayidx1.inc, align 4 + %mul.1 = mul nsw i32 %3, %2 + %add.1 = add nsw i32 %mul.1, %mul + ret i32 %add.1 +} diff --git a/llvm/test/CodeGen/Hexagon/tiny_bkfir_artdeps.ll b/llvm/test/CodeGen/Hexagon/tiny_bkfir_artdeps.ll new file mode 100644 index 00000000000000..67dcf4688e4630 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/tiny_bkfir_artdeps.ll @@ -0,0 +1,130 @@ +; RUN: llc -march=hexagon -mv67t -debug-only=pipeliner < %s 2>&1 | FileCheck %s + +; Test that the artificial dependencies have been created. + +; CHECK: Ord Latency=0 Artificial + +define void @bkfir(i32* nocapture readonly %in, i32* nocapture readonly %coefs, i32 %tap, i32 %length, i32* nocapture %out) local_unnamed_addr #0 { +entry: + %0 = bitcast i32* %out to i64* + %cmp141 = icmp sgt i32 %length, 0 + br i1 %cmp141, label %for.body.lr.ph, label %for.end52 + +for.body.lr.ph: + %1 = bitcast i32* %coefs to i64* + %cmp8127 = icmp sgt i32 %tap, 0 + br i1 %cmp8127, label %for.body.us.preheader, label %for.body.lr.ph.split + +for.body.us.preheader: + br label %for.body.us + +for.body.us: + %add.ptr.us.phi = phi i32* [ %add.ptr.us.inc, %for.cond7.for.end_crit_edge.us ], [ %in, %for.body.us.preheader ] + %i.0143.us = phi i32 [ %add51.us, %for.cond7.for.end_crit_edge.us ], [ 0, %for.body.us.preheader ] + %optr.0142.us = phi i64* [ %incdec.ptr49.us, %for.cond7.for.end_crit_edge.us ], [ %0, %for.body.us.preheader ] + %2 = bitcast i32* %add.ptr.us.phi to i64* + %incdec.ptr.us = getelementptr inbounds i32, i32* %add.ptr.us.phi, i32 2 + %3 = bitcast i32* %incdec.ptr.us to i64* + %4 = load i64, i64* %2, align 8 + %incdec.ptr1.us = getelementptr inbounds i32, i32* %add.ptr.us.phi, i32 4 + %5 = bitcast i32* %incdec.ptr1.us to i64* + %6 = load i64, i64* %3, align 8 + %_Q6V64_internal_union.sroa.0.0.extract.trunc.us = trunc i64 %6 to i32 + %_Q6V64_internal_union2.sroa.3.0.extract.shift.us = lshr i64 %4, 32 + %_Q6V64_internal_union2.sroa.3.0.extract.trunc.us = trunc i64 %_Q6V64_internal_union2.sroa.3.0.extract.shift.us to i32 + %7 = tail call i64 @llvm.hexagon.A2.combinew(i32 %_Q6V64_internal_union.sroa.0.0.extract.trunc.us, i32 %_Q6V64_internal_union2.sroa.3.0.extract.trunc.us) + %add.ptr.us.inc = getelementptr i32, i32* %add.ptr.us.phi, i32 4 + br label %for.body9.us + +for.body9.us: + %j.0137.us = phi i32 [ 0, %for.body.us ], [ %add.us, %for.body9.us ] + %x0x1.0136.us = phi i64 [ %4, %for.body.us ], [ %10, %for.body9.us ] + %x2x3.0135.us = phi i64 [ %6, %for.body.us ], [ %11, %for.body9.us ] + %x1x2.0134.us = phi i64 [ %7, %for.body.us ], [ %13, %for.body9.us ] + %iptrD.0133.us = phi i64* [ %5, %for.body.us ], [ %incdec.ptr13.us, %for.body9.us ] + %iptrC.0132.us = phi i64* [ %1, %for.body.us ], [ %incdec.ptr11.us, %for.body9.us ] + %sum0.0131.us = phi i64 [ 0, %for.body.us ], [ %18, %for.body9.us ] + %sum1.0130.us = phi i64 [ 0, %for.body.us ], [ %19, %for.body9.us ] + %sum2.0129.us = phi i64 [ 0, %for.body.us ], [ %20, %for.body9.us ] + %sum3.0128.us = 
phi i64 [ 0, %for.body.us ], [ %21, %for.body9.us ] + %incdec.ptr10.us = getelementptr inbounds i64, i64* %iptrC.0132.us, i32 1 + %8 = load i64, i64* %iptrC.0132.us, align 8 + %incdec.ptr11.us = getelementptr inbounds i64, i64* %iptrC.0132.us, i32 2 + %9 = load i64, i64* %incdec.ptr10.us, align 8 + %incdec.ptr12.us = getelementptr inbounds i64, i64* %iptrD.0133.us, i32 1 + %10 = load i64, i64* %iptrD.0133.us, align 8 + %incdec.ptr13.us = getelementptr inbounds i64, i64* %iptrD.0133.us, i32 2 + %11 = load i64, i64* %incdec.ptr12.us, align 8 + %_Q6V64_internal_union14.sroa.0.0.extract.trunc.us = trunc i64 %10 to i32 + %_Q6V64_internal_union14.sroa.4.0.extract.shift.us = lshr i64 %10, 32 + %_Q6V64_internal_union19.sroa.3.0.extract.shift.us = lshr i64 %x2x3.0135.us, 32 + %_Q6V64_internal_union19.sroa.3.0.extract.trunc.us = trunc i64 %_Q6V64_internal_union19.sroa.3.0.extract.shift.us to i32 + %12 = tail call i64 @llvm.hexagon.A2.combinew(i32 %_Q6V64_internal_union14.sroa.0.0.extract.trunc.us, i32 %_Q6V64_internal_union19.sroa.3.0.extract.trunc.us) + %_Q6V64_internal_union24.sroa.0.0.extract.trunc.us = trunc i64 %11 to i32 + %_Q6V64_internal_union29.sroa.3.0.extract.trunc.us = trunc i64 %_Q6V64_internal_union14.sroa.4.0.extract.shift.us to i32 + %13 = tail call i64 @llvm.hexagon.A2.combinew(i32 %_Q6V64_internal_union24.sroa.0.0.extract.trunc.us, i32 %_Q6V64_internal_union29.sroa.3.0.extract.trunc.us) + %14 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum0.0131.us, i64 %x0x1.0136.us, i64 %8) + %15 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum1.0130.us, i64 %x1x2.0134.us, i64 %8) + %16 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum2.0129.us, i64 %x2x3.0135.us, i64 %8) + %17 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum3.0128.us, i64 %12, i64 %8) + %18 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %14, i64 %x2x3.0135.us, i64 %9) + %19 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %15, i64 %12, i64 %9) + %20 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %16, i64 %10, i64 %9) + %21 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %17, i64 %13, i64 %9) + %add.us = add nuw nsw i32 %j.0137.us, 4 + %cmp8.us = icmp slt i32 %add.us, %tap + br i1 %cmp8.us, label %for.body9.us, label %for.cond7.for.end_crit_edge.us + +for.cond7.for.end_crit_edge.us: + %22 = ashr i64 %18, 39 + %23 = ashr i64 %19, 39 + %24 = ashr i64 %20, 39 + %25 = ashr i64 %21, 39 + %26 = tail call i32 @llvm.hexagon.A2.sat(i64 %22) + %27 = tail call i32 @llvm.hexagon.A2.sat(i64 %23) + %28 = tail call i32 @llvm.hexagon.A2.sat(i64 %24) + %29 = tail call i32 @llvm.hexagon.A2.sat(i64 %25) + %_Q6V64_internal_union34.sroa.4.0.insert.ext.us = zext i32 %27 to i64 + %_Q6V64_internal_union34.sroa.4.0.insert.shift.us = shl nuw i64 %_Q6V64_internal_union34.sroa.4.0.insert.ext.us, 32 + %_Q6V64_internal_union34.sroa.0.0.insert.ext.us = zext i32 %26 to i64 + %_Q6V64_internal_union34.sroa.0.0.insert.insert.us = or i64 %_Q6V64_internal_union34.sroa.4.0.insert.shift.us, %_Q6V64_internal_union34.sroa.0.0.insert.ext.us + %incdec.ptr41.us = getelementptr inbounds i64, i64* %optr.0142.us, i32 1 + store i64 %_Q6V64_internal_union34.sroa.0.0.insert.insert.us, i64* %optr.0142.us, align 8 + %_Q6V64_internal_union42.sroa.4.0.insert.ext.us = zext i32 %29 to i64 + %_Q6V64_internal_union42.sroa.4.0.insert.shift.us = shl nuw i64 %_Q6V64_internal_union42.sroa.4.0.insert.ext.us, 32 + %_Q6V64_internal_union42.sroa.0.0.insert.ext.us = zext i32 %28 to i64 + %_Q6V64_internal_union42.sroa.0.0.insert.insert.us = or i64 
%_Q6V64_internal_union42.sroa.4.0.insert.shift.us, %_Q6V64_internal_union42.sroa.0.0.insert.ext.us + %incdec.ptr49.us = getelementptr inbounds i64, i64* %optr.0142.us, i32 2 + store i64 %_Q6V64_internal_union42.sroa.0.0.insert.insert.us, i64* %incdec.ptr41.us, align 8 + %add51.us = add nuw nsw i32 %i.0143.us, 4 + %cmp.us = icmp slt i32 %add51.us, %length + br i1 %cmp.us, label %for.body.us, label %for.end52 + +for.body.lr.ph.split: + %30 = tail call i32 @llvm.hexagon.A2.sat(i64 0) + %_Q6V64_internal_union34.sroa.4.0.insert.ext = zext i32 %30 to i64 + %_Q6V64_internal_union34.sroa.4.0.insert.shift = shl nuw i64 %_Q6V64_internal_union34.sroa.4.0.insert.ext, 32 + %_Q6V64_internal_union34.sroa.0.0.insert.insert = or i64 %_Q6V64_internal_union34.sroa.4.0.insert.shift, %_Q6V64_internal_union34.sroa.4.0.insert.ext + br label %for.body + +for.body: + %i.0143 = phi i32 [ 0, %for.body.lr.ph.split ], [ %add51, %for.body ] + %optr.0142 = phi i64* [ %0, %for.body.lr.ph.split ], [ %incdec.ptr49, %for.body ] + %incdec.ptr41 = getelementptr inbounds i64, i64* %optr.0142, i32 1 + store i64 %_Q6V64_internal_union34.sroa.0.0.insert.insert, i64* %optr.0142, align 8 + %incdec.ptr49 = getelementptr inbounds i64, i64* %optr.0142, i32 2 + store i64 %_Q6V64_internal_union34.sroa.0.0.insert.insert, i64* %incdec.ptr41, align 8 + %add51 = add nuw nsw i32 %i.0143, 4 + %cmp = icmp slt i32 %add51, %length + br i1 %cmp, label %for.body, label %for.end52 + +for.end52: + ret void +} + +declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1 +declare i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64, i64, i64) #1 +declare i32 @llvm.hexagon.A2.sat(i64) #1 + +attributes #0 = { nounwind "target-cpu"="hexagonv67t" "target-features"="+audio" } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/tiny_bkfir_loop_align.ll b/llvm/test/CodeGen/Hexagon/tiny_bkfir_loop_align.ll new file mode 100644 index 00000000000000..f2682b2db5a481 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/tiny_bkfir_loop_align.ll @@ -0,0 +1,134 @@ +; RUN: llc -O3 -mv67t -march=hexagon < %s | FileCheck %s + +; Test that the inner loop in the tiny core version of bkfir has the assembler +; directive "p2align 4". 
+ +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK-NOT: falign +; CHECK: p2align 4 +; CHECK: } :endloop0 + +define void @bkfir(i32* nocapture readonly %in, i32* nocapture readonly %coefs, i32 %tap, i32 %length, i32* nocapture %out) local_unnamed_addr #0 { +entry: + %0 = bitcast i32* %out to i64* + %cmp141 = icmp sgt i32 %length, 0 + br i1 %cmp141, label %for.body.lr.ph, label %for.end52 + +for.body.lr.ph: + %1 = bitcast i32* %coefs to i64* + %cmp8127 = icmp sgt i32 %tap, 0 + br i1 %cmp8127, label %for.body.us.preheader, label %for.body.lr.ph.split + +for.body.us.preheader: + br label %for.body.us + +for.body.us: + %add.ptr.us.phi = phi i32* [ %add.ptr.us.inc, %for.cond7.for.end_crit_edge.us ], [ %in, %for.body.us.preheader ] + %i.0143.us = phi i32 [ %add51.us, %for.cond7.for.end_crit_edge.us ], [ 0, %for.body.us.preheader ] + %optr.0142.us = phi i64* [ %incdec.ptr49.us, %for.cond7.for.end_crit_edge.us ], [ %0, %for.body.us.preheader ] + %2 = bitcast i32* %add.ptr.us.phi to i64* + %incdec.ptr.us = getelementptr inbounds i32, i32* %add.ptr.us.phi, i32 2 + %3 = bitcast i32* %incdec.ptr.us to i64* + %4 = load i64, i64* %2, align 8 + %incdec.ptr1.us = getelementptr inbounds i32, i32* %add.ptr.us.phi, i32 4 + %5 = bitcast i32* %incdec.ptr1.us to i64* + %6 = load i64, i64* %3, align 8 + %_Q6V64_internal_union.sroa.0.0.extract.trunc.us = trunc i64 %6 to i32 + %_Q6V64_internal_union2.sroa.3.0.extract.shift.us = lshr i64 %4, 32 + %_Q6V64_internal_union2.sroa.3.0.extract.trunc.us = trunc i64 %_Q6V64_internal_union2.sroa.3.0.extract.shift.us to i32 + %7 = tail call i64 @llvm.hexagon.A2.combinew(i32 %_Q6V64_internal_union.sroa.0.0.extract.trunc.us, i32 %_Q6V64_internal_union2.sroa.3.0.extract.trunc.us) + %add.ptr.us.inc = getelementptr i32, i32* %add.ptr.us.phi, i32 4 + br label %for.body9.us + +for.body9.us: + %j.0137.us = phi i32 [ 0, %for.body.us ], [ %add.us, %for.body9.us ] + %x0x1.0136.us = phi i64 [ %4, %for.body.us ], [ %10, %for.body9.us ] + %x2x3.0135.us = phi i64 [ %6, %for.body.us ], [ %11, %for.body9.us ] + %x1x2.0134.us = phi i64 [ %7, %for.body.us ], [ %13, %for.body9.us ] + %iptrD.0133.us = phi i64* [ %5, %for.body.us ], [ %incdec.ptr13.us, %for.body9.us ] + %iptrC.0132.us = phi i64* [ %1, %for.body.us ], [ %incdec.ptr11.us, %for.body9.us ] + %sum0.0131.us = phi i64 [ 0, %for.body.us ], [ %18, %for.body9.us ] + %sum1.0130.us = phi i64 [ 0, %for.body.us ], [ %19, %for.body9.us ] + %sum2.0129.us = phi i64 [ 0, %for.body.us ], [ %20, %for.body9.us ] + %sum3.0128.us = phi i64 [ 0, %for.body.us ], [ %21, %for.body9.us ] + %incdec.ptr10.us = getelementptr inbounds i64, i64* %iptrC.0132.us, i32 1 + %8 = load i64, i64* %iptrC.0132.us, align 8 + %incdec.ptr11.us = getelementptr inbounds i64, i64* %iptrC.0132.us, i32 2 + %9 = load i64, i64* %incdec.ptr10.us, align 8 + %incdec.ptr12.us = getelementptr inbounds i64, i64* %iptrD.0133.us, i32 1 + %10 = load i64, i64* %iptrD.0133.us, align 8 + %incdec.ptr13.us = getelementptr inbounds i64, i64* %iptrD.0133.us, i32 2 + %11 = load i64, i64* %incdec.ptr12.us, align 8 + %_Q6V64_internal_union14.sroa.0.0.extract.trunc.us = trunc i64 %10 to i32 + %_Q6V64_internal_union14.sroa.4.0.extract.shift.us = lshr i64 %10, 32 + %_Q6V64_internal_union19.sroa.3.0.extract.shift.us = lshr i64 %x2x3.0135.us, 32 + %_Q6V64_internal_union19.sroa.3.0.extract.trunc.us = trunc i64 %_Q6V64_internal_union19.sroa.3.0.extract.shift.us to i32 + %12 = tail call i64 @llvm.hexagon.A2.combinew(i32 %_Q6V64_internal_union14.sroa.0.0.extract.trunc.us, i32 
%_Q6V64_internal_union19.sroa.3.0.extract.trunc.us) + %_Q6V64_internal_union24.sroa.0.0.extract.trunc.us = trunc i64 %11 to i32 + %_Q6V64_internal_union29.sroa.3.0.extract.trunc.us = trunc i64 %_Q6V64_internal_union14.sroa.4.0.extract.shift.us to i32 + %13 = tail call i64 @llvm.hexagon.A2.combinew(i32 %_Q6V64_internal_union24.sroa.0.0.extract.trunc.us, i32 %_Q6V64_internal_union29.sroa.3.0.extract.trunc.us) + %14 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum0.0131.us, i64 %x0x1.0136.us, i64 %8) + %15 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum1.0130.us, i64 %x1x2.0134.us, i64 %8) + %16 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum2.0129.us, i64 %x2x3.0135.us, i64 %8) + %17 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %sum3.0128.us, i64 %12, i64 %8) + %18 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %14, i64 %x2x3.0135.us, i64 %9) + %19 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %15, i64 %12, i64 %9) + %20 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %16, i64 %10, i64 %9) + %21 = tail call i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64 %17, i64 %13, i64 %9) + %add.us = add nuw nsw i32 %j.0137.us, 4 + %cmp8.us = icmp slt i32 %add.us, %tap + br i1 %cmp8.us, label %for.body9.us, label %for.cond7.for.end_crit_edge.us + +for.cond7.for.end_crit_edge.us: + %22 = ashr i64 %18, 39 + %23 = ashr i64 %19, 39 + %24 = ashr i64 %20, 39 + %25 = ashr i64 %21, 39 + %26 = tail call i32 @llvm.hexagon.A2.sat(i64 %22) + %27 = tail call i32 @llvm.hexagon.A2.sat(i64 %23) + %28 = tail call i32 @llvm.hexagon.A2.sat(i64 %24) + %29 = tail call i32 @llvm.hexagon.A2.sat(i64 %25) + %_Q6V64_internal_union34.sroa.4.0.insert.ext.us = zext i32 %27 to i64 + %_Q6V64_internal_union34.sroa.4.0.insert.shift.us = shl nuw i64 %_Q6V64_internal_union34.sroa.4.0.insert.ext.us, 32 + %_Q6V64_internal_union34.sroa.0.0.insert.ext.us = zext i32 %26 to i64 + %_Q6V64_internal_union34.sroa.0.0.insert.insert.us = or i64 %_Q6V64_internal_union34.sroa.4.0.insert.shift.us, %_Q6V64_internal_union34.sroa.0.0.insert.ext.us + %incdec.ptr41.us = getelementptr inbounds i64, i64* %optr.0142.us, i32 1 + store i64 %_Q6V64_internal_union34.sroa.0.0.insert.insert.us, i64* %optr.0142.us, align 8 + %_Q6V64_internal_union42.sroa.4.0.insert.ext.us = zext i32 %29 to i64 + %_Q6V64_internal_union42.sroa.4.0.insert.shift.us = shl nuw i64 %_Q6V64_internal_union42.sroa.4.0.insert.ext.us, 32 + %_Q6V64_internal_union42.sroa.0.0.insert.ext.us = zext i32 %28 to i64 + %_Q6V64_internal_union42.sroa.0.0.insert.insert.us = or i64 %_Q6V64_internal_union42.sroa.4.0.insert.shift.us, %_Q6V64_internal_union42.sroa.0.0.insert.ext.us + %incdec.ptr49.us = getelementptr inbounds i64, i64* %optr.0142.us, i32 2 + store i64 %_Q6V64_internal_union42.sroa.0.0.insert.insert.us, i64* %incdec.ptr41.us, align 8 + %add51.us = add nuw nsw i32 %i.0143.us, 4 + %cmp.us = icmp slt i32 %add51.us, %length + br i1 %cmp.us, label %for.body.us, label %for.end52 + +for.body.lr.ph.split: + %30 = tail call i32 @llvm.hexagon.A2.sat(i64 0) + %_Q6V64_internal_union34.sroa.4.0.insert.ext = zext i32 %30 to i64 + %_Q6V64_internal_union34.sroa.4.0.insert.shift = shl nuw i64 %_Q6V64_internal_union34.sroa.4.0.insert.ext, 32 + %_Q6V64_internal_union34.sroa.0.0.insert.insert = or i64 %_Q6V64_internal_union34.sroa.4.0.insert.shift, %_Q6V64_internal_union34.sroa.4.0.insert.ext + br label %for.body + +for.body: + %i.0143 = phi i32 [ 0, %for.body.lr.ph.split ], [ %add51, %for.body ] + %optr.0142 = phi i64* [ %0, %for.body.lr.ph.split ], [ %incdec.ptr49, %for.body ] + 
%incdec.ptr41 = getelementptr inbounds i64, i64* %optr.0142, i32 1 + store i64 %_Q6V64_internal_union34.sroa.0.0.insert.insert, i64* %optr.0142, align 8 + %incdec.ptr49 = getelementptr inbounds i64, i64* %optr.0142, i32 2 + store i64 %_Q6V64_internal_union34.sroa.0.0.insert.insert, i64* %incdec.ptr41, align 8 + %add51 = add nuw nsw i32 %i.0143, 4 + %cmp = icmp slt i32 %add51, %length + br i1 %cmp, label %for.body, label %for.end52 + +for.end52: + ret void +} + +declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1 +declare i64 @llvm.hexagon.M7.dcmpyrwc.acc(i64, i64, i64) #1 +declare i32 @llvm.hexagon.A2.sat(i64) #1 + +attributes #0 = { nounwind "target-cpu"="hexagonv67t" "target-features"="+audio" } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/tinycore.ll b/llvm/test/CodeGen/Hexagon/tinycore.ll new file mode 100644 index 00000000000000..b8ddb9c2c43e8b --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/tinycore.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv67t < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv65 < %s | FileCheck --check-prefix=CHECK-BIG %s + +; Test that the tiny core architecture generates 3 slot packets at most and +; a single load/store per packet at most. + +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: { +; CHECK-NEXT: mpy +; CHECK-NEXT: combine +; CHECK-NEXT: memw +; CHECK-NEXT: } +; CHECK: memw +; CHECK: } :endloop0 + +; Test the loop contains a single packet with 4 instructions. +; CHECK-BIG: loop0(.LBB0_[[LOOP:.]], +; CHECK-BIG: .LBB0_[[LOOP]]: +; CHECK-BIG: { +; CHECK-BIG: += mpyi +; CHECK-BIG-NEXT: = combine +; CHECK-BIG-NEXT: = memw +; CHECK-BIG-NEXT: = memw +; CHECK-BIG-NEXT: } :endloop0 + +define i32 @test(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %n) local_unnamed_addr #0 { +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body, label %for.end + +for.body: + %sum.010 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %a, %entry ] + %arrayidx1.phi = phi i32* [ %arrayidx1.inc, %for.body ], [ %b, %entry ] + %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %0 = load i32, i32* %arrayidx.phi, align 4 + %1 = load i32, i32* %arrayidx1.phi, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %sum.010 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %n + %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1 + %arrayidx1.inc = getelementptr i32, i32* %arrayidx1.phi, i32 1 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} + diff --git a/llvm/test/MC/Hexagon/audio.s b/llvm/test/MC/Hexagon/audio.s new file mode 100644 index 00000000000000..0330dd24218f02 --- /dev/null +++ b/llvm/test/MC/Hexagon/audio.s @@ -0,0 +1,27 @@ +# RUN: llvm-mc -filetype=asm -triple=hexagon-unknown-elf -mcpu=hexagonv67t %s | FileCheck %s +# RUN: llvm-mc -filetype=asm -triple=hexagon-unknown-elf -mcpu=hexagonv67 -mattr=+audio %s | FileCheck %s +# RUN: not llvm-mc -filetype=asm -triple=hexagon-unknown-elf -mcpu=hexagonv67 %s 2>&1 | FileCheck -check-prefix=CHECKINV %s + +# CHECK: clip +# CHECKINV: error: invalid instruction +r0 = clip(r0, #1) + +# CHECK: cround +# CHECKINV: error: invalid instruction +r1:0 = cround(r1:0, #4) + +# CHECK: vclip +# CHECKINV: error: invalid instruction +r1:0 = vclip(r1:0, #2) + +# CHECK: += cmpyiw +# CHECKINV: error: invalid instruction +r5:4 += cmpyiw(r5:4,r3:2) + 
+# CHECK: cmpyrw +# CHECKINV: error: invalid instruction +r5:4 = cmpyrw(r5:4,r3:2) + +# CHECK: cmpyrw(r7:6,r5:4):<<1:rnd:sat +# CHECKINV: error: invalid instruction +r7 = cmpyrw(r7:6,r5:4):<<1:rnd:sat diff --git a/llvm/test/MC/Hexagon/cmpyrw.s b/llvm/test/MC/Hexagon/cmpyrw.s new file mode 100644 index 00000000000000..37244f7547dc4e --- /dev/null +++ b/llvm/test/MC/Hexagon/cmpyrw.s @@ -0,0 +1,3 @@ +# RUN: llvm-mc -arch=hexagon -mv67t -filetype=obj %s | llvm-objdump -mv67t -mattr=+audio -d - | FileCheck %s +r23:22 = cmpyrw(r15:14,r21:20*) +# CHECK: r23:22 = cmpyrw(r15:14,r21:20*) diff --git a/llvm/test/MC/Hexagon/extensions/v67t_audio.s b/llvm/test/MC/Hexagon/extensions/v67t_audio.s new file mode 100644 index 00000000000000..c0cdb340203b38 --- /dev/null +++ b/llvm/test/MC/Hexagon/extensions/v67t_audio.s @@ -0,0 +1,108 @@ +# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv67t -filetype=obj %s | llvm-objdump -mcpu=hexagonv67t -d - | FileCheck --implicit-check-not='{' %s + + + +// Warning: This file is auto generated by mktest.py. Do not edit! +// Created at Wed Aug 22 11:17:37 2018 +// Created using: +// Arch: v67t, commit: 324e85a78e99759c3643d207f9d9b42bbfaf00f6 + +// A7_clip +// Rd32=clip(Rs32,#u5) + r0=clip(r0,#0) +# CHECK: 88c0c0a0 { r0 = clip(r0,#0) } + +// A7_croundd_ri +// Rdd32=cround(Rss32,#u6) + r1:0=cround(r1:0,#0) +# CHECK-NEXT: 8ce0c040 { r1:0 = cround(r1:0,#0) } + +// A7_croundd_rr +// Rdd32=cround(Rss32,Rt32) + r1:0=cround(r1:0,r0) +# CHECK-NEXT: c6c0c040 { r1:0 = cround(r1:0,r0) } + +// A7_vclip +// Rdd32=vclip(Rss32,#u5) + r1:0=vclip(r1:0,#0) +# CHECK-NEXT: 88c0c0c0 { r1:0 = vclip(r1:0,#0) } + +// M7_dcmpyiw +// Rdd32=cmpyiw(Rss32,Rtt32) + r1:0=cmpyiw(r1:0,r1:0) +# CHECK-NEXT: e860c040 { r1:0 = cmpyiw(r1:0,r1:0) } + +// M7_dcmpyiw_acc +// Rxx32+=cmpyiw(Rss32,Rtt32) + r1:0+=cmpyiw(r1:0,r1:0) +# CHECK-NEXT: ea60c040 { r1:0 += cmpyiw(r1:0,r1:0) } + +// M7_dcmpyiwc +// Rdd32=cmpyiw(Rss32,Rtt32*) + r1:0=cmpyiw(r1:0,r1:0*) +# CHECK-NEXT: e8e0c040 { r1:0 = cmpyiw(r1:0,r1:0*) } + +// M7_dcmpyiwc_acc +// Rxx32+=cmpyiw(Rss32,Rtt32*) + r1:0+=cmpyiw(r1:0,r1:0*) +# CHECK-NEXT: ea40c0c0 { r1:0 += cmpyiw(r1:0,r1:0*) } + +// M7_dcmpyrw +// Rdd32=cmpyrw(Rss32,Rtt32) + r1:0=cmpyrw(r1:0,r1:0) +# CHECK-NEXT: e880c040 { r1:0 = cmpyrw(r1:0,r1:0) } + +// M7_dcmpyrw_acc +// Rxx32+=cmpyrw(Rss32,Rtt32) + r1:0+=cmpyrw(r1:0,r1:0) +# CHECK-NEXT: ea80c040 { r1:0 += cmpyrw(r1:0,r1:0) } + +// M7_dcmpyrwc +// Rdd32=cmpyrw(Rss32,Rtt32*) + r1:0=cmpyrw(r1:0,r1:0*) +# CHECK-NEXT: e8c0c040 { r1:0 = cmpyrw(r1:0,r1:0*) } + +// M7_dcmpyrwc_acc +// Rxx32+=cmpyrw(Rss32,Rtt32*) + r1:0+=cmpyrw(r1:0,r1:0*) +# CHECK-NEXT: eac0c040 { r1:0 += cmpyrw(r1:0,r1:0*) } + +// M7_wcmpyiw +// Rd32=cmpyiw(Rss32,Rtt32):<<1:sat + r0=cmpyiw(r1:0,r1:0):<<1:sat +# CHECK-NEXT: e920c000 { r0 = cmpyiw(r1:0,r1:0):<<1:sat } + +// M7_wcmpyiw_rnd +// Rd32=cmpyiw(Rss32,Rtt32):<<1:rnd:sat + r0=cmpyiw(r1:0,r1:0):<<1:rnd:sat +# CHECK-NEXT: e9a0c000 { r0 = cmpyiw(r1:0,r1:0):<<1:rnd:sat } + +// M7_wcmpyiwc +// Rd32=cmpyiw(Rss32,Rtt32*):<<1:sat + r0=cmpyiw(r1:0,r1:0*):<<1:sat +# CHECK-NEXT: e900c080 { r0 = cmpyiw(r1:0,r1:0*):<<1:sat } + +// M7_wcmpyiwc_rnd +// Rd32=cmpyiw(Rss32,Rtt32*):<<1:rnd:sat + r0=cmpyiw(r1:0,r1:0*):<<1:rnd:sat +# CHECK-NEXT: e980c080 { r0 = cmpyiw(r1:0,r1:0*):<<1:rnd:sat } + +// M7_wcmpyrw +// Rd32=cmpyrw(Rss32,Rtt32):<<1:sat + r0=cmpyrw(r1:0,r1:0):<<1:sat +# CHECK-NEXT: e940c000 { r0 = cmpyrw(r1:0,r1:0):<<1:sat } + +// M7_wcmpyrw_rnd +// Rd32=cmpyrw(Rss32,Rtt32):<<1:rnd:sat + r0=cmpyrw(r1:0,r1:0):<<1:rnd:sat +# CHECK-NEXT: e9c0c000 { 
r0 = cmpyrw(r1:0,r1:0):<<1:rnd:sat } + +// M7_wcmpyrwc +// Rd32=cmpyrw(Rss32,Rtt32*):<<1:sat + r0=cmpyrw(r1:0,r1:0*):<<1:sat +# CHECK-NEXT: e960c000 { r0 = cmpyrw(r1:0,r1:0*):<<1:sat } + +// M7_wcmpyrwc_rnd +// Rd32=cmpyrw(Rss32,Rtt32*):<<1:rnd:sat + r0=cmpyrw(r1:0,r1:0*):<<1:rnd:sat +# CHECK-NEXT: e9e0c000 { r0 = cmpyrw(r1:0,r1:0*):<<1:rnd:sat } diff --git a/llvm/test/MC/Hexagon/smallcore_dis.s b/llvm/test/MC/Hexagon/smallcore_dis.s new file mode 100644 index 00000000000000..b94169e8645118 --- /dev/null +++ b/llvm/test/MC/Hexagon/smallcore_dis.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv67t -filetype=obj %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv67t -filetype=obj %s | llvm-objdump -mv67t -d - | FileCheck %s +# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv67t -filetype=obj %s | llvm-objdump -mcpu=hexagonv67t -d - | FileCheck %s + + .text +{ + r1 = memb(r0) + if (p0) memb(r0) = r2 +} + +# CHECK: { r1 = memb(r0+#0) +# CHECK-NEXT: if (p0) memb(r0+#0) = r2 } diff --git a/llvm/test/MC/Hexagon/v67t_align.s b/llvm/test/MC/Hexagon/v67t_align.s new file mode 100644 index 00000000000000..26aff1eee549d1 --- /dev/null +++ b/llvm/test/MC/Hexagon/v67t_align.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv67t -filetype=obj %s | llvm-objdump -d - | FileCheck %s + +{ r0=r0 } +.align 32 +{ r0=r0 } + +# CHECK: { r0 = r0 +# CHECK: nop +# CHECK: nop } +# CHECK: { nop +# CHECK: nop } +# CHECK: { nop +# CHECK: nop +# CHECK: nop } +# CHECK: { r0 = r0 } diff --git a/llvm/test/MC/Hexagon/v67t_arch.s b/llvm/test/MC/Hexagon/v67t_arch.s new file mode 100644 index 00000000000000..01d6d37cbac9f4 --- /dev/null +++ b/llvm/test/MC/Hexagon/v67t_arch.s @@ -0,0 +1,10 @@ +# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv67t -filetype=obj %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv67t -mhvx -filetype=obj %s | llvm-objdump -d - | FileCheck %s + +r1=memw(r0) +{ r0=r0 + memw(r0)=r0.new } + +# CHECK: { r1 = memw(r0+#0) } +# CHECK: { r0 = r0 +# CHECK: memw(r0+#0) = r0.new } diff --git a/llvm/test/MC/Hexagon/v67t_option.s b/llvm/test/MC/Hexagon/v67t_option.s new file mode 100644 index 00000000000000..f6a55e9c051669 --- /dev/null +++ b/llvm/test/MC/Hexagon/v67t_option.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc -arch=hexagon -mv67t -filetype=obj %s | llvm-objdump -d - | FileCheck %s +{ r0=r0 } +.align 32 +{ r0=r0 } + +# CHECK: { r0 = r0 +# CHECK: nop +# CHECK: nop } +# CHECK: { nop +# CHECK: nop } +# CHECK: { nop +# CHECK: nop +# CHECK: nop } +# CHECK: { r0 = r0 } +
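Taken together, the RUN lines above show how the tiny core is selected (input file names here are placeholders): either -mcpu=hexagonv67t or the -mv67t shorthand picks the v67 tiny core, and -mattr=+duplex additionally enables duplex formation.

  llc -march=hexagon -mcpu=hexagonv67t input.ll
  llc -march=hexagon -mattr=+duplex -mcpu=hexagonv67t input.ll
  llvm-mc -arch=hexagon -mcpu=hexagonv67t -filetype=obj input.s | llvm-objdump -d -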