diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td index 0c610a4839f8aa..be859193581087 100644 --- a/llvm/lib/Target/ARM/ARMScheduleA57.td +++ b/llvm/lib/Target/ARM/ARMScheduleA57.td @@ -183,6 +183,11 @@ class A57BranchForm : // TODO: according to the doc, conditional uses I0/I1, unconditional uses M // Why more complex instruction uses more simple pipeline? // May be an error in doc. +def A57WriteALUsi : SchedWriteVariant<[ + // lsl #2, lsl #1, or lsr #1. + SchedVar>]>, + SchedVar>]> +]>; def A57WriteALUsr : SchedWriteVariant<[ SchedVar>]>, SchedVar>]> @@ -195,7 +200,7 @@ def A57ReadALUsr : SchedReadVariant<[ SchedVar, SchedVar ]>; -def : SchedAlias>>; +def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-mla.mir b/llvm/test/CodeGen/ARM/cortex-a57-misched-mla.mir deleted file mode 100644 index 1b40a0b8effb18..00000000000000 --- a/llvm/test/CodeGen/ARM/cortex-a57-misched-mla.mir +++ /dev/null @@ -1,35 +0,0 @@ -# REQUIRES: asserts -# RUN: llc -mcpu=cortex-a57 -mtriple=thumb -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s - -# CHECK-LABEL: ********** MI Scheduling ********** -# CHECK: %[[RES:[0-9]+]]:rgpr = t2MLA -# CHECK-NEXT: # preds left -# CHECK-NEXT: # succs left -# CHECK-NEXT: # rdefs left -# CHECK-NEXT: Latency : 3 -# CHECK-NEXT: Depth -# CHECK-NEXT: Height -# CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg= -# CHECK-NEXT: SU({{.*}}): Out Latency= -# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg= -# CHECK-NEXT: Successors: -# CHECK-NEXT: SU([[SMLA_SU:[0-9]+]]): Data Latency=1 Reg=%[[RES]] -# CHECK-NEXT: Pressure Diff -# CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU([[SMLA_SU]]): {{.*}} = t2SMLAL %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr(tied-def 0), %[[RES]]:rgpr(tied-def 1), 14, $noreg - -name: test_smlal_forwarding -tracksRegLiveness: true -body: | - bb.0: - liveins: $r1, $r3, $r4, $r5, $r6 - %1:rgpr = COPY $r1 - %3:rgpr = COPY $r3 - %4:rgpr = COPY $r4 - %5:rgpr = COPY $r5 - %6:rgpr = COPY $r6 - %3:rgpr = t2MLA %4:rgpr, %1:rgpr, %4:rgpr, 14, $noreg - %6:rgpr, %5:rgpr = t2SMLAL %5:rgpr, %6:rgpr, %4:rgpr, %3:rgpr, 14, $noreg - $r0 = COPY %6:rgpr - BX_RET 14, $noreg, implicit $r0 diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp index fa84199974ffa8..82ca35f37e8b35 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/CodeGenSchedule.cpp @@ -1338,7 +1338,8 @@ class PredTransitions { PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {} bool substituteVariantOperand(const SmallVectorImpl &RWSeq, - bool IsRead, unsigned StartIdx); + bool IsRead, bool IsForAnyCPU, + unsigned StartIdx); bool substituteVariants(const PredTransition &Trans); @@ -1412,6 +1413,29 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef, return false; } +static bool hasAliasedVariants(const CodeGenSchedRW &RW, + CodeGenSchedModels &SchedModels) { + if (RW.HasVariants) + return true; + + for (Record *Alias : RW.Aliases) { + const CodeGenSchedRW &AliasRW = + SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW")); + if (AliasRW.HasVariants) + return true; + if (AliasRW.IsSequence) { + IdxVec ExpandedRWs; + SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead); + for (unsigned SI : ExpandedRWs) { + if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead), + SchedModels)) + return true; + } + } + } + return false; +} + static std::vector getAllPredicates(ArrayRef Variants, ArrayRef ProcIndices) { std::vector Preds; @@ -1589,7 +1613,21 @@ pushVariant(const TransVariant &VInfo, bool IsRead) { // starts. RWSeq must be applied to all transitions between StartIdx and the end // of TransVec. bool PredTransitions::substituteVariantOperand( - const SmallVectorImpl &RWSeq, bool IsRead, unsigned StartIdx) { + const SmallVectorImpl &RWSeq, bool IsRead, bool IsForAnyCPU, + unsigned StartIdx) { + + auto CollectAndAddVariants = [&](unsigned TransIdx, + const CodeGenSchedRW &SchedRW) { + // Distribute this partial PredTransition across intersecting variants. + // This will push a copies of TransVec[TransIdx] on the back of TransVec. + std::vector IntersectingVariants; + getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants); + // Now expand each variant on top of its copy of the transition. + for (const TransVariant &IV : IntersectingVariants) + pushVariant(IV, IsRead); + return !IntersectingVariants.empty(); + }; + bool Subst = false; // Visit each original RW within the current sequence. for (SmallVectorImpl::const_iterator @@ -1598,24 +1636,35 @@ bool PredTransitions::substituteVariantOperand( // Push this RW on all partial PredTransitions or distribute variants. // New PredTransitions may be pushed within this loop which should not be // revisited (TransEnd must be loop invariant). + bool HasAliases = false, WasPushed = false; for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size(); TransIdx != TransEnd; ++TransIdx) { - // Distribute this partial PredTransition across intersecting variants. - // This will push a copies of TransVec[TransIdx] on the back of TransVec. - std::vector IntersectingVariants; - getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants); - // Now expand each variant on top of its copy of the transition. - for (const TransVariant &IV : IntersectingVariants) - pushVariant(IV, IsRead); - if (IntersectingVariants.empty()) { + // In the common case, push RW onto the current operand's sequence. + if (!hasAliasedVariants(SchedRW, SchedModels)) { if (IsRead) TransVec[TransIdx].ReadSequences.back().push_back(*RWI); else TransVec[TransIdx].WriteSequences.back().push_back(*RWI); continue; - } else { - Subst = true; } + HasAliases = true; + WasPushed |= CollectAndAddVariants(TransIdx, SchedRW); + Subst |= WasPushed; + } + if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) { + // If we're here this means that in some sched class: + // a) We have read variant for CPU A + // b) We have write variant for CPU B + // b) We don't have write variant for CPU A + // d) We must expand all read/write variants (IsForAnyCPU is true) + // e) We couldn't expand SchedRW because TransVec doesn't have + // any transition with compatible CPU ID. + // In such case we create new empty transition with zero (AnyCPU) + // index. + TransVec.reserve(TransVec.size() + 1); + TransVec.emplace_back(TransVec[StartIdx].PredTerm); + TransVec.back().ReadSequences.emplace_back(); + Subst |= CollectAndAddVariants(TransVec.size() - 1, SchedRW); } } return Subst; @@ -1634,7 +1683,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) { bool Subst = false; TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices); - assert(!llvm::count(Trans.ProcIndices, 0)); + bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0); // Visit each original write sequence. for (SmallVectorImpl>::const_iterator WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end(); @@ -1644,7 +1693,8 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) { TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) { I->WriteSequences.emplace_back(); } - Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx); + Subst |= + substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx); } // Visit each original read sequence. for (SmallVectorImpl>::const_iterator @@ -1655,7 +1705,8 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) { TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) { I->ReadSequences.emplace_back(); } - Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx); + Subst |= + substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx); } return Subst; } @@ -1694,10 +1745,6 @@ static void inferFromTransitions(ArrayRef LastTransitions, // requires creating a new SchedClass. for (ArrayRef::iterator I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) { - // Variant expansion (substituteVariants) may create unconditional - // transitions. We don't need to build sched classes for them. - if (I->PredTerm.empty()) - continue; IdxVec OperWritesVariant, OperReadsVariant; addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false); addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true); @@ -1730,26 +1777,6 @@ static void inferFromTransitions(ArrayRef LastTransitions, } } -std::vector CodeGenSchedModels::getAllProcIndices() const { - std::vector ProcIdVec; - for (const auto &PM : ProcModelMap) - if (PM.second != 0) - ProcIdVec.push_back(PM.second); - return ProcIdVec; -} - -static std::vector -makePerProcessorTransitions(const PredTransition &Trans, - ArrayRef ProcIndices) { - std::vector PerCpuTransVec; - for (unsigned ProcId : ProcIndices) { - assert(ProcId != 0); - PerCpuTransVec.push_back(Trans); - PerCpuTransVec.back().ProcIndices.assign(1, ProcId); - } - return PerCpuTransVec; -} - // Create new SchedClasses for the given ReadWrite list. If any of the // ReadWrites refers to a SchedVariant, create a new SchedClass for each variant // of the ReadWrite list, following Aliases if necessary. @@ -1785,10 +1812,6 @@ void CodeGenSchedModels::inferFromRW(ArrayRef OperWrites, } LLVM_DEBUG(dbgs() << '\n'); - LastTransitions = makePerProcessorTransitions( - LastTransitions[0], llvm::count(ProcIndices, 0) - ? ArrayRef(getAllProcIndices()) - : ProcIndices); // Collect all PredTransitions for individual operands. // Iterate until no variant writes remain. bool SubstitutedAny; @@ -1800,6 +1823,9 @@ void CodeGenSchedModels::inferFromRW(ArrayRef OperWrites, LLVM_DEBUG(Transitions.dump()); LastTransitions.swap(Transitions.TransVec); } while (SubstitutedAny); + // If the first transition has no variants, nothing to do. + if (LastTransitions[0].PredTerm.empty()) + return; // WARNING: We are about to mutate the SchedClasses vector. Do not refer to // OperWrites, OperReads, or ProcIndices after calling inferFromTransitions. diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h index 5df99b25add11a..62a359e0888ca5 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.h +++ b/llvm/utils/TableGen/CodeGenSchedule.h @@ -443,7 +443,6 @@ class CodeGenSchedModels { InstClassMapTy InstrClassMap; std::vector STIPredicates; - std::vector getAllProcIndices() const; public: CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);