1,079 changes: 1,029 additions & 50 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,18 @@

#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include <memory>
#include <vector>

namespace llvm {

// Creates the IGroupLP DAG mutation, configured by a single boolean flag.
// NOTE(review): this listing also declares a form of this factory taking an
// AMDGPU::SchedulingPhase, and the call sites shown alongside pass a phase
// rather than a bool — presumably this bool form is the pre-change
// declaration; confirm before relying on it.
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(bool IsReentry);
namespace AMDGPU {
// Identifies which instruction-scheduling phase is currently running, so that
// code constructing the IGroupLP mutation can tell the phases apart.
enum class SchedulingPhase {
  // The initial scheduling stage.
  Initial,
  // A later (non-initial) scheduling stage before register allocation.
  PreRAReentry,
  // Scheduling after register allocation.
  PostRA
};
} // namespace AMDGPU

// Creates the IGroupLP DAG mutation for the given scheduling phase.
//
// Phase identifies which scheduling phase is requesting the mutation.
// SavedMutations points to a list of other DAG mutations and may be null
// (several callers pass nullptr).
// NOTE(review): how the mutation uses SavedMutations is defined in
// AMDGPUIGroupLP.cpp, which is not visible here — confirm there, including
// whether the pointed-to vector must outlive the returned mutation.
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
AMDGPU::SchedulingPhase Phase,
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations);

} // namespace llvm

Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
Expand All @@ -471,7 +472,8 @@ static ScheduleDAGInstrs *
createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
DAG->addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
return DAG;
}

Expand Down Expand Up @@ -934,7 +936,8 @@ class GCNPassConfig final : public AMDGPUPassConfig {
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
DAG->addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/true));
DAG->addMutation(
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA, nullptr));
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
DAG->addMutation(createVOPDPairingMutation());
return DAG;
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,8 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
return false;

SavedMutations.swap(DAG.Mutations);
DAG.addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));
DAG.addMutation(createIGroupLPDAGMutation(
AMDGPU::SchedulingPhase::PreRAReentry, nullptr));

InitialOccupancy = DAG.MinOccupancy;
// Aggressively try to reduce register pressure in the unclustered high RP
Expand Down Expand Up @@ -855,7 +856,10 @@ bool GCNSchedStage::initGCNRegion() {
SavedMutations.swap(DAG.Mutations);
bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
StageID == GCNSchedStageID::ILPInitialSchedule;
DAG.addMutation(createIGroupLPDAGMutation(/*IsReentry=*/!IsInitialStage));
DAG.addMutation(createIGroupLPDAGMutation(
IsInitialStage ? AMDGPU::SchedulingPhase::Initial
: AMDGPU::SchedulingPhase::PreRAReentry,
&SavedMutations));
}

return true;
Expand Down Expand Up @@ -1569,7 +1573,8 @@ void GCNPostScheduleDAGMILive::schedule() {
if (HasIGLPInstrs) {
SavedMutations.clear();
SavedMutations.swap(Mutations);
addMutation(createIGroupLPDAGMutation(/*IsReentry=*/true));
addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA,
&SavedMutations));
}

ScheduleDAGMI::schedule();
Expand Down
2,055 changes: 2,055 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir

Large diffs are not rendered by default.

901 changes: 901 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir

Large diffs are not rendered by default.