64 changes: 41 additions & 23 deletions llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//

#include "SIPreAllocateWWMRegs.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Expand All @@ -34,7 +35,7 @@ static cl::opt<bool>

namespace {

class SIPreAllocateWWMRegs : public MachineFunctionPass {
class SIPreAllocateWWMRegs {
private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
Expand All @@ -48,45 +49,49 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
#ifndef NDEBUG
void printWWMInfo(const MachineInstr &MI);
#endif
bool processDef(MachineOperand &MO);
void rewriteRegs(MachineFunction &MF);

public:
SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
VirtRegMap *VRM)
: LIS(LIS), Matrix(Matrix), VRM(VRM) {}
bool run(MachineFunction &MF);
};

/// Legacy pass manager wrapper for the "SI Pre-allocate WWM Registers" pass.
///
/// The class is a MachineFunctionPass whose runOnMachineFunction() is defined
/// out of line; it collects the required analyses and forwards to
/// SIPreAllocateWWMRegs::run().
class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
public:
// Storage for the pass identifier; defined (as 0) at file scope.
static char ID;

SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
}
SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}

bool runOnMachineFunction(MachineFunction &MF) override;

// Declares the analyses this pass requires (live intervals, the virtual
// register map and the live register matrix) and marks all analyses as
// preserved before deferring to the base class.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervalsWrapperPass>();
AU.addRequired<VirtRegMapWrapperLegacy>();
AU.addRequired<LiveRegMatrix>();
AU.addRequired<LiveRegMatrixWrapperLegacy>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}

private:
bool processDef(MachineOperand &MO);
void rewriteRegs(MachineFunction &MF);
};

} // End anonymous namespace.

// Legacy pass registration under DEBUG_TYPE with the description
// "SI Pre-allocate WWM Registers", listing the wrapper passes it depends on.
INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
"SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
"SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
"SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
"SI Pre-allocate WWM Registers", false, false)

// Definition of the static pass identifier declared in the class.
char SIPreAllocateWWMRegs::ID = 0;
char SIPreAllocateWWMRegsLegacy::ID = 0;

// llvm-namespace reference bound to the pass identifier above.
char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;

// Factory: returns a newly heap-allocated instance of the legacy pass.
FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
return new SIPreAllocateWWMRegs();
FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
return new SIPreAllocateWWMRegsLegacy();
}

bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
Expand Down Expand Up @@ -184,7 +189,14 @@ SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {

#endif

bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
/// Legacy pass manager entry point.
///
/// Unwraps the live intervals, live register matrix and virtual register map
/// from their legacy wrapper passes, then delegates to the pass-manager
/// independent implementation and returns its result unchanged.
bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
  auto &Intervals = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  auto &RegMatrix = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  auto &RegMap = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();

  SIPreAllocateWWMRegs Impl(&Intervals, &RegMatrix, &RegMap);
  return Impl.run(MF);
}

bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");

const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Expand All @@ -193,10 +205,6 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();

LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
Matrix = &getAnalysis<LiveRegMatrix>();
VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();

RegClassInfo.runOnMachineFunction(MF);

bool PreallocateSGPRSpillVGPRs =
Expand Down Expand Up @@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
rewriteRegs(MF);
return true;
}

/// New pass manager entry point.
///
/// Requests the live intervals, live register matrix and virtual register map
/// results for \p MF from \p MFAM and hands them to the shared implementation.
/// The boolean returned by the implementation is not consulted: every
/// analysis is reported as preserved unconditionally.
PreservedAnalyses
SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  auto &Intervals = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  auto &RegMatrix = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
  auto &RegMap = MFAM.getResult<VirtRegMapAnalysis>(MF);

  SIPreAllocateWWMRegs Impl(&Intervals, &RegMatrix, &RegMap);
  Impl.run(MF);
  return PreservedAnalyses::all();
}
25 changes: 25 additions & 0 deletions llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===--- SIPreAllocateWWMRegs.h ---------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
#define LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {

/// New pass manager interface for the SI WWM register pre-allocation pass.
///
/// Only the entry point is declared here; its definition lives in
/// SIPreAllocateWWMRegs.cpp.
class SIPreAllocateWWMRegsPass
: public PassInfoMixin<SIPreAllocateWWMRegsPass> {
public:
/// Runs the pass on \p MF, taking the analyses it needs from \p MFAM.
/// \returns the set of analyses the pass reports as preserved.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};

} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s --check-prefix=CHECK2

# RUN: llc -mtriple=amdgcn -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s --check-prefix=CHECK2

---

# The V_MOV results defined between ENTER_STRICT_WWM and EXIT_STRICT_WWM are
# expected to be rewritten to $vgpr0 under both RUN configurations (CHECK and
# CHECK2); %0 and the COPY after the region are expected to stay virtual.
name: pre_allocate_wwm_regs_strict
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr1
; CHECK-LABEL: name: pre_allocate_wwm_regs_strict
; CHECK: liveins: $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec
; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
;
; CHECK2-LABEL: name: pre_allocate_wwm_regs_strict
; CHECK2: liveins: $sgpr1
; CHECK2-NEXT: {{ $}}
; CHECK2-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK2-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK2-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK2-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec
; CHECK2-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
; CHECK2-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
%0:vgpr_32 = IMPLICIT_DEF
renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%2:vgpr_32 = V_MOV_B32_dpp %1, %0, 323, 12, 15, 0, implicit $exec
$exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
%3:vgpr_32 = COPY %0
...
---

# The SI_SPILL_S32_TO_VGPR result is expected to stay virtual by default
# (CHECK) and to be rewritten to $vgpr0 only when
# -amdgpu-prealloc-sgpr-spill-vgprs is passed (CHECK2).
name: pre_allocate_wwm_spill_to_vgpr
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr1
; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr
; CHECK: liveins: $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: dead [[SI_SPILL_S32_TO_VGPR:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
;
; CHECK2-LABEL: name: pre_allocate_wwm_spill_to_vgpr
; CHECK2: liveins: $sgpr1
; CHECK2-NEXT: {{ $}}
; CHECK2-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK2-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
; CHECK2-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0
%2:vgpr_32 = COPY %0