Skip to content

Commit

Permalink
[AMDGPU] Check for CopyToReg PhysReg clobbers in pre-RA-sched
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D128681
  • Loading branch information
jrbyrnes committed Jun 30, 2022
1 parent 11c43cd commit 09424f8
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 10 deletions.
35 changes: 28 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
Expand Up @@ -440,17 +440,29 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
std::vector<SUnit*> &LiveRegDefs,
std::vector<SUnit *> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
const TargetRegisterInfo *TRI) {
const TargetRegisterInfo *TRI,
const SDNode *Node = nullptr) {
bool Added = false;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
if (RegAdded.insert(*AI).second) {
LRegs.push_back(*AI);
Added = true;
}
// Check if Reg is live.
if (!LiveRegDefs[*AI])
continue;

// Allow multiple uses of the same def.
if (LiveRegDefs[*AI] == SU)
continue;

// Allow the def when it is the Node supplied by the caller (e.g. the source value of a CopyToReg).
if (Node && LiveRegDefs[*AI]->getNode() == Node)
continue;

// Add Reg to the set of interfering live regs.
if (RegAdded.insert(*AI).second) {
LRegs.push_back(*AI);
Added = true;
}
}
return Added;
Expand Down Expand Up @@ -502,6 +514,15 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
continue;
}

if (Node->getOpcode() == ISD::CopyToReg) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (Reg.isPhysical()) {
SDNode *SrcNode = Node->getOperand(2).getNode();
CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
}
}

if (!Node->isMachineOpcode())
continue;
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
Expand Down
19 changes: 16 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
Expand Up @@ -1294,11 +1294,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,

/// CheckForLiveRegDef - Add to LRegs (and RegAdded) every "live" register that
/// the specified register def of the specified SUnit clobbers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SUnit **LiveRegDefs,
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
const TargetRegisterInfo *TRI) {
const TargetRegisterInfo *TRI,
const SDNode *Node = nullptr) {
for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

// Check if Reg is live.
Expand All @@ -1307,6 +1307,10 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
// Allow multiple uses of the same def.
if (LiveRegDefs[*AliasI] == SU) continue;

// Allow the def when it is the Node supplied by the caller (e.g. the source value of a CopyToReg).
if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
continue;

// Add Reg to the set of interfering live regs.
if (RegAdded.insert(*AliasI).second) {
LRegs.push_back(*AliasI);
Expand Down Expand Up @@ -1387,6 +1391,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
continue;
}

if (Node->getOpcode() == ISD::CopyToReg) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (Reg.isPhysical()) {
SDNode *SrcNode = Node->getOperand(2).getNode();
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
SrcNode);
}
}

if (!Node->isMachineOpcode())
continue;
// If we're in the middle of scheduling a call, don't begin scheduling
Expand Down
64 changes: 64 additions & 0 deletions llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
@@ -0,0 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=source < %s | FileCheck -check-prefix=RRLIST %s
; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=fast < %s | FileCheck -check-prefix=FAST %s


; Kernel under test: loads two i64 values (%a, %b) and two i32 values (%e, %f),
; then stores to %pout.coerce a value chosen by two nested selects. The inner
; select is keyed on a signed less-than compare of the i64 values; the outer
; select is keyed on their equality and picks smin(0, %i.2) when they are equal.
; The RUN lines build this with both -pre-RA-sched=source (RRLIST prefix) and
; -pre-RA-sched=fast (FAST prefix); the expected instruction sequence is the
; same for both.
; NOTE(review): presumably the point is that s_min_i32 must not be scheduled
; between a compare and the s_cselect_b32 that consumes its result (an SCC
; clobber, going by the test file name) -- confirm against the commit description.
define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %pout.coerce) {
; RRLIST-LABEL: sccClobber:
; RRLIST: ; %bb.0: ; %entry
; RRLIST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; RRLIST-NEXT: v_mov_b32_e32 v2, 0
; RRLIST-NEXT: s_waitcnt lgkmcnt(0)
; RRLIST-NEXT: s_load_dword s16, s[8:9], 0x0
; RRLIST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
; RRLIST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
; RRLIST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44
; RRLIST-NEXT: s_load_dword s17, s[10:11], 0x0
; RRLIST-NEXT: s_waitcnt lgkmcnt(0)
; RRLIST-NEXT: s_min_i32 s4, s16, 0
; RRLIST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; RRLIST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
; RRLIST-NEXT: s_and_b64 s[0:1], vcc, exec
; RRLIST-NEXT: s_cselect_b32 s0, s16, s17
; RRLIST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]
; RRLIST-NEXT: s_cselect_b32 s0, s4, s0
; RRLIST-NEXT: v_mov_b32_e32 v0, s0
; RRLIST-NEXT: global_store_dword v2, v0, s[14:15]
; RRLIST-NEXT: s_endpgm
;
; FAST-LABEL: sccClobber:
; FAST: ; %bb.0: ; %entry
; FAST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24
; FAST-NEXT: v_mov_b32_e32 v2, 0
; FAST-NEXT: s_waitcnt lgkmcnt(0)
; FAST-NEXT: s_load_dword s16, s[8:9], 0x0
; FAST-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
; FAST-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0
; FAST-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x44
; FAST-NEXT: s_load_dword s17, s[10:11], 0x0
; FAST-NEXT: s_waitcnt lgkmcnt(0)
; FAST-NEXT: s_min_i32 s4, s16, 0
; FAST-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; FAST-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
; FAST-NEXT: s_and_b64 s[0:1], vcc, exec
; FAST-NEXT: s_cselect_b32 s0, s16, s17
; FAST-NEXT: s_cmp_eq_u64 s[12:13], s[2:3]
; FAST-NEXT: s_cselect_b32 s0, s4, s0
; FAST-NEXT: v_mov_b32_e32 v0, s0
; FAST-NEXT: global_store_dword v2, v0, s[14:15]
; FAST-NEXT: s_endpgm
entry:
%i = load i64, ptr addrspace(1) %a, align 8
%i.1 = load i64, ptr addrspace(1) %b, align 8
%i.2 = load i32, ptr addrspace(1) %e, align 4
%i.3 = load i32, ptr addrspace(1) %f, align 4
%cmp7.1 = icmp eq i64 %i, %i.1
%call.1 = tail call noundef i32 @llvm.smin.i32(i32 noundef 0, i32 noundef %i.2)
%cmp8.1 = icmp slt i64 %i, %i.1
%cond.1 = select i1 %cmp8.1, i32 %i.2, i32 %i.3
%cond14.1 = select i1 %cmp7.1, i32 %call.1, i32 %cond.1
store i32 %cond14.1, ptr addrspace(1) %pout.coerce, align 4
ret void
}

declare i32 @llvm.smin.i32(i32, i32)

0 comments on commit 09424f8

Please sign in to comment.