Skip to content

Commit

Permalink
[AMDGPU] Fixed v_swap_b32 match
Browse files Browse the repository at this point in the history
1. Fixed liveness issue with implicit kills.
2. Fixed potential problem with an indirect mov.

Fixes: SWDEV-256848

Differential Revision: https://reviews.llvm.org/D89599
  • Loading branch information
rampitec committed Oct 21, 2020
1 parent b6e4aae commit 611959f
Show file tree
Hide file tree
Showing 2 changed files with 275 additions and 10 deletions.
62 changes: 53 additions & 9 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,22 @@ getSubRegForIndex(Register Reg, unsigned Sub, unsigned I,
return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}

/// Erase \p MI from its parent block. Before erasing, walk the operands that
/// sit past the operand counts reported by the instruction descriptor
/// (explicit operands + implicit uses + implicit defs) and, for each one that
/// is a def, insert an IMPLICIT_DEF of that register immediately before
/// \p MI, using \p MI's debug location.
///
/// \param MI  instruction to remove.
/// \param TII instruction info used to obtain the IMPLICIT_DEF descriptor.
static void dropInstructionKeepingImpDefs(MachineInstr &MI,
                                          const SIInstrInfo *TII) {
  const auto &Desc = MI.getDesc();

  // Index of the first operand not accounted for by the descriptor.
  const unsigned FirstExtraOp = Desc.getNumOperands() +
                                Desc.getNumImplicitUses() +
                                Desc.getNumImplicitDefs();
  const unsigned NumOps = MI.getNumOperands();

  for (unsigned OpIdx = FirstExtraOp; OpIdx != NumOps; ++OpIdx) {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    // Only extra defs are preserved; extra uses are simply dropped with MI.
    if (MO.isDef())
      BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
              TII->get(AMDGPU::IMPLICIT_DEF), MO.getReg());
  }

  MI.eraseFromParent();
}

// Match:
// mov t, x
// mov x, y
Expand Down Expand Up @@ -476,18 +492,25 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
if (!TRI.isVGPR(MRI, X))
return nullptr;

if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0))
return nullptr;

const unsigned SearchLimit = 16;
unsigned Count = 0;
bool KilledT = false;
for (auto Iter = std::next(MovT.getIterator()),
E = MovT.getParent()->instr_end();
Iter != E && Count < SearchLimit; ++Iter, ++Count) {
Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {

MachineInstr *MovY = &*Iter;
KilledT = MovY->killsRegister(T, &TRI);

if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
MovY->getOpcode() != AMDGPU::COPY) ||
!MovY->getOperand(1).isReg() ||
MovY->getOperand(1).getReg() != T ||
MovY->getOperand(1).getSubReg() != Tsub)
MovY->getOperand(1).getSubReg() != Tsub ||
MovY->hasRegisterImplicitUseOperand(AMDGPU::M0))
continue;

Register Y = MovY->getOperand(0).getReg();
Expand Down Expand Up @@ -521,32 +544,53 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
MovX = nullptr;
break;
}
// Implicit use of M0 is an indirect move.
if (I->hasRegisterImplicitUseOperand(AMDGPU::M0))
continue;

if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0 : 1)))
continue;

MovX = &*I;
}

if (!MovX)
continue;

LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);

for (unsigned I = 0; I < Size; ++I) {
TargetInstrInfo::RegSubRegPair X1, Y1;
X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
TII->get(AMDGPU::V_SWAP_B32))
MachineBasicBlock &MBB = *MovT.getParent();
auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
TII->get(AMDGPU::V_SWAP_B32))
.addDef(X1.Reg, 0, X1.SubReg)
.addDef(Y1.Reg, 0, Y1.SubReg)
.addReg(Y1.Reg, 0, Y1.SubReg)
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
// Drop implicit EXEC.
MIB->RemoveOperand(MIB->getNumExplicitOperands());
MIB->copyImplicitOps(*MBB.getParent(), *MovX);
}
}
MovX->eraseFromParent();
MovY->eraseFromParent();
dropInstructionKeepingImpDefs(*MovY, TII);
MachineInstr *Next = &*std::next(MovT.getIterator());
if (MRI.use_nodbg_empty(T))
MovT.eraseFromParent();
else

if (MRI.use_nodbg_empty(T)) {
dropInstructionKeepingImpDefs(MovT, TII);
} else {
Xop.setIsKill(false);
for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
unsigned OpNo = MovT.getNumExplicitOperands() + I;
const MachineOperand &Op = MovT.getOperand(OpNo);
if (Op.isKill() && TRI.regsOverlap(X, Op.getReg()))
MovT.RemoveOperand(OpNo);
}
}

return Next;
}
Expand Down
223 changes: 222 additions & 1 deletion llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,9 @@ body: |
...

# GCN-LABEL: name: swap_virt_copy_subreg_impdef_super
# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
# GCN: %2:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1
# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
---
name: swap_virt_copy_subreg_impdef_super
registers:
Expand Down Expand Up @@ -672,3 +674,222 @@ body: |
%1 = COPY %2
S_ENDPGM 0
...

# Swap pattern built from V_MOV_B32_e32 where the first mov carries
# "implicit killed $vgpr1_vgpr2" and the third mov carries an implicit-def of
# $vgpr5_vgpr6. The expected output keeps the first mov without the implicit
# kill, replaces the second and third movs with V_SWAP_B32, and places an
# IMPLICIT_DEF of $vgpr5_vgpr6 right after the swap. The two trailing movs are
# expected to stay as written.
# GCN-LABEL: name: swap_liveness_error_mov
# GCN: $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec
# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
# GCN-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec

---
name: swap_liveness_error_mov
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr5, $vgpr1_vgpr2
$vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit killed $vgpr1_vgpr2
$vgpr1 = V_MOV_B32_e32 killed $vgpr5, implicit $exec
$vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
$vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
$vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec
S_ENDPGM 0
...

# COPY flavour of the liveness test: the first COPY carries
# "implicit killed $vgpr1_vgpr2" and the third COPY carries an implicit-def of
# $vgpr5_vgpr6. The expected output keeps the first COPY without the implicit
# kill, replaces the second and third COPYs with V_SWAP_B32, and places an
# IMPLICIT_DEF of $vgpr5_vgpr6 right after the swap. The two trailing COPYs
# are expected to stay as written.
# GCN-LABEL: name: swap_liveness_error_copy
# GCN: $vgpr6 = COPY $vgpr1
# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
# GCN-NEXT: $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
# GCN-NEXT: $vgpr5 = COPY $vgpr6

---
name: swap_liveness_error_copy
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr5, $vgpr1_vgpr2
$vgpr6 = COPY $vgpr1, implicit killed $vgpr1_vgpr2
$vgpr1 = COPY killed $vgpr5
$vgpr5 = COPY $vgpr6, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
$vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
$vgpr5 = COPY $vgpr6
S_ENDPGM 0
...

# The temporary $vgpr2 written by the first mov is killed by an implicit
# operand on the second instruction, i.e. before $vgpr0 is overwritten. The
# final mov reads $vgpr2 as undef. The expected output is the five movs
# exactly as written in the input: no V_SWAP_B32 is formed.
# GCN-LABEL: name: swap_killed_t_early
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec

---
name: swap_killed_t_early
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
$vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# The temporary $vgpr2 written by the first mov is killed by an implicit
# operand on the fourth instruction, i.e. after $vgpr0 has been overwritten but
# before the final mov that reads $vgpr2 (as undef). The expected output is
# the five movs exactly as written in the input: no V_SWAP_B32 is formed.
# GCN-LABEL: name: swap_killed_t_late
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec

---
name: swap_killed_t_late
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
$vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# $vgpr0 is rewritten by the third mov and then killed by an implicit operand
# on the fourth instruction, before the final mov copies $vgpr2 into $vgpr1.
# The expected output is the five movs exactly as written in the input: no
# V_SWAP_B32 is formed.
# GCN-LABEL: name: swap_killed_x
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec

---
name: swap_killed_x
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# Three-mov swap pattern in which the first mov (the one writing the temporary
# $vgpr2) has an implicit use of $m0. The expected output is the input
# unchanged: no V_SWAP_B32 is formed.
# GCN-LABEL: name: indirect_mov_t
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1

---
name: indirect_mov_t
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# Three-mov swap pattern in which the second mov (the one overwriting $vgpr0)
# has an implicit use of $m0. The expected output is the input unchanged: no
# V_SWAP_B32 is formed.
# GCN-LABEL: name: indirect_mov_x
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1

---
name: indirect_mov_x
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# Three-mov swap pattern in which the third mov (the one copying the temporary
# $vgpr2 into $vgpr1) has an implicit use of $m0. The expected output is the
# input unchanged: no V_SWAP_B32 is formed.
# GCN-LABEL: name: indirect_mov_y
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1

---
name: indirect_mov_y
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# 32-bit swap pattern in which the second mov (the one overwriting $vgpr0)
# carries extra implicit operands: "implicit $vgpr2" and
# "implicit killed $vgpr1_vgpr2". The expected output contains a V_SWAP_B32
# that carries those two implicit operands after "implicit $exec".
# GCN-LABEL: name: implicit_ops_mov_x_swap_b32
# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2

---
name: implicit_ops_mov_x_swap_b32
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2
$vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# 64-bit (vreg_64) swap pattern written with COPY, where the middle COPY
# carries an extra "implicit $vgpr0" operand. The three COPY instructions are
# expected to remain exactly as written: no V_SWAP_B32 is formed.
# GCN-LABEL: name: implict_ops_mov_x_swap_b64
# GCN: %2:vreg_64 = COPY %0
# GCN-NEXT: %0:vreg_64 = COPY %1, implicit $vgpr0
# GCN-NEXT: %1:vreg_64 = COPY %2

---
name: implict_ops_mov_x_swap_b64
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = COPY %0
%0 = COPY %1, implicit $vgpr0
%1 = COPY %2
...

# 32-bit swap pattern in which the first mov (the one writing the temporary
# $vgpr3) carries extra implicit operands, including an implicit-def of $vgpr1.
# The expected output has "$vgpr1 = IMPLICIT_DEF" immediately followed by the
# V_SWAP_B32 of $vgpr0 and $vgpr1.
# GCN-LABEL: implicit_ops_mov_t_swap_b32
# GCN: $vgpr1 = IMPLICIT_DEF
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec

---
name: implicit_ops_mov_t_swap_b32
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2, implicit-def $vgpr1
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

# 32-bit swap pattern in which the third mov (the one copying the temporary
# $vgpr3 into $vgpr1) carries extra implicit operands, including an
# implicit-def of $vgpr0_vgpr1. The expected output has the V_SWAP_B32 of
# $vgpr0 and $vgpr1 immediately followed by "$vgpr0_vgpr1 = IMPLICIT_DEF".
# GCN-LABEL: implicit_ops_mov_y_swap_b32
# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
# GCN-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF

---
name: implicit_ops_mov_y_swap_b32
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
$vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec, implicit $vgpr2, implicit-def $vgpr0_vgpr1, implicit killed $vgpr3
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...

0 comments on commit 611959f

Please sign in to comment.