Skip to content

Commit

Permalink
[AMDGPU] NFC. Assert that mask is full with VOPC DPP
Browse files Browse the repository at this point in the history
VOPC DPP should not be formed when the row_mask and bank_mask are not
0xf (full) because the resulting VOP DPP would have different semantics
than the MOV DPP followed by VOP. Existing checks in GCNDPPCombine cover
this case but for different reasons, so assert the property for
future-proofing.

Reviewed By: nhaehnle

Differential Revision: https://reviews.llvm.org/D130101
  • Loading branch information
Sisyph committed Jul 20, 2022
1 parent b32e600 commit dc850fb
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
13 changes: 12 additions & 1 deletion llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
Expand Up @@ -202,6 +202,18 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
return nullptr;
}
int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
// Prior checks cover Mask with VOPC condition, but not on purpose
auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
assert(RowMaskOpnd && RowMaskOpnd->isImm());
auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
assert(BankMaskOpnd && BankMaskOpnd->isImm());
const bool MaskAllLanes =
RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
assert(MaskAllLanes ||
!(TII->isVOPC(DPPOp) ||
(TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
"VOPC cannot form DPP unless mask is full");

auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
OrigMI.getDebugLoc(), TII->get(DPPOp))
Expand All @@ -222,7 +234,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
// If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
}

int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
if (OldIdx != -1) {
assert(OldIdx == NumOperands);
Expand Down
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
Expand Up @@ -67,3 +67,36 @@ body: |
V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec
...
---

name: mask_not_full
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-LABEL: name: mask_not_full
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
%4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
...

0 comments on commit dc850fb

Please sign in to comment.