Skip to content

Commit

Permalink
AMDGPU: Use isLiteralConstantLike to check whether the operand could …
Browse files Browse the repository at this point in the history
…ever be literal

Summary:
  To compute the size of a VALU/SALU instruction, we need to check whether an operand
could ever be literal. Previously isLiteralConstant was used, which missed cases
like global variables or external symbols. These misses lead to under-estimation of
the instruction size and branch offset, and thus incorrectly skip the necessary branch
relaxation when the branch offset is actually greater than what the branch bits can hold.
In this work, we use isLiteralConstantLike to check the operands. It maybe conservative,
but it is safe.

Reviewers: arsenm

Differential Revision: https://reviews.llvm.org/D122778
  • Loading branch information
changpeng committed Mar 31, 2022
1 parent 0a46041 commit 1711020
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -7461,7 +7461,9 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return DescSize;
bool HasLiteral = false;
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
if (isLiteralConstant(MI, I)) {
const MachineOperand &Op = MI.getOperand(I);
const MCOperandInfo &OpInfo = Desc.OpInfo[I];
if (isLiteralConstantLike(Op, OpInfo)) {
HasLiteral = true;
break;
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -520,6 +520,7 @@ def : GCNPat<
def SI_CALL : SPseudoInstSI <
(outs SReg_64:$dst), (ins SSrc_b64:$src0, unknown:$callee)> {
let Size = 4;
let FixedSize = 1;
let isCall = 1;
let UseNamedOperandTable = 1;
let SchedRW = [WriteBranch];
Expand All @@ -532,6 +533,7 @@ def SI_TCRETURN : SPseudoInstSI <(outs),
(ins SReg_64:$src0, unknown:$callee, i32imm:$fpdiff),
[(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
let Size = 4;
let FixedSize = 1;
let isCall = 1;
let isTerminator = 1;
let isReturn = 1;
Expand Down
@@ -0,0 +1,39 @@
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -amdgpu-s-branch-bits=6 < %s | FileCheck -check-prefix=GCN %s


; Restrict maximum branch to between +31 and -32 dwords
declare void @llvm.amdgcn.s.sleep(i32) #0

@name1 = external addrspace(1) global i32
@name2 = external addrspace(1) global i32
@name3 = external addrspace(1) global i32

; GCN-LABEL: {{^}}branch_offset_test:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN-NEXT: s_cbranch_scc0 [[BB2:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: .LBB{{[0-9]+}}_{{[0-9]+}}: ; %bb
; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB3]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
; GCN-NEXT: [[BB2]]: ; %bb2
; GCN-NEXT: s_getpc_b64 s[[[PC_LO]]:[[PC_HI]]]

; GCN: [[BB3]]: ; %bb3
define amdgpu_kernel void @branch_offset_test(i32 addrspace(1)* %arg, i32 %cnd) #0 {
bb:
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch

bb2:
store i32 1, i32 addrspace(1)* @name1
store i32 2, i32 addrspace(1)* @name2
store i32 3, i32 addrspace(1)* @name3
call void @llvm.amdgcn.s.sleep(i32 0)
br label %bb3

bb3:
store volatile i32 %cnd, i32 addrspace(1)* %arg
ret void
}

0 comments on commit 1711020

Please sign in to comment.