Skip to content

Commit

Permalink
[AMDGPU][MC] Corrected bound_ctrl for compatibility with sp3
Browse files Browse the repository at this point in the history
Enabled "bound_ctrl:1" and disabled "bound_ctrl:-1" syntax.
Corrected printer to output "bound_ctrl:1" instead of "bound_ctrl:0".
See bug 35397 for detailed issue description.

Differential Revision: https://reviews.llvm.org/D97048
  • Loading branch information
dpreobra committed Feb 22, 2021
1 parent 7af9ea5 commit 4813518
Show file tree
Hide file tree
Showing 24 changed files with 652 additions and 629 deletions.
2 changes: 1 addition & 1 deletion llvm/docs/AMDGPUModifierSyntax.rst
Expand Up @@ -1186,7 +1186,7 @@ invalid lanes is disabled.
======================================== ================================================
Syntax Description
======================================== ================================================
bound_ctrl:0 Enables data sharing with invalid lanes.
bound_ctrl:1 Enables data sharing with invalid lanes.

Accessing data from an invalid lane will
return zero.
Expand Down
11 changes: 4 additions & 7 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Expand Up @@ -7076,17 +7076,14 @@ static bool ConvertOmodDiv(int64_t &Div) {
return false;
}

// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
if (BoundCtrl == 0) {
if (BoundCtrl == 0 || BoundCtrl == 1) {
BoundCtrl = 1;
return true;
}

if (BoundCtrl == -1) {
BoundCtrl = 0;
return true;
}

return false;
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Expand Up @@ -921,7 +921,7 @@ void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm) {
O << " bound_ctrl:0"; // XXX - this syntax is used in sp3
O << " bound_ctrl:1";
}
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
Expand Up @@ -14,7 +14,7 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; GFX8-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
Expand All @@ -26,7 +26,7 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; encoding: [0x80,0x02,0x02,0x7e]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; encoding: [0x04,0x02,0x00,0x7e]
; GFX10-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
; GFX10-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
; GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x00,0x02,0x00]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
Expand Up @@ -672,9 +672,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
; SI-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[4:5]
; SI-NEXT: v_mov_b32_e32 v1, v0
; SI-NEXT: s_nop 1
; SI-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; SI-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; SI-NEXT: s_nop 1
; SI-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; SI-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
Expand Down Expand Up @@ -718,9 +718,9 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
; GFX9-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX9-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
; GFX9-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
Expand Down Expand Up @@ -763,8 +763,8 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
; GFX10-32-NEXT: s_mov_b32 s1, s0
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s1
; GFX10-32-NEXT: v_mov_b32_e32 v1, v0
; GFX10-32-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-32-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0
; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
Expand Down Expand Up @@ -807,8 +807,8 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
; GFX10-64-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
; GFX10-64-NEXT: v_mov_b32_e32 v1, v0
; GFX10-64-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-64-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
Expand Down Expand Up @@ -908,9 +908,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; SI-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[6:7]
; SI-NEXT: v_mov_b32_e32 v3, v2
; SI-NEXT: s_nop 1
; SI-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; SI-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; SI-NEXT: s_nop 1
; SI-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; SI-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; SI-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; SI-NEXT: s_and_b64 s[6:7], s[0:1], vcc
Expand Down Expand Up @@ -973,9 +973,9 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX9-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v3, v2
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX9-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX9-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX9-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX9-NEXT: s_and_b64 s[4:5], s[0:1], vcc
Expand Down Expand Up @@ -1036,8 +1036,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-32-NEXT: s_mov_b32 s2, s0
; GFX10-32-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
; GFX10-32-NEXT: v_mov_b32_e32 v3, v2
; GFX10-32-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-32-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-32-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-32-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-32-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX10-32-NEXT: s_and_b32 s2, s0, vcc_lo
Expand Down Expand Up @@ -1099,8 +1099,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
; GFX10-64-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX10-64-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
; GFX10-64-NEXT: v_mov_b32_e32 v3, v2
; GFX10-64-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-64-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX10-64-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-64-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX10-64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX10-64-NEXT: s_and_b64 s[4:5], s[0:1], vcc
Expand Down

0 comments on commit 4813518

Please sign in to comment.