Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Add GFX12 S_WAIT_* instructions #77336

Merged
merged 1 commit into from Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 35 additions & 7 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Expand Up @@ -1186,14 +1186,12 @@ let SubtargetPredicate = isGFX10Plus in {
let SubtargetPredicate = isGFX10GFX11 in {
def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
} // End SubtargetPredicate = isGFX10GFX11

let SubtargetPredicate = isGFX10Plus in {
def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
def S_WAITCNT_LGKMCNT : SOPK_WAITCNT<"s_waitcnt_lgkmcnt">;
} // End SubtargetPredicate = isGFX10Plus
} // End SubtargetPredicate = isGFX10GFX11

//===----------------------------------------------------------------------===//
// SOPC Instructions
Expand Down Expand Up @@ -1702,6 +1700,27 @@ let SubtargetPredicate = HasVGPRSingleUseHintInsts in {
SOPP_Pseudo<"s_singleuse_vdst", (ins s16imm:$simm16), "$simm16">;
} // End SubtargetPredicate = HasVGPRSingleUseHintInsts

// GFX12 S_WAIT_* wait instructions. Every def in this block is a SOPP_Pseudo
// whose only input operand is the s16imm immediate $simm16, and whose assembly
// operand string is just "$simm16". The enclosing let applies
// SubtargetPredicate = isGFX12Plus and hasSideEffects = 1 to all of them.
// NOTE(review): each mnemonic presumably names the counter(s) being waited
// on; the bit layout of $simm16 is not visible here -- confirm against the
// ISA documentation.
let SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 in {
def S_WAIT_LOADCNT :
SOPP_Pseudo<"s_wait_loadcnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_LOADCNT_DSCNT :
SOPP_Pseudo<"s_wait_loadcnt_dscnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_STORECNT :
SOPP_Pseudo<"s_wait_storecnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_STORECNT_DSCNT :
SOPP_Pseudo<"s_wait_storecnt_dscnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_SAMPLECNT :
SOPP_Pseudo<"s_wait_samplecnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_BVHCNT :
SOPP_Pseudo<"s_wait_bvhcnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_EXPCNT :
SOPP_Pseudo<"s_wait_expcnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_DSCNT :
SOPP_Pseudo<"s_wait_dscnt", (ins s16imm:$simm16), "$simm16">;
def S_WAIT_KMCNT :
SOPP_Pseudo<"s_wait_kmcnt", (ins s16imm:$simm16), "$simm16">;
} // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1

//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -2411,10 +2430,10 @@ defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11_gfx12<0x013>;
defm S_CALL_B64 : SOPK_Real32_gfx11_gfx12<0x014>;
defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx11<0x016>;
defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx11<0x017>;
defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11_gfx12<0x018>;
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11_gfx12<0x019>;
defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11_gfx12<0x01a>;
defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11_gfx12<0x01b>;
defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11<0x018>;
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11<0x019>;
defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11<0x01a>;
defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>;

//===----------------------------------------------------------------------===//
// SOPK - GFX10.
Expand Down Expand Up @@ -2516,6 +2535,15 @@ multiclass SOPP_Real_32_Renamed_gfx12<bits<7> op, SOPP_Pseudo backing_pseudo, st
defm S_WAIT_ALU : SOPP_Real_32_Renamed_gfx12<0x008, S_WAITCNT_DEPCTR, "s_wait_alu">;
defm S_BARRIER_WAIT : SOPP_Real_32_gfx12<0x014>;
defm S_BARRIER_LEAVE : SOPP_Real_32_gfx12<0x015>;
// SOPP_Real_32_gfx12 instantiations for the S_WAIT_* instructions, listed in
// ascending opcode order (0x040-0x049). Opcode 0x045 is not assigned by any
// line in this list.
defm S_WAIT_LOADCNT : SOPP_Real_32_gfx12<0x040>;
defm S_WAIT_STORECNT : SOPP_Real_32_gfx12<0x041>;
defm S_WAIT_SAMPLECNT : SOPP_Real_32_gfx12<0x042>;
defm S_WAIT_BVHCNT : SOPP_Real_32_gfx12<0x043>;
defm S_WAIT_EXPCNT : SOPP_Real_32_gfx12<0x044>;
defm S_WAIT_DSCNT : SOPP_Real_32_gfx12<0x046>;
defm S_WAIT_KMCNT : SOPP_Real_32_gfx12<0x047>;
defm S_WAIT_LOADCNT_DSCNT : SOPP_Real_32_gfx12<0x048>;
defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;

//===----------------------------------------------------------------------===//
// SOPP - GFX11, GFX12.
Expand Down
5 changes: 5 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_err.s
Expand Up @@ -36,6 +36,11 @@ v_interp_p2_f32 v0, -v1, v2, v3 wait_exp
global_atomic_cmpswap_x2 v[1:4], v3, v[5:8], off offset:2047 glc
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

// s_waitcnt_depctr is called s_wait_alu on GFX12, but its semantics and
// encoding are identical. Even so, the new name should be rejected on GFX11.
s_wait_alu 0xfffe
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_cubesc_f32_e64_dpp v5, v1, v2, 12345678 row_shr:4 row_mask:0xf bank_mask:0xf
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

Expand Down
54 changes: 54 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
@@ -1,5 +1,59 @@
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s

s_wait_loadcnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc0,0xbf]

s_wait_loadcnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc0,0xbf]

s_wait_storecnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc1,0xbf]

s_wait_storecnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc1,0xbf]

s_wait_samplecnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc2,0xbf]

s_wait_samplecnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc2,0xbf]

s_wait_bvhcnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc3,0xbf]

s_wait_bvhcnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc3,0xbf]

s_wait_expcnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc4,0xbf]

s_wait_expcnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc4,0xbf]

s_wait_dscnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc6,0xbf]

s_wait_dscnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc6,0xbf]

s_wait_kmcnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc7,0xbf]

s_wait_kmcnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc7,0xbf]

s_wait_loadcnt_dscnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc8,0xbf]

s_wait_loadcnt_dscnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc8,0xbf]

s_wait_storecnt_dscnt 0x1234
// GFX12: encoding: [0x34,0x12,0xc9,0xbf]

s_wait_storecnt_dscnt 0xc1d1
// GFX12: encoding: [0xd1,0xc1,0xc9,0xbf]

s_wait_alu 0xfffe
// GFX12: encoding: [0xfe,0xff,0x88,0xbf]

Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_unsupported.s
Expand Up @@ -4,6 +4,18 @@
// Unsupported instructions.
//===----------------------------------------------------------------------===//

s_waitcnt_expcnt exec_hi, 0x1234
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_waitcnt_lgkmcnt exec_hi, 0x1234
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_waitcnt_vmcnt exec_hi, 0x1234
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_waitcnt_vscnt exec_hi, 0x1234
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_subvector_loop_begin s0, 0x1234
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

Expand Down
4 changes: 4 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
Expand Up @@ -10,6 +10,10 @@
# GFX11: [[@LINE+1]]:1: warning: invalid instruction encoding
0x34,0x12,0x93,0xbf

# this is s_waitcnt_vscnt exec_hi, 0x1234, which is valid on gfx11, but not on gfx12
# GFX12: [[@LINE+1]]:1: warning: invalid instruction encoding
0x34,0x12,0x7f,0xbc

# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
# W64: [[@LINE+1]]:1: warning: invalid instruction encoding
0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf
Expand Down
54 changes: 54 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
Expand Up @@ -6,6 +6,60 @@
# GFX12: s_wait_alu 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf]
0xfe,0xff,0x88,0xbf

# GFX12: s_wait_loadcnt 0x1234 ; encoding: [0x34,0x12,0xc0,0xbf]
0x34,0x12,0xc0,0xbf

# GFX12: s_wait_loadcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc0,0xbf]
0xd1,0xc1,0xc0,0xbf

# GFX12: s_wait_storecnt 0x1234 ; encoding: [0x34,0x12,0xc1,0xbf]
0x34,0x12,0xc1,0xbf

# GFX12: s_wait_storecnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc1,0xbf]
0xd1,0xc1,0xc1,0xbf

# GFX12: s_wait_samplecnt 0x1234 ; encoding: [0x34,0x12,0xc2,0xbf]
0x34,0x12,0xc2,0xbf

# GFX12: s_wait_samplecnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc2,0xbf]
0xd1,0xc1,0xc2,0xbf

# GFX12: s_wait_bvhcnt 0x1234 ; encoding: [0x34,0x12,0xc3,0xbf]
0x34,0x12,0xc3,0xbf

# GFX12: s_wait_bvhcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc3,0xbf]
0xd1,0xc1,0xc3,0xbf

# GFX12: s_wait_expcnt 0x1234 ; encoding: [0x34,0x12,0xc4,0xbf]
0x34,0x12,0xc4,0xbf

# GFX12: s_wait_expcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc4,0xbf]
0xd1,0xc1,0xc4,0xbf

# GFX12: s_wait_dscnt 0x1234 ; encoding: [0x34,0x12,0xc6,0xbf]
0x34,0x12,0xc6,0xbf

# GFX12: s_wait_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc6,0xbf]
0xd1,0xc1,0xc6,0xbf

# GFX12: s_wait_kmcnt 0x1234 ; encoding: [0x34,0x12,0xc7,0xbf]
0x34,0x12,0xc7,0xbf

# GFX12: s_wait_kmcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc7,0xbf]
0xd1,0xc1,0xc7,0xbf

# GFX12: s_wait_loadcnt_dscnt 0x1234 ; encoding: [0x34,0x12,0xc8,0xbf]
0x34,0x12,0xc8,0xbf

# GFX12: s_wait_loadcnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc8,0xbf]
0xd1,0xc1,0xc8,0xbf

# GFX12: s_wait_storecnt_dscnt 0x1234 ; encoding: [0x34,0x12,0xc9,0xbf]
0x34,0x12,0xc9,0xbf

# GFX12: s_wait_storecnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc9,0xbf]
0xd1,0xc1,0xc9,0xbf

# GFX12: s_singleuse_vdst 0x0 ; encoding: [0x00,0x00,0x93,0xbf]
0x00,0x00,0x93,0xbf

Expand Down