From 961e4384f4e938b901490912813ff0e8347cc3c0 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 16 Mar 2021 11:57:45 -0700 Subject: [PATCH] [AMDGPU] Support SCC on buffer atomics Differential Revision: https://reviews.llvm.org/D98731 --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 23 +++++++++++-------- llvm/test/MC/AMDGPU/gfx90a_asm_features.s | 4 ++++ llvm/test/MC/AMDGPU/gfx90a_err.s | 15 ++++++++++++ .../AMDGPU/gfx90a_dasm_features.txt | 3 +++ 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 6a760bac311b5..d367969702e3c 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -679,7 +679,7 @@ class MUBUF_Atomic_Pseudo; // GFX8, GFX9 (VI). //===----------------------------------------------------------------------===// -class MUBUF_Real_Base_vi op, MUBUF_Pseudo ps, int Enc> : +class MUBUF_Real_Base_vi op, MUBUF_Pseudo ps, int Enc, + bit has_sccb = ps.has_sccb> : MUBUF_Real, Enc64, SIMCInstr, @@ -2270,7 +2271,7 @@ class MUBUF_Real_Base_vi op, MUBUF_Pseudo ps, int Enc> : let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); - let Inst{15} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value); + let Inst{15} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccb_value); let Inst{16} = ps.lds; let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{24-18} = op; @@ -2281,26 +2282,28 @@ class MUBUF_Real_Base_vi op, MUBUF_Pseudo ps, int Enc> : let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } -class MUBUF_Real_vi op, MUBUF_Pseudo ps> : - MUBUF_Real_Base_vi { +class MUBUF_Real_vi op, MUBUF_Pseudo ps, bit has_sccb = ps.has_sccb> : + MUBUF_Real_Base_vi { let AssemblerPredicate = isGFX8GFX9NotGFX90A; let DecoderNamespace = "GFX8"; let Inst{55} = !if(ps.has_tfe, tfe, ?); } -class MUBUF_Real_gfx90a op, MUBUF_Pseudo ps> : - MUBUF_Real_Base_vi { +class MUBUF_Real_gfx90a op, MUBUF_Pseudo ps, + bit has_sccb = ps.has_sccb> : + MUBUF_Real_Base_vi { let AssemblerPredicate = isGFX90APlus; let DecoderNamespace = "GFX90A"; - let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands); + let AsmString = ps.Mnemonic # !subst("$sccb", !if(has_sccb, "$sccb",""), + !subst("$tfe", "", ps.AsmOperands)); let Inst{55} = acc; } multiclass MUBUF_Real_vi_gfx90a op, MUBUF_Pseudo ps> { def _vi : MUBUF_Real_vi; - def _gfx90a : MUBUF_Real_gfx90a; + def _gfx90a : MUBUF_Real_gfx90a; } multiclass MUBUF_Real_AllAddr_vi op> { @@ -2483,7 +2486,7 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; } // End SubtargetPredicate = HasAtomicFaddInsts -let SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus in { +let SubtargetPredicate = isGFX90APlus in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>; defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_vi<0x50>; defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_vi<0x51>; diff --git a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s index 38fa212175d84..fac42fd900ced 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s @@ -1046,3 +1046,7 @@ global_atomic_add_f32 v1, v0, v2, s[0:1] glc ; encoding: [0x00,0x80,0x35,0xdd,0x // GFX1010: error: instruction not supported on this GPU // GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x39,0xdd,0x00,0x02,0x7f,0x00] global_atomic_pk_add_f16 v0, v[0:1], v2, off glc + +// NOT-GFX90A: error: scc modifier is not supported on this GPU +// GFX90A: buffer_atomic_add v4, off, s[8:11], s3 scc ; encoding: [0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03] +buffer_atomic_add v4, off, s[8:11], s3 scc diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s index 15df69b05a171..44c48595ca17c 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -231,6 +231,21 @@ global_atomic_min_f64 v[0:1], v[2:3], off scc global_atomic_max_f64 v[0:1], v[2:3], off scc // GFX90A: error: instruction must not use scc +buffer_atomic_add_f32 v4, off, s[8:11], s3 scc +// GFX90A: error: instruction must not use scc + +buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 scc +// GFX90A: error: instruction must not use scc + +buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 scc +// GFX90A: error: instruction must not use scc + +buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 scc +// GFX90A: error: instruction must not use scc + +buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 scc +// GFX90A: error: instruction must not use scc + v_mov_b32_sdwa v1, src_lds_direct dst_sel:DWORD // GFX90A: error: lds_direct is not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt index cc007a6cd4ca6..bc5c6509d738c 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt @@ -793,3 +793,6 @@ # GFX90A: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc ; encoding: [0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00] 0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00 + +# GFX90A: buffer_atomic_add v4, off, s[8:11], s3 scc ; encoding: [0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03] +0x00,0x80,0x08,0xe1,0x00,0x04,0x02,0x03