Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Add VDSDIR instructions for GFX12 #75197

Merged
merged 7 commits into from
Jan 3, 2024

Conversation

mbrkusanin
Copy link
Collaborator

No description provided.

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 12, 2023

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Mirko Brkušanin (mbrkusanin)

Changes

Patch is 33.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75197.diff

10 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+16)
  • (modified) llvm/lib/Target/AMDGPU/LDSDIRInstructions.td (+94-24)
  • (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+20)
  • (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (+4)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+2)
  • (modified) llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp (+3-1)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll (+20-16)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll (+23-16)
  • (added) llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s (+199)
  • (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vdsdir.txt (+206)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 92427335c0ad2..cb2ea1f144e58 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -166,6 +166,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     ImmTyEndpgm,
     ImmTyWaitVDST,
     ImmTyWaitEXP,
+    ImmTyWaitVAVDst,
+    ImmTyWaitVMVSrc,
   };
 
   // Immediate operand kind.
@@ -908,6 +910,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   bool isEndpgm() const;
   bool isWaitVDST() const;
   bool isWaitEXP() const;
+  bool isWaitVAVDst() const;
+  bool isWaitVMVSrc() const;
 
   auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
     return std::bind(P, *this);
@@ -1028,6 +1032,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   }
 
   static void printImmTy(raw_ostream& OS, ImmTy Type) {
+    // clang-format off
     switch (Type) {
     case ImmTyNone: OS << "None"; break;
     case ImmTyGDS: OS << "GDS"; break;
@@ -1085,7 +1090,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     case ImmTyEndpgm: OS << "Endpgm"; break;
     case ImmTyWaitVDST: OS << "WaitVDST"; break;
     case ImmTyWaitEXP: OS << "WaitEXP"; break;
+    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
+    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
     }
+    // clang-format on
   }
 
   void print(raw_ostream &OS) const override {
@@ -9130,6 +9138,14 @@ bool AMDGPUOperand::isWaitVDST() const {
   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
 }
 
+bool AMDGPUOperand::isWaitVAVDst() const {
+  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
+}
+
+bool AMDGPUOperand::isWaitVMVSrc() const {
+  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
+}
+
 //===----------------------------------------------------------------------===//
 // VINTERP
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
index 4956a15867749..572798ae10c2b 100644
--- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
+++ b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
@@ -1,4 +1,4 @@
-//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
+//===-- LDSDIRInstructions.td - LDS/DS Direct Instruction Definitions -----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// LDSDIR encoding
+// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+)
 //===----------------------------------------------------------------------===//
 
 class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
@@ -27,8 +27,27 @@ class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
   let Inst{7-0} = vdst;
 }
 
+class VDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+  // encoding fields
+  bits<2> attrchan;
+  bits<6> attr;
+  bits<4> waitvdst;
+  bits<8> vdst;
+  bits<1> waitvsrc;
+
+  // encoding
+  let Inst{31-24} = 0xce; // encoding
+  let Inst{23} = waitvsrc;
+  let Inst{22} = 0x0; // reserved
+  let Inst{21-20} = op;
+  let Inst{19-16} = waitvdst;
+  let Inst{15-10} = !if(is_direct, ?, attr);
+  let Inst{9-8} = !if(is_direct, ?, attrchan);
+  let Inst{7-0} = vdst;
+}
+
 //===----------------------------------------------------------------------===//
-// LDSDIR Classes
+// LDSDIR/VDSDIR Classes
 //===----------------------------------------------------------------------===//
 
 class LDSDIR_getIns<bit direct> {
@@ -38,10 +57,15 @@ class LDSDIR_getIns<bit direct> {
   );
 }
 
-class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
-    (outs VGPR_32:$vdst),
-    LDSDIR_getIns<direct>.ret,
-    asm> {
+class VDSDIR_getIns<bit direct> {
+  dag ret = !if(direct,
+    (ins WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc),
+    (ins InterpAttr:$attr, InterpAttrChan:$attrchan, WaitVAVDst:$waitvdst, WaitVMVSrc:$waitvsrc)
+  );
+}
+
+class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> : InstSI<
+    (outs VGPR_32:$vdst), ins, asm> {
   let LDSDIR = 1;
   let EXP_CNT = 1;
 
@@ -60,8 +84,8 @@ class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
   let is_direct = direct;
 }
 
-class LDSDIR_Pseudo<string opName, bit direct> :
-  LDSDIR_Common<opName, "", direct>,
+class DSDIR_Pseudo<string opName, dag ins, bit direct> :
+  DSDIR_Common<opName, "", ins, direct>,
   SIMCInstr<opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
@@ -74,22 +98,31 @@ class LDSDIR_getAsm<bit direct> {
   );
 }
 
-class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
-  LDSDIR_Common<lds.Mnemonic,
-                lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
-                lds.is_direct>,
-  SIMCInstr <lds.Mnemonic, subtarget>,
-  LDSDIRe<op, lds.is_direct> {
+class VDSDIR_getAsm<bit direct> {
+  string ret = !if(direct,
+    " $vdst$waitvdst$waitvsrc",
+    " $vdst, $attr$attrchan$waitvdst$waitvsrc"
+  );
+}
+
+class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> :
+  DSDIR_Common<lds.Mnemonic,
+               lds.Mnemonic # asm,
+               ins,
+               lds.is_direct>,
+  SIMCInstr <lds.Mnemonic, subtarget> {
   let isPseudo = 0;
   let isCodeGenOnly = 0;
 }
 
 //===----------------------------------------------------------------------===//
-// LDS Direct Instructions
+// LDS/DS Direct Instructions
 //===----------------------------------------------------------------------===//
 
-def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
-def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
+let SubtargetPredicate = isGFX11Only in {
+
+def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>;
+def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>;
 
 def : GCNPat <
   (f32 (int_amdgcn_lds_direct_load M0)),
@@ -101,16 +134,53 @@ def : GCNPat <
   (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
 >;
 
+} // End SubtargetPredicate = isGFX11Only
+
+let SubtargetPredicate = isGFX12Plus in {
+
+def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>;
+def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_direct_load M0)),
+  (DS_DIRECT_LOAD 0, 1)
+>;
+
+def : GCNPat <
+  (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+  (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1)
+>;
+
+} // End SubtargetPredicate = isGFX12Only
+
 //===----------------------------------------------------------------------===//
-// GFX11+
+// GFX11
 //===----------------------------------------------------------------------===//
 
-multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
-  def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
-    let AssemblerPredicate = isGFX11Plus;
+multiclass DSDIR_Real_gfx11<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+  def _gfx11 : DSDIR_Real<lds, lds.InOperandList,
+                           LDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX11>,
+               LDSDIRe<op, lds.is_direct> {
+    let AssemblerPredicate = isGFX11Only;
     let DecoderNamespace = "GFX11";
   }
 }
 
-defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
-defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;
+defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>;
+defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>;
+
+//===----------------------------------------------------------------------===//
+// GFX12+
+//===----------------------------------------------------------------------===//
+
+multiclass DSDIR_Real_gfx12<bits<2> op, DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> {
+  def _gfx12 : DSDIR_Real<lds, lds.InOperandList,
+                           VDSDIR_getAsm<lds.is_direct>.ret, SIEncodingFamily.GFX12>,
+               VDSDIRe<op, lds.is_direct> {
+    let AssemblerPredicate = isGFX12Plus;
+    let DecoderNamespace = "GFX12";
+  }
+}
+
+defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>;
+defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 57f74ae08b35c..d99d343f81c40 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -631,6 +631,26 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
   printU4ImmDecOperand(MI, OpNo, O);
 }
 
+void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  uint8_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm != 0) {
+    O << " wait_va_vdst:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  }
+}
+
+void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  uint8_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm != 0) {
+    O << " wait_vm_vsrc:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  }
+}
+
 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 95c26de6299ef..f2f985fa5b1a8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -161,6 +161,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                     raw_ostream &O);
   void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O);
+  void printWaitVAVDst(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
 
   void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O, unsigned N);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9e60bdda5ef3a..e9a5beb99498b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1129,6 +1129,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
 
 def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
 def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
+def WaitVAVDst : NamedIntOperand<i8, "wait_va_vdst">;
+def WaitVMVSrc : NamedIntOperand<i8, "wait_vm_vsrc">;
 
 class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
   let OperandNamespace = "AMDGPU";
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 59d6ccf513bb9..5e6c34992930b 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         }
         continue;
       } else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
-                 Opcode == AMDGPU::LDS_DIRECT_LOAD) {
+                 Opcode == AMDGPU::DS_PARAM_LOAD ||
+                 Opcode == AMDGPU::LDS_DIRECT_LOAD ||
+                 Opcode == AMDGPU::DS_DIRECT_LOAD) {
         // Mark these STRICTWQM, but only for the instruction, not its operands.
         // This avoid unnecessarily marking M0 as requiring WQM.
         InstrInfo &II = Instructions[&MI];
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 313bd8525c6fd..195c5dabb4d46 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -1,23 +1,27 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 
-; GFX11-LABEL: {{^}}lds_direct_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_direct_load:
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_mov_b32 m0
+; GFX12: ds_direct_load v{{[0-9]+}}
+; GCN: s_mov_b32 m0
 ; GFX11: lds_direct_load v{{[0-9]+}}
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
 define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
                                        i32 inreg %arg1, i32 inreg %arg2) #0 {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 5a8b03e50e2ee..1ab753d75fe03 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -1,25 +1,32 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 
-; GFX11-LABEL: {{^}}lds_param_load:
-; GFX11: s_mov_b32 m0
+; GCN-LABEL: {{^}}lds_param_load:
+; GCN: s_mov_b32 m0
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w
 ; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x
-; GFX11: s_waitcnt expcnt(4)
-; GFX11: v_add_f32
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(3)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(2)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(1)
-; GFX11: buffer_store_b32
-; GFX11: s_waitcnt expcnt(0)
-; GFX11: buffer_store_b32
-; GFX11: buffer_store_b32
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w
+; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x
+; GCN: s_waitcnt expcnt(4)
+; GCN: v_add_f32
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(3)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(2)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(1)
+; GCN: buffer_store_b32
+; GCN: s_waitcnt expcnt(0)
+; GCN: buffer_store_b32
+; GCN: buffer_store_b32
 define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
 main_body:
   %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
new file mode 100644
index 0000000000000..4d40eae1c7e7f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vdsdir.s
@@ -0,0 +1,199 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
+
+ds_direct_load v1 wait_va_vdst:15
+// GFX12: ds_direct_load v1 wait_va_vdst:15  ; encoding: [0x01,0x00,0x1f,0xce]
+
+ds_direct_load v2 wait_va_vdst:14
+// GFX12: ds_direct_load v2 wait_va_vdst:14  ; encoding: [0x02,0x00,0x1e,0xce]
+
+ds_direct_load v3 wait_va_vdst:13
+// GFX12: ds_direct_load v3 wait_va_vdst:13  ; encoding: [0x03,0x00,0x1d,0xce]
+
+ds_direct_load v4 wait_va_vdst:12
+// GFX12: ds_direct_load v4 wait_va_vdst:12  ; encoding: [0x04,0x00,0x1c,0xce]
+
+ds_direct_load v5 wait_va_vdst:11
+// GFX12: ds_direct_load v5 wait_va_vdst:11  ; encoding: [0x05,0x00,0x1b,0xce]
+
+ds_direct_load v6 wait_va_vdst:10
+// GFX12: ds_direct_load v6 wait_va_vdst:10  ; encoding: [0x06,0x00,0x1a,0xce]
+
+ds_direct_load v7 wait_va_vdst:9
+// GFX12: ds_direct_load v7 wait_va_vdst:9   ; encoding: [0x07,0x00,0x19,0xce]
+
+ds_direct_load v8 wait_va_vdst:8
+// GFX12: ds_direct_load v8 wait_va_vdst:8   ; encoding: [0x08,0x00,0x18,0xce]
+
+ds_direct_load v9 wait_va_vdst:7
+// GFX12: ds_direct_load v9 wait_va_vdst:7   ; encoding: [0x09,0x00,0x17,0xce]
+
+ds_direct_load v10 wait_va_vdst:6
+// GFX12: ds_direct_load v10 wait_va_vdst:6  ; encoding: [0x0a,0x00,0x16,0xce]
+
+ds_direct_load v11 wait_va_vdst:5
+// GFX12: ds_direct_load v11 wait_va_vdst:5  ; encoding: [0x0b,0x00,0x15,0xce]
+
+ds_direct_load v12 wait_va_vdst:4
+// GFX12: ds_direct_load v12 wait_va_vdst:4  ; encoding: [0x0c,0x00,0x14,0xce]
+
+ds_direct_load v13 wait_va_vdst:3
+// GFX12: ds_direct_load v13 wait_va_vdst:3  ; encoding: [0x0d,0x00,0x13,0xce]
+
+ds_direct_load v14 wait_va_vdst:2
+// GFX12: ds_direct_load v14 wait_va_vdst:2  ; encoding: [0x0e,0x00,0x12,0xce]
+
+ds_direct_load v15 wait_va_vdst:1
+// GFX12: ds_direct_load v15 wait_va_vdst:1  ; encoding: [0x0f,0x00,0x11,0xce]
+
+ds_direct_load v16 wait_va_vdst:0
+// GFX12: ds_direct_load v16  ; encoding: [0x10,0x00,0x10,0xce]
+
+ds_direct_load v17
+// GFX12: ds_direct_load v17  ; encoding: [0x11,0x00,0x10,0xce]
+
+ds_param_load v1, attr0.x wait_va_vdst:15
+// GFX12: ds_param_load v1, attr0.x wait_va_vdst:15   ; encoding: [0x01,0x00,0x0f,0xce]
+
+ds_param_load v2, attr0.y wait_va_vdst:14
+// GFX12: ds_param_load v2, attr0.y wait_va_vdst:14   ; encoding: [0x02,0x01,0x0e,0xce]
+
+ds_param_load v3, attr0.z wait_va_vdst:13
+// GFX12: ds_param_load v3, attr0.z wait_va_vdst:13   ; encoding: [0x03,0x02,0x0d,0xce]
+
+ds_param_load v4, attr0.w wait_va_vdst:12
+// GFX12: ds_param_load v4, attr0.w wait_va_vdst:12   ; encoding: [0x04,0x03,0x0c,0xce]
+
+ds_param_load v5, attr0.x wait_va_vdst:11
+// GFX12: ds_param_load v5, attr0.x wait_va_vdst:11   ; encoding: [0x05,0x00,0x0b,0xce]
+
+ds_param_load v6, attr1.x wait_va_vdst:10
+// GFX12: ds_param_load v6, attr1.x wait_va_vdst:10   ; encoding: [0x06,0x04,0x0a,0xce]
+
+ds_param_load v7, attr2.y wait_va_vdst:9
+// GFX12: ds_param_load v7, attr2.y wait_va_vdst:9    ; encoding: [0x07,0x09,0x09,0xce]
+
+ds_param_load v8, attr3.z wait_va_vdst:8
+// GFX12: ds_param_load v8, attr3.z wait_va_vdst:8    ; encoding: [0x08,0x0e,0x08,0xce]
+
+ds_param_load v9, attr4.w wait_va_vdst:7
+// GFX12: ds_param_load v9, attr4.w wait_va_vdst:7    ; encoding: [0x09,0x13,0x07,0xce]
+
+ds_param_load v10, attr11.x wait_va_vdst:6
+// GFX12: ds_param_load v10, attr11.x wait_va_vdst:6  ; encoding: [0x0a,0x2c,0x06,0xce]
+
+ds_param_load v11, attr22.y wait_va_vdst:5
+// GFX12: ds_param_load v11, attr22.y wait_va_vdst:5  ; encoding: [0x0b,0x59,0x05,0xce]
+
+ds_param_load v13, attr32.x wait_va_vdst:3
+// GFX12: ds_param_load v13, attr32.x wait_va_vdst:3  ; encoding: [0x0d,0x80,0x03,0xce]
+
+ds_param_load v14, attr32.y wait_va_vdst:2
+// GFX12: ds_param_load v14, attr32.y wa...
[truncated]

llvm/lib/Target/AMDGPU/LDSDIRInstructions.td Outdated Show resolved Hide resolved
llvm/lib/Target/AMDGPU/SIInstrInfo.td Outdated Show resolved Hide resolved
llvm/lib/Target/AMDGPU/LDSDIRInstructions.td Outdated Show resolved Hide resolved
@mbrkusanin
Copy link
Collaborator Author

Ping

Copy link
Contributor

@jayfoad jayfoad left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks.

@mbrkusanin mbrkusanin merged commit 82e33d6 into llvm:main Jan 3, 2024
4 checks passed
@mbrkusanin mbrkusanin deleted the gfx12-vdsdir branch January 3, 2024 15:32
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

5 participants