Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Add GFX12 s_sleep_var instruction and intrinsic #75499

Merged
jayfoad merged 1 commit into llvm:main from the gfx12-sleep-var branch
Dec 14, 2023
Merged

[AMDGPU] Add GFX12 s_sleep_var instruction and intrinsic #75499

jayfoad merged 1 commit into llvm:main from the gfx12-sleep-var branch
Dec 14, 2023

Conversation

jayfoad
Copy link
Contributor

@jayfoad jayfoad commented Dec 14, 2023

No description provided.

@llvmbot llvmbot added the backend:AMDGPU, mc (Machine (object) code), and llvm:ir labels on Dec 14, 2023
@llvmbot
Copy link
Collaborator

llvmbot commented Dec 14, 2023

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-mc

Author: Jay Foad (jayfoad)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/75499.diff

7 Files Affected:

  • (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+6)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+7)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+13)
  • (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+5)
  • (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll (+38)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_sop1.s (+6)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt (+6)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 09e88152e65d2a..b1aefc1777f855 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1715,6 +1715,12 @@ def int_amdgcn_s_sleep :
                                 IntrHasSideEffects]> {
 }
 
+def int_amdgcn_s_sleep_var
+    : ClangBuiltin<"__builtin_amdgcn_s_sleep_var">,
+      Intrinsic<[], [llvm_i32_ty],
+                [IntrNoMem, IntrHasSideEffects, IntrWillReturn]> {
+}
+
 def int_amdgcn_s_nop :
   DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
                                 IntrHasSideEffects]> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 03b6d19b2b3c06..d0c1302c3f003c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3101,6 +3101,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
       applyDefaultMapping(OpdMapper);
       constrainOpWithReadfirstlane(B, MI, 8); // M0
       return;
+    case Intrinsic::amdgcn_s_sleep_var:
+      assert(OpdMapper.getVRegs(1).empty());
+      constrainOpWithReadfirstlane(B, MI, 1);
+      return;
     case Intrinsic::amdgcn_s_barrier_signal_var:
     case Intrinsic::amdgcn_s_barrier_join:
     case Intrinsic::amdgcn_s_wakeup_barrier:
@@ -4849,6 +4853,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
           getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); // %data1
       break;
     }
+    case Intrinsic::amdgcn_s_sleep_var:
+      OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+      break;
     case Intrinsic::amdgcn_s_barrier_signal_var:
     case Intrinsic::amdgcn_s_barrier_join:
     case Intrinsic::amdgcn_s_wakeup_barrier:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4746b559d9256..03ffe8e10f4bbd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6564,6 +6564,19 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
     }
   }
 
+  // Legalize s_sleep_var.
+  if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
+    const DebugLoc &DL = MI.getDebugLoc();
+    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+    int Src0Idx =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+    MachineOperand &Src0 = MI.getOperand(Src0Idx);
+    BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+        .add(Src0);
+    Src0.ChangeToRegister(Reg, false);
+    return nullptr;
+  }
+
   // Legalize MUBUF instructions.
   bool isSoffsetLegal = true;
   int SoffsetIdx =
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 50c4d279cfe23d..c51534cdbd3054 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1602,6 +1602,10 @@ def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
   "$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
 }
 
+def S_SLEEP_VAR : SOP1_0_32 <"s_sleep_var", [(int_amdgcn_s_sleep_var SSrc_b32:$src0)]> {
+  let hasSideEffects = 1;
+}
+
 def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
   [(int_amdgcn_s_setprio timm:$simm16)]> {
 }
@@ -1997,6 +2001,7 @@ defm S_GET_BARRIER_STATE_IMM      : SOP1_Real_gfx12<0x050>;
 defm S_BARRIER_INIT_IMM           : SOP1_Real_gfx12<0x051>;
 defm S_BARRIER_JOIN_IMM           : SOP1_Real_gfx12<0x052>;
 defm S_WAKEUP_BARRIER_IMM         : SOP1_Real_gfx12<0x057>;
+defm S_SLEEP_VAR                  : SOP1_Real_gfx12<0x058>;
 
 //===----------------------------------------------------------------------===//
 // SOP1 - GFX1150, GFX12
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
new file mode 100644
index 00000000000000..5ad7ddfbe5fe9d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=0 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN %s
+
+declare void @llvm.amdgcn.s.sleep.var(i32)
+
+define void @test_s_sleep_var1(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var1:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_readfirstlane_b32 s0, v0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    s_sleep_var s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+  ret void
+}
+
+define void @test_s_sleep_var2() {
+; GCN-LABEL: test_s_sleep_var2:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_sleep_var 10
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  call void @llvm.amdgcn.s.sleep.var(i32 10)
+  ret void
+}
+
+define amdgpu_kernel void @test_s_sleep_var3(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var3:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_b32 s0, s[0:1], 0x24
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_sleep_var s0
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+  ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index 8f2944586ed290..495a2ea78ffef7 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -1,5 +1,11 @@
 // RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
 
+s_sleep_var 0x1234
+// GFX12: encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+
+s_sleep_var s1
+// GFX12: encoding: [0x01,0x58,0x80,0xbe]
+
 s_cvt_f32_i32 s5, s1
 // GFX12: encoding: [0x01,0x64,0x85,0xbe]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index 1c31ee1e5dd7fb..d15a329c8eade7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -1,5 +1,11 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
 
+# GFX12: s_sleep_var 0x1234                   ; encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00
+
+# GFX12: s_sleep_var s1                       ; encoding: [0x01,0x58,0x80,0xbe]
+0x01,0x58,0x80,0xbe
+
 # GFX12: s_cvt_f32_i32 s5, s1                    ; encoding: [0x01,0x64,0x85,0xbe]
 0x01,0x64,0x85,0xbe
 

Copy link
Collaborator

@piotrAMD piotrAMD left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@jayfoad jayfoad merged commit 3e6da32 into llvm:main Dec 14, 2023
6 of 7 checks passed
@jayfoad jayfoad deleted the gfx12-sleep-var branch December 14, 2023 21:11
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU llvm:ir mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants