-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Add GFX12 s_sleep_var instruction and intrinsic #75499
Merged
+81
−0
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Jay Foad (jayfoad) ChangesFull diff: https://github.com/llvm/llvm-project/pull/75499.diff 7 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 09e88152e65d2a..b1aefc1777f855 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1715,6 +1715,12 @@ def int_amdgcn_s_sleep :
IntrHasSideEffects]> {
}
+def int_amdgcn_s_sleep_var
+ : ClangBuiltin<"__builtin_amdgcn_s_sleep_var">,
+ Intrinsic<[], [llvm_i32_ty],
+ [IntrNoMem, IntrHasSideEffects, IntrWillReturn]> {
+}
+
def int_amdgcn_s_nop :
DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects]> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 03b6d19b2b3c06..d0c1302c3f003c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3101,6 +3101,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyDefaultMapping(OpdMapper);
constrainOpWithReadfirstlane(B, MI, 8); // M0
return;
+ case Intrinsic::amdgcn_s_sleep_var:
+ assert(OpdMapper.getVRegs(1).empty());
+ constrainOpWithReadfirstlane(B, MI, 1);
+ return;
case Intrinsic::amdgcn_s_barrier_signal_var:
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
@@ -4849,6 +4853,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); // %data1
break;
}
+ case Intrinsic::amdgcn_s_sleep_var:
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ break;
case Intrinsic::amdgcn_s_barrier_signal_var:
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4746b559d9256..03ffe8e10f4bbd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6564,6 +6564,19 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
}
}
+ // Legalize s_sleep_var.
+ if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ int Src0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src0);
+ Src0.ChangeToRegister(Reg, false);
+ return nullptr;
+ }
+
// Legalize MUBUF instructions.
bool isSoffsetLegal = true;
int SoffsetIdx =
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 50c4d279cfe23d..c51534cdbd3054 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1602,6 +1602,10 @@ def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
"$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
}
+def S_SLEEP_VAR : SOP1_0_32 <"s_sleep_var", [(int_amdgcn_s_sleep_var SSrc_b32:$src0)]> {
+ let hasSideEffects = 1;
+}
+
def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
[(int_amdgcn_s_setprio timm:$simm16)]> {
}
@@ -1997,6 +2001,7 @@ defm S_GET_BARRIER_STATE_IMM : SOP1_Real_gfx12<0x050>;
defm S_BARRIER_INIT_IMM : SOP1_Real_gfx12<0x051>;
defm S_BARRIER_JOIN_IMM : SOP1_Real_gfx12<0x052>;
defm S_WAKEUP_BARRIER_IMM : SOP1_Real_gfx12<0x057>;
+defm S_SLEEP_VAR : SOP1_Real_gfx12<0x058>;
//===----------------------------------------------------------------------===//
// SOP1 - GFX1150, GFX12
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
new file mode 100644
index 00000000000000..5ad7ddfbe5fe9d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=0 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN %s
+
+declare void @llvm.amdgcn.s.sleep.var(i32)
+
+define void @test_s_sleep_var1(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT: s_sleep_var s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+ ret void
+}
+
+define void @test_s_sleep_var2() {
+; GCN-LABEL: test_s_sleep_var2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_sleep_var 10
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.s.sleep.var(i32 10)
+ ret void
+}
+
+define amdgpu_kernel void @test_s_sleep_var3(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var3:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_b32 s0, s[0:1], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_sleep_var s0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+ ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index 8f2944586ed290..495a2ea78ffef7 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -1,5 +1,11 @@
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
+s_sleep_var 0x1234
+// GFX12: encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+
+s_sleep_var s1
+// GFX12: encoding: [0x01,0x58,0x80,0xbe]
+
s_cvt_f32_i32 s5, s1
// GFX12: encoding: [0x01,0x64,0x85,0xbe]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index 1c31ee1e5dd7fb..d15a329c8eade7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -1,5 +1,11 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
+# GFX12: s_sleep_var 0x1234 ; encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00
+
+# GFX12: s_sleep_var s1 ; encoding: [0x01,0x58,0x80,0xbe]
+0x01,0x58,0x80,0xbe
+
# GFX12: s_cvt_f32_i32 s5, s1 ; encoding: [0x01,0x64,0x85,0xbe]
0x01,0x64,0x85,0xbe
|
piotrAMD
approved these changes
Dec 14, 2023
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.