Skip to content

Commit

Permalink
[AMDGPU] Add llvm.amdgcn.cs.chain intrinsic to IR & verifier
Browse files Browse the repository at this point in the history
We only check a subset of the constraints in the verifier:
* that we only call the intrinsic from functions with a restricted set of
calling conventions
* that the 'flags' argument is an immediate

Other checks are (probably) more appropriate for codegen.

Differential Revision: https://reviews.llvm.org/D151995
  • Loading branch information
rovka committed Jun 22, 2023
1 parent 29dcc4c commit 8762bc7
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 0 deletions.
22 changes: 22 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Expand Up @@ -2113,6 +2113,28 @@ def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">,
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>]
>;

// A uniform tail call to a function with the `amdgpu_cs_chain` or
// `amdgpu_cs_chain_preserve` calling convention. It will populate the SGPRs
// starting at s0 and the VGPRs starting at v8, set EXEC and perform a jump to
// the given function.
// Can only be used in functions with the `amdgpu_cs`, `amdgpu_cs_chain` or
// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
// flow.
// The intrinsic has an empty result list and is marked IntrConvergent and
// IntrNoReturn. The Flags operand (argument index 4) is marked ImmArg, so it
// must be an immediate at every call site.
def int_amdgcn_cs_chain:
Intrinsic<[],
[llvm_anyptr_ty, // The function to jump to.
llvm_anyint_ty, // Value to put in EXEC (should be i32 or i64).
llvm_any_ty, // Arguments that will be copied into SGPRs (s0+).
// Must be uniform.
llvm_any_ty, // Arguments that will be copied into VGPRs (v8+).
// Need not be uniform.
llvm_i32_ty, // Flags.
llvm_vararg_ty // Additional arguments. Only present if Flags is
// non-zero.
],
[IntrConvergent, IntrNoReturn, ImmArg<ArgIndex<4>>]>;


//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Expand Up @@ -86,6 +86,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
Expand Down Expand Up @@ -5910,6 +5911,22 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
&Call);
break;
}
case Intrinsic::amdgcn_cs_chain: {
  // The intrinsic is only accepted when the function containing the call uses
  // one of the three calling conventions named in the diagnostic below; any
  // other caller calling convention is reported as a verifier failure.
  const auto CC = Call.getCaller()->getCallingConv();
  const bool CallerIsAllowed = CC == CallingConv::AMDGPU_CS ||
                               CC == CallingConv::AMDGPU_CS_Chain ||
                               CC == CallingConv::AMDGPU_CS_ChainPreserve;
  if (!CallerIsAllowed)
    CheckFailed("Intrinsic can only be used from functions with the "
                "amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve "
                "calling conventions",
                &Call);
  break;
}
};

// Verify that there aren't any unmediated control transfers between funclets.
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
@@ -0,0 +1,46 @@
; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s

declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn

; Negative test: the flags operand (last fixed operand of the call) is the
; function argument %flags rather than a constant, so the immarg diagnostic
; below is expected. The caller uses amdgpu_cs_chain, one of the accepted
; calling conventions.
define amdgpu_cs_chain void @bad_flags(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %flags
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags)
unreachable
}

; Negative test: the second operand (the EXEC value) is passed as a <4 x i32>
; vector instead of an integer, so the signature-mismatch diagnostic below is
; expected.
; NOTE(review): this call also passes the non-constant %flags as the flags
; operand; only the signature diagnostic is matched here.
define amdgpu_cs_chain void @bad_exec(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
; CHECK: Intrinsic called with incompatible signature
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, <4 x i32>, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, <4 x i32> %sgpr, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags)
unreachable
}

; Negative test: the caller is defined without a calling-convention keyword,
; so it is not one of the three accepted conventions and the diagnostic below
; is expected. The flags operand is the constant 0, which satisfies the
; immediate requirement.
define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
unreachable
}

; Negative test: the caller uses the amdgpu_kernel calling convention, which
; is not one of the three accepted conventions, so the diagnostic below is
; expected. The flags operand is the constant 0.
define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
unreachable
}

; Negative test: the caller uses the amdgpu_gfx calling convention, which is
; not one of the three accepted conventions, so the diagnostic below is
; expected. The flags operand is the constant 0.
define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
unreachable
}

; Negative test: the caller uses the amdgpu_vs calling convention, which is
; not one of the three accepted conventions, so the diagnostic below is
; expected. The flags operand is the constant 0.
define amdgpu_vs void @bad_caller_amdgpu_vs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
unreachable
}

0 comments on commit 8762bc7

Please sign in to comment.