Skip to content

Commit

Permalink
[AMDGPU] Non hostcall printf support for HIP
Browse files Browse the repository at this point in the history
This is an alternative to currently existing hostcall implementation and uses printf buffer similar to OpenCL,
The data stored in the buffer (i.e the data frame) for each printf call are as follows,
1. Control DWord - contains info regarding stream, format string constness and size of data frame
2. Hash of the format string (if constant) else the format string itself
3. Printf arguments (each aligned to 8 byte boundary)

The format string Hash is generated using LLVM's MD5 Message-Digest Algorithm implementation and only low 64 bits are used.
The implementation still uses amdhsa metadata and hash is stored as part of format string itself to ensure
minimal changes in runtime.

Differential Revision: https://reviews.llvm.org/D150427
  • Loading branch information
vikramRH committed Jun 10, 2023
1 parent 55aeb23 commit 631c965
Show file tree
Hide file tree
Showing 13 changed files with 892 additions and 17 deletions.
6 changes: 6 additions & 0 deletions clang/docs/ReleaseNotes.rst
Expand Up @@ -588,6 +588,12 @@ AMDGPU Support
--undefined`` if using an offloading language.
- The deprecated ``-mcode-object-v3`` and ``-mno-code-object-v3`` command-line
options have been removed.
- A new option ``-mprintf-kind`` has been introduced that controls printf lowering
scheme. It is currently supported only for HIP and takes following values,
``hostcall`` - printing happens during kernel execution via series of hostcalls,
The scheme requires the system to support pcie atomics.(default)
``buffered`` - Scheme uses a debug buffer to populate printf varargs, does not
rely on pcie atomics support.

X86 Support
^^^^^^^^^^^
Expand Down
13 changes: 13 additions & 0 deletions clang/include/clang/Basic/TargetOptions.h
Expand Up @@ -90,6 +90,19 @@ class TargetOptions {
/// \brief Code object version for AMDGPU.
CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;

/// \brief Enumeration values for AMDGPU printf lowering scheme
enum class AMDGPUPrintfKind {
/// printf lowering scheme involving hostcalls, currently used by HIP
/// programs by default
Hostcall = 0,

/// printf lowering scheme involving implicit printf buffers,
Buffered = 1,
};

/// \brief AMDGPU Printf lowering scheme
AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::Hostcall;

// The code model to be used as specified by the user. Corresponds to
// CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus
// "default" for the case when the user has not explicitly specified a
Expand Down
11 changes: 11 additions & 0 deletions clang/include/clang/Driver/Options.td
Expand Up @@ -1037,6 +1037,17 @@ defm cuda_short_ptr : BoolFOption<"cuda-short-ptr",
TargetOpts<"NVPTXUseShortPointers">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Use 32-bit pointers for accessing const/local/shared address spaces">,
NegFlag<SetFalse>>;
def mprintf_kind_EQ : Joined<["-"], "mprintf-kind=">, Group<m_Group>,
HelpText<"Specify the printf lowering scheme (AMDGPU only), allowed values are "
"\"hostcall\"(printing happens during kernel execution, this scheme "
"relies on hostcalls which require system to support pcie atomics) "
"and \"buffered\"(printing happens after all kernel threads exit, "
"this uses a printf buffer and does not rely on pcie atomic support)">,
Flags<[CC1Option]>,
Values<"hostcall,buffered">,
NormalizedValuesScope<"TargetOptions::AMDGPUPrintfKind">,
NormalizedValues<["Hostcall", "Buffered"]>,
MarshallingInfoEnum<TargetOpts<"AMDGPUPrintfKindVal">, "Hostcall">;
def fgpu_default_stream_EQ : Joined<["-"], "fgpu-default-stream=">,
HelpText<"Specify default stream. The default value is 'legacy'. (HIP only)">,
Flags<[CC1Option]>,
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/CodeGen/CGGPUBuiltin.cpp
Expand Up @@ -202,7 +202,10 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {

llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args);

bool isBuffered = (CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal ==
clang::TargetOptions::AMDGPUPrintfKind::Buffered);
auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered);
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
return RValue::get(Printf);
}
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Expand Up @@ -611,6 +611,17 @@ void CodeGenModule::Release() {
"amdgpu_code_object_version",
getTarget().getTargetOpts().CodeObjectVersion);
}

// Currently, "-mprintf-kind" option is only supported for HIP
if (LangOpts.HIP) {
auto *MDStr = llvm::MDString::get(
getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal ==
TargetOptions::AMDGPUPrintfKind::Hostcall)
? "hostcall"
: "buffered");
getModule().addModuleFlag(llvm::Module::Error, "amdgpu_printf_kind",
MDStr);
}
}

// Emit a global array containing all external kernels or device variables
Expand Down
17 changes: 17 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -4694,8 +4694,25 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back("-aux-triple");
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));

if (JA.isDeviceOffloading(Action::OFK_HIP) &&
getToolChain().getTriple().isAMDGPU()) {
// Device side compilation printf
if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
CmdArgs.push_back(Args.MakeArgString(
"-mprintf-kind=" +
Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
// Force compiler error on invalid conversion specifiers
CmdArgs.push_back(
Args.MakeArgString("-Werror=format-invalid-specifier"));
}
}
}

// Unconditionally claim the printf option now to avoid unused diagnostic.
if (const Arg *PF = Args.getLastArg(options::OPT_mprintf_kind_EQ))
PF->claim();

if (Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) {
CmdArgs.push_back("-fsycl-is-device");

Expand Down
6 changes: 4 additions & 2 deletions clang/test/CodeGenHIP/default-attributes.hip
Expand Up @@ -47,8 +47,10 @@ __global__ void kernel() {
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
//.
// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPTNONE: !1 = !{i32 1, !"wchar_size", i32 4}
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
//.
// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPT: !1 = !{i32 1, !"wchar_size", i32 4}
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
//.
17 changes: 17 additions & 0 deletions clang/test/CodeGenHIP/printf-kind-module-flag.hip
@@ -0,0 +1,17 @@
// Create module flag for printf kind.

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -o - %s | FileCheck %s -check-prefix=HOSTCALL

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mprintf-kind=hostcall -o - %s | FileCheck %s -check-prefix=HOSTCALL

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mprintf-kind=buffered -o - %s | FileCheck -check-prefix=BUFFERED %s

// RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mprintf-kind=none -o - %s 2>&1| FileCheck %s -check-prefix=INV

// HOSTCALL: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// BUFFERED: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"buffered"}
// INV: error: invalid value 'none' in '-mprintf-kind=none'

0 comments on commit 631c965

Please sign in to comment.