Skip to content

Commit

Permalink
[HIP][Clang][CodeGen] Add CodeGen support for hipstdpar
Browse files Browse the repository at this point in the history
This patch adds the CodeGen changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. This change relaxes restrictions on what gets emitted on the device path, when compiling in `hipstdpar` mode:

1. Unless a function is explicitly marked `__host__`, it will get emitted, whereas before only `__device__` and `__global__` functions would be emitted;
2. Unsupported builtins are ignored as opposed to being marked as an error, as the decision on their validity is deferred to the `hipstdpar` specific code selection pass;
3. We add a `hipstdpar` specific pass to the opt pipeline, independent of optimisation level:
    - When compiling for the host, iff the user requested it via the `--hipstdpar-interpose-alloc` flag, we add a pass which replaces canonical allocation / deallocation functions with accelerator aware equivalents.

A test to validate that unannotated functions get correctly emitted is added as well.

Reviewed by: yaxunl, efriedma

Differential Revision: https://reviews.llvm.org/D155850
  • Loading branch information
AlexVlx committed Oct 17, 2023
1 parent be9bc54 commit dd5d65a
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 9 deletions.
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
Expand Down Expand Up @@ -1108,6 +1109,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
return;
}

if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
LangOpts.HIPStdParInterposeAlloc)
MPM.addPass(HipStdParAllocationInterpositionPass());

// Now that we have all of the passes ready, run them.
{
PrettyStackTraceString CrashInfo("Optimizer");
Expand Down
26 changes: 26 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2327,6 +2327,19 @@ static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
return nullptr;
}

static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
const FunctionDecl *FD) {
auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);

SmallVector<Value *, 16> Args;
for (auto &&FormalTy : FnTy->params())
Args.push_back(llvm::PoisonValue::get(FormalTy));

return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}

RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
Expand Down Expand Up @@ -5765,6 +5778,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
}

if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
return EmitHipStdParUnsupportedBuiltin(this, FD);

ErrorUnsupported(E, "builtin function");

// Unknown builtin, for now just dump it out and return undef.
Expand All @@ -5775,6 +5791,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
// When compiling in HipStdPar mode we have to be conservative in rejecting
// target specific features in the FE, and defer the possible error to the
// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
// referenced by an accelerator executable function, we emit an error.
// Returning nullptr here leads to the builtin being handled in
// EmitStdParUnsupportedBuiltin.
if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
Arch != CGF->getTarget().getTriple().getArch())
return nullptr;

switch (Arch) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
Expand Down
37 changes: 33 additions & 4 deletions clang/lib/CodeGen/CGStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2420,6 +2420,24 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
}
}

static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF,
const AsmStmt &S) {
constexpr auto Name = "__ASM__hipstdpar_unsupported";

StringRef Asm;
if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S))
Asm = GCCAsm->getAsmString()->getString();

auto &Ctx = CGF->CGM.getLLVMContext();

auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm);
auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
{StrTy->getType()}, false);
auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);

CGF->Builder.CreateCall(UBF, {StrTy});
}

void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Pop all cleanup blocks at the end of the asm statement.
CodeGenFunction::RunCleanupsScope Cleanups(*this);
Expand All @@ -2431,27 +2449,38 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos;

for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
bool IsValidTargetAsm = true;
for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) {
StringRef Name;
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
Name = GAS->getOutputName(i);
TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
assert(IsValid && "Failed to parse output constraint");
if (IsHipStdPar && !IsValid)
IsValidTargetAsm = false;
else
assert(IsValid && "Failed to parse output constraint");
OutputConstraintInfos.push_back(Info);
}

for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) {
StringRef Name;
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
Name = GAS->getInputName(i);
TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name);
bool IsValid =
getTarget().validateInputConstraint(OutputConstraintInfos, Info);
assert(IsValid && "Failed to parse input constraint"); (void)IsValid;
if (IsHipStdPar && !IsValid)
IsValidTargetAsm = false;
else
assert(IsValid && "Failed to parse input constraint");
InputConstraintInfos.push_back(Info);
}

if (!IsValidTargetAsm)
return EmitHipStdParUnsupportedAsm(this, S);

std::string Constraints;

std::vector<LValue> ResultRegDests;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS
Extensions
FrontendHLSL
FrontendOpenMP
HIPStdPar
IPO
IRPrinter
IRReader
Expand Down
12 changes: 9 additions & 3 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2594,10 +2594,15 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
std::string MissingFeature;
llvm::StringMap<bool> CallerFeatureMap;
CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD);
// When compiling in HipStdPar mode we have to be conservative in rejecting
// target specific features in the FE, and defer the possible error to the
// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
// referenced by an accelerator executable function, we emit an error.
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
if (BuiltinID) {
StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
if (!Builtin::evaluateRequiredTargetFeatures(
FeatureList, CallerFeatureMap)) {
FeatureList, CallerFeatureMap) && !IsHipStdPar) {
CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
<< TargetDecl->getDeclName()
<< FeatureList;
Expand Down Expand Up @@ -2630,7 +2635,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
return false;
}
return true;
}))
}) && !IsHipStdPar)
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
<< FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature;
} else if (!FD->isMultiVersion() && FD->hasAttr<TargetAttr>()) {
Expand All @@ -2639,7 +2644,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,

for (const auto &F : CalleeFeatureMap) {
if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) ||
!CallerFeatureMap.find(F.getKey())->getValue()))
!CallerFeatureMap.find(F.getKey())->getValue()) &&
!IsHipStdPar)
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
<< FD->getDeclName() << TargetDecl->getDeclName() << F.getKey();
}
Expand Down
7 changes: 5 additions & 2 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3526,7 +3526,7 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject(
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
Emitter.finalize(GV);

return ConstantAddress(GV, GV->getValueType(), Alignment);
return ConstantAddress(GV, GV->getValueType(), Alignment);
}

ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
Expand Down Expand Up @@ -3585,7 +3585,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
!Global->hasAttr<CUDAConstantAttr>() &&
!Global->hasAttr<CUDASharedAttr>() &&
!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
!Global->getType()->isCUDADeviceBuiltinTextureType())
!Global->getType()->isCUDADeviceBuiltinTextureType() &&
!(LangOpts.HIPStdPar &&
isa<FunctionDecl>(Global) &&
!Global->hasAttr<CUDAHostAttr>()))
return;
} else {
// We need to emit host-side 'shadows' for all global
Expand Down
19 changes: 19 additions & 0 deletions clang/test/CodeGenHipStdPar/unannotated-functions-get-emitted.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \
// RUN: -o - %s | FileCheck --check-prefix=NO-HIPSTDPAR-DEV %s

// RUN: %clang_cc1 --hipstdpar -emit-llvm -fcuda-is-device \
// RUN: -o - %s | FileCheck --check-prefix=HIPSTDPAR-DEV %s

#define __device__ __attribute__((device))

// NO-HIPSTDPAR-DEV-NOT: define {{.*}} void @foo({{.*}})
// HIPSTDPAR-DEV: define {{.*}} void @foo({{.*}})
extern "C" void foo(float *a, float b) {
*a = b;
}

// NO-HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
// HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
extern "C" __device__ void bar(float *a, float b) {
*a = b;
}
10 changes: 10 additions & 0 deletions clang/test/CodeGenHipStdPar/unsupported-ASM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s

#define __global__ __attribute__((global))

__global__ void foo(int i) {
asm ("addl %2, %1; seto %b0" : "=q" (i), "+g" (i) : "r" (i));
}

// CHECK: declare void @__ASM__hipstdpar_unsupported([{{.*}}])
8 changes: 8 additions & 0 deletions clang/test/CodeGenHipStdPar/unsupported-builtins.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s

#define __global__ __attribute__((global))

__global__ void foo() { return __builtin_ia32_pause(); }

// CHECK: declare void @__builtin_ia32_pause__hipstdpar_unsupported()

0 comments on commit dd5d65a

Please sign in to comment.