Skip to content

Commit

Permalink
[OpenMP] Make Ctor / Dtor functions have external visibility
Browse files Browse the repository at this point in the history
The default construction of constructor functions by LLVM tends to make
them have internal linkage. When we call a ctor / dtor function in the
target region we are actually creating a kernel that is called at
registration. Because the ctor is a kernel we need to make sure it's
externally visible so we can actually call it. This prevented AMDGPU
from correctly using constructors while NVPTX could use them simply
because it ignored internal visibility.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D122504
  • Loading branch information
jhuber6 committed Mar 26, 2022
1 parent e13faa4 commit 3c6d32e
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 30 deletions.
8 changes: 4 additions & 4 deletions clang/lib/CodeGen/CGDeclCXX.cpp
Expand Up @@ -425,17 +425,17 @@ void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,

llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction(
llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI,
SourceLocation Loc, bool TLS) {
llvm::Function *Fn = llvm::Function::Create(
FTy, llvm::GlobalValue::InternalLinkage, Name, &getModule());
SourceLocation Loc, bool TLS, llvm::GlobalVariable::LinkageTypes Linkage) {
llvm::Function *Fn = llvm::Function::Create(FTy, Linkage, Name, &getModule());

if (!getLangOpts().AppleKext && !TLS) {
// Set the section if needed.
if (const char *Section = getTarget().getStaticInitSectionSpecifier())
Fn->setSection(Section);
}

SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
if (Linkage == llvm::GlobalVariable::InternalLinkage)
SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);

Fn->setCallingConv(getRuntimeCC());

Expand Down
8 changes: 4 additions & 4 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Expand Up @@ -1926,7 +1926,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_ctor"), FI, Loc);
FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
llvm::GlobalValue::WeakODRLinkage);
auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
FunctionArgList(), Loc, Loc);
Expand All @@ -1944,7 +1945,6 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
CtorCGF.FinishFunction();
Ctor = Fn;
ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
} else {
Ctor = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
Expand All @@ -1970,7 +1970,8 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
FTy, Twine(Buffer, "_dtor"), FI, Loc);
FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
llvm::GlobalValue::WeakODRLinkage);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
FunctionArgList(), Loc, Loc);
Expand All @@ -1989,7 +1990,6 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
DtorCGF.FinishFunction();
Dtor = Fn;
ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
} else {
Dtor = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CodeGen/CodeGenModule.h
Expand Up @@ -826,7 +826,9 @@ class CodeGenModule : public CodeGenTypeCache {

llvm::Function *CreateGlobalInitOrCleanUpFunction(
llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI,
SourceLocation Loc = SourceLocation(), bool TLS = false);
SourceLocation Loc = SourceLocation(), bool TLS = false,
llvm::GlobalVariable::LinkageTypes Linkage =
llvm::GlobalVariable::InternalLinkage);

/// Return the AST address space of the underlying global variable for D, as
/// determined by its declaration. Normally this is the same as the address
Expand Down
33 changes: 16 additions & 17 deletions clang/test/OpenMP/amdgcn_target_global_constructor.cpp
Expand Up @@ -26,9 +26,8 @@ S A;
// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
// CHECK: @llvm.used = appending addrspace(1) global [2 x i8*] [i8* bitcast (void ()* @__omp_offloading_{{.*}}_ctor to i8*), i8* bitcast (void ()* @__omp_offloading_{{.*}}_dtor to i8*)], section "llvm.metadata"
//.
// CHECK-LABEL: define {{[^@]+}}@__omp_offloading_{{.*}}_ctor
// CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_613a0d56_A_l19_ctor
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_ZN1SC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR3:[0-9]+]]
Expand All @@ -46,7 +45,7 @@ S A;
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__omp_offloading_{{.*}}_dtor
// CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_613a0d56_A_l19_dtor
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) addrspacecast ([[STRUCT_S:%.*]] addrspace(1)* @A to %struct.S*)) #[[ATTR4:[0-9]+]]
Expand Down Expand Up @@ -87,19 +86,19 @@ S A;
// CHECK-NEXT: ret void
//
//.
// CHECK: attributes #0 = { convergent
// CHECK: attributes #1 = { convergent
// CHECK: attributes #2 = { convergent
// CHECK: attributes #3 = { convergent
// CHECK: attributes #4 = { convergent
// CHECK: attributes #0 = { nounwind "kernel" "min-legal-vector-width"="0" }
// CHECK: attributes #1 = { convergent noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #2 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #3 = { convergent }
// CHECK: attributes #4 = { convergent nounwind }
//.
// CHECK-DAG: !{i32 0, {{.*}}, !"__omp_offloading_{{.*}}_ctor", i32 19, i32 1}
// CHECK-DAG: !{i32 0, {{.*}}, !"__omp_offloading_{{.*}}_dtor", i32 19, i32 2}
// CHECK-DAG: !{i32 1, !"A", i32 0, i32 0}
// CHECK-DAG: !{void ()* @__omp_offloading_{{.*}}_ctor, !"kernel", i32 1}
// CHECK-DAG: !{void ()* @__omp_offloading_{{.*}}_dtor, !"kernel", i32 1}
// CHECK-DAG: !{i32 1, !"wchar_size", i32 4}
// CHECK-DAG: !{i32 7, !"openmp", i32 50}
// CHECK-DAG: !{i32 7, !"openmp-device", i32 50}
// CHECK-DAG: !{!"clang version
// CHECK: !0 = !{i32 0, i32 64770, i32 1631194454, !"__omp_offloading__fd02_613a0d56_A_l19_ctor", i32 19, i32 1}
// CHECK: !1 = !{i32 0, i32 64770, i32 1631194454, !"__omp_offloading__fd02_613a0d56_A_l19_dtor", i32 19, i32 2}
// CHECK: !2 = !{i32 1, !"A", i32 0, i32 0}
// CHECK: !3 = !{void ()* @__omp_offloading__fd02_613a0d56_A_l19_ctor, !"kernel", i32 1}
// CHECK: !4 = !{void ()* @__omp_offloading__fd02_613a0d56_A_l19_dtor, !"kernel", i32 1}
// CHECK: !5 = !{i32 1, !"wchar_size", i32 4}
// CHECK: !6 = !{i32 7, !"openmp", i32 50}
// CHECK: !7 = !{i32 7, !"openmp-device", i32 50}
// CHECK: !8 = !{!"clang version 15.0.0"}
//.
1 change: 0 additions & 1 deletion clang/test/OpenMP/declare_target_codegen.cpp
Expand Up @@ -46,7 +46,6 @@
// CHECK-DAG: [[STAT:@.+stat]] = internal global %struct.S zeroinitializer,
// CHECK-DAG: [[STAT_REF:@.+]] = internal constant %struct.S* [[STAT]]
// CHECK-DAG: @out_decl_target ={{ protected | }}global i32 0,
// CHECK-DAG: @llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @__omp_offloading__{{.+}}_globals_l[[@LINE+84]]_ctor to i8*), i8* bitcast (void ()* @__omp_offloading__{{.+}}_stat_l[[@LINE+85]]_ctor to i8*)],
// CHECK-DAG: @llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (%struct.S** [[STAT_REF]] to i8*)],

// CHECK-DAG: define {{.*}}i32 @{{.*}}{{foo|bar|baz2|baz3|FA|f_method}}{{.*}}()
Expand Down
Expand Up @@ -44,7 +44,7 @@ int caz() { return 0; }
static int c = foo() + bar() + baz();
#pragma omp declare target (c)
// HOST-DAG: @[[C_CTOR:__omp_offloading__.+_c_l44_ctor]] = private constant i8 0
// DEVICE-DAG: define internal void [[C_CTOR:@__omp_offloading__.+_c_l44_ctor]]()
// DEVICE-DAG: define weak_odr void [[C_CTOR:@__omp_offloading__.+_c_l44_ctor]]()
// DEVICE-DAG: call noundef i32 [[FOO]]()
// DEVICE-DAG: call noundef i32 [[BAR]]()
// DEVICE-DAG: call noundef i32 [[BAZ]]()
Expand All @@ -61,14 +61,14 @@ struct S {
S cd = doo() + car() + caz() + baz();
#pragma omp end declare target
// HOST-DAG: @[[CD_CTOR:__omp_offloading__.+_cd_l61_ctor]] = private constant i8 0
// DEVICE-DAG: define internal void [[CD_CTOR:@__omp_offloading__.+_cd_l61_ctor]]()
// DEVICE-DAG: define weak_odr void [[CD_CTOR:@__omp_offloading__.+_cd_l61_ctor]]()
// DEVICE-DAG: call noundef i32 [[DOO]]()
// DEVICE-DAG: call noundef i32 [[CAR]]()
// DEVICE-DAG: call noundef i32 [[CAZ]]()
// DEVICE-DAG: ret void

// HOST-DAG: @[[CD_DTOR:__omp_offloading__.+_cd_l61_dtor]] = private constant i8 0
// DEVICE-DAG: define internal void [[CD_DTOR:@__omp_offloading__.+_cd_l61_dtor]]()
// DEVICE-DAG: define weak_odr void [[CD_DTOR:@__omp_offloading__.+_cd_l61_dtor]]()
// DEVICE-DAG: call void
// DEVICE-DAG: ret void

Expand Down

0 comments on commit 3c6d32e

Please sign in to comment.