114 changes: 57 additions & 57 deletions clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions clang/test/OpenMP/target_parallel_debug_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG47]]
// CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true), !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG47]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]]
// CHECK1: user_code.entry:
Expand All @@ -117,7 +117,7 @@ int main() {
// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP18]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP19]], i64 4), !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB5:[0-9]+]], i1 true, i1 true), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG47]]
Expand Down Expand Up @@ -316,7 +316,7 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG137]]
// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB7:[0-9]+]], i1 true, i1 false, i1 true), !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG137]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]]
// CHECK1: user_code.entry:
Expand All @@ -338,7 +338,7 @@ int main() {
// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP19]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP20]], i64 4), !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB11:[0-9]+]], i1 true, i1 true), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG137]]
Expand Down Expand Up @@ -531,7 +531,7 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG212]]
// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB13:[0-9]+]], i1 true, i1 false, i1 true), !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]]
// CHECK1: user_code.entry:
Expand All @@ -549,7 +549,7 @@ int main() {
// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP20]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i32*, [10 x [10 x i32]]*, i8*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i1 true, i1 true), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG212]]
Expand Down
12 changes: 6 additions & 6 deletions clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG41]]
// CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
// CHECK1: user_code.entry:
Expand All @@ -114,7 +114,7 @@ int main() {
// CHECK1-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB6]], i32 [[TMP9]], i32 [[TMP20]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB8:[0-9]+]], i1 true, i1 false), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB8:[0-9]+]], i8 2, i1 false), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
Expand Down Expand Up @@ -392,7 +392,7 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG146]]
// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB10:[0-9]+]], i1 true, i1 false, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
// CHECK1: user_code.entry:
Expand All @@ -414,7 +414,7 @@ int main() {
// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP19]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP20]], i64 4), !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i1 true, i1 false), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 false), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
Expand Down Expand Up @@ -680,7 +680,7 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG236]]
// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB19:[0-9]+]], i1 true, i1 false, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
// CHECK1: user_code.entry:
Expand All @@ -698,7 +698,7 @@ int main() {
// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP20]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i32*, [10 x [10 x i32]]*, i8*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB26:[0-9]+]], i1 true, i1 false), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB26:[0-9]+]], i8 2, i1 false), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)

/// OpenMP Device runtime functions
__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int1, Int1, Int1)
__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int1, Int1)
__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1)
__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2761,7 +2761,9 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,

Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
Value *Ident = getOrCreateIdent(SrcLocStr);
ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachine =
ConstantInt::getBool(Int32->getContext(), !IsSPMD);
ConstantInt *RequiresFullRuntimeVal =
Expand Down Expand Up @@ -2812,7 +2814,9 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,

Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
Value *Ident = getOrCreateIdent(SrcLocStr);
ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *RequiresFullRuntimeVal =
ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);

Expand Down
58 changes: 31 additions & 27 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2580,10 +2580,9 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
const int InitIsSPMDArgNo = 1;
auto *IsSPMDModeCI =
dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
return IsSPMDModeCI && IsSPMDModeCI->isZero();
const int InitModeArgNo = 1;
auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
}

if (C->isZero()) {
Expand Down Expand Up @@ -2941,7 +2940,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
return FalseVal;
};

Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB =
Attributor::SimplifictionCallbackTy ModeSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
// IRP represents the "SPMDCompatibilityTracker" argument of an
Expand All @@ -2957,8 +2956,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
} else {
UsedAssumedInformation = false;
}
auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
SPMDCompatibilityTracker.isAssumed());
auto *Val = ConstantInt::getSigned(
IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
: OMP_TGT_EXEC_MODE_GENERIC);
return Val;
};

Expand All @@ -2983,20 +2984,20 @@ struct AAKernelInfoFunction : AAKernelInfo {
return Val;
};

constexpr const int InitIsSPMDArgNo = 1;
constexpr const int DeinitIsSPMDArgNo = 1;
constexpr const int InitModeArgNo = 1;
constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
constexpr const int InitRequiresFullRuntimeArgNo = 3;
constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo),
IsSPMDModeSimplifyCB);
IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
IsSPMDModeSimplifyCB);
IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB,
InitRequiresFullRuntimeArgNo),
Expand All @@ -3007,9 +3008,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
IsGenericModeSimplifyCB);

// Check if we know we are in SPMD-mode already.
ConstantInt *IsSPMDArg =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
if (IsSPMDArg && !IsSPMDArg->isZero())
ConstantInt *ModeArg =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
// This is a generic region but SPMDization is disabled so stop tracking.
else if (DisableOpenMPOptSPMDization)
Expand Down Expand Up @@ -3298,21 +3299,24 @@ struct AAKernelInfoFunction : AAKernelInfo {
ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));

// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
const int InitIsSPMDArgNo = 1;
const int DeinitIsSPMDArgNo = 1;
const int InitModeArgNo = 1;
const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
const int InitRequiresFullRuntimeArgNo = 3;
const int DeinitRequiresFullRuntimeArgNo = 2;

auto &Ctx = getAnchorValue().getContext();
A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
*ConstantInt::getBool(Ctx, 1));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
*ConstantInt::getBool(Ctx, 0));
A.changeUseAfterManifest(
KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
*ConstantInt::getBool(Ctx, 1));
KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, 0));
Expand All @@ -3337,7 +3341,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
assert(ReachedKnownParallelRegions.isValidState() &&
"Custom state machine with invalid parallel region states?");

const int InitIsSPMDArgNo = 1;
const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;

// Check if the current configuration is non-SPMD and generic state machine.
Expand All @@ -3346,14 +3350,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// we give up.
ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
ConstantInt *IsSPMD =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
ConstantInt *Mode =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));

// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD ||
!IsSPMD->isZero())
if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
(Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
return ChangeStatus::UNCHANGED;

// If not SPMD mode, indicate we use a custom state machine now.
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/OpenMP/always_inline_device.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,32 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK: Function Attrs: convergent norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry: ; preds = %entry
call void @bar() #2
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
ret void

worker.exit: ; preds = %entry
ret void
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

; Function Attrs: convergent nounwind
define hidden void @bar() #1 {
Expand Down
164 changes: 82 additions & 82 deletions llvm/test/Transforms/OpenMP/custom_state_machines.ll

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ target triple = "nvptx64"
;; unknown();
;; }
;; }
;;
;;
;; void test_fallback(void) {
;; #pragma omp target teams
;; {
Expand Down Expand Up @@ -60,7 +60,7 @@ target triple = "nvptx64"
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3, !dbg !18
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18

Expand All @@ -77,11 +77,11 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
call void @unknown() #6, !dbg !27
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i1 false, i1 true) #3, !dbg !28
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28
br label %common.ret
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr

; Function Attrs: convergent
declare void @unknown() local_unnamed_addr #1
Expand All @@ -99,13 +99,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr

; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i1 false, i1 true, i1 true) #3, !dbg !33
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33

Expand All @@ -130,7 +130,7 @@ user_code.entry: ; preds = %entry
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
call void @no_openmp()
call void @no_parallelism()
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i1 false, i1 true) #3, !dbg !46
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46
br label %common.ret
}

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/OpenMP/deduplication_target.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,37 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 true, i1 false, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
%2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 true, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true)
ret void

worker.exit: ; preds = %entry
ret void
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { nounwind }
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 false)
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
Expand All @@ -20,11 +20,11 @@ define void @kernel() {
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 false)
; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false)
; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK-DISABLED: if.then:
Expand All @@ -34,10 +34,10 @@ define void @kernel() {
; CHECK-DISABLED-NEXT: call void @bar()
; CHECK-DISABLED-NEXT: br label [[IF_END]]
; CHECK-DISABLED: if.end:
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
; CHECK-DISABLED-NEXT: ret void
;
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 false)
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
Expand All @@ -47,7 +47,7 @@ if.else:
call void @bar()
br label %if.end
if.end:
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
ret void
}

Expand Down Expand Up @@ -135,9 +135,9 @@ declare i8 @__kmpc_is_generic_main_thread_id(i32)

declare i32 @__kmpc_get_hardware_thread_id()

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@ target triple = "nvptx64"
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
ret void
}

Expand All @@ -38,14 +38,14 @@ define weak void @kernel0() #0 {
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
call void @helper1()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
ret void
}

Expand All @@ -56,7 +56,7 @@ define weak void @kernel2() #0 {
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -68,12 +68,12 @@ define weak void @kernel2() #0 {
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -87,7 +87,7 @@ user_code.entry:
call void @helper1()
call void @helper2()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
ret void
}

Expand Down Expand Up @@ -179,8 +179,8 @@ entry:
}

declare i32 @__kmpc_get_hardware_num_threads_in_block()
declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/OpenMP/global_constructor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
entry:
%0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 true, i1 false, i1 false) #0
%0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false, i1 false) #0
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -29,13 +29,13 @@ region.guarded:

region.barrier:
tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2)
tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 true, i1 false) #0
tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2, i1 false) #0
br label %common.ret
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr

define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
entry:
Expand Down Expand Up @@ -78,7 +78,7 @@ attributes #1 = { convergent nounwind }
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -93,6 +93,6 @@ attributes #1 = { convergent nounwind }
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR1]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,22 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, i32* %.zero.addr, align 4
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
store i32 %1, i32* %.threadid_temp., align 4
call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
ret void

worker.exit: ; preds = %entry
ret void
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
declare void @unknown()

define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
Expand Down Expand Up @@ -146,7 +146,7 @@ entry:

declare i32 @__kmpc_global_thread_num(%struct.ident_t*)

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
entry:
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,26 @@ target triple = "nvptx64"
;.
define weak void @is_spmd() {
; CHECK-LABEL: define {{[^@]+}}@is_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: call void @is_spmd_helper1()
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
call void @is_spmd_helper1()
call void @is_spmd_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
ret void
}

define weak void @will_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -50,12 +50,12 @@ define weak void @will_be_spmd() {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -67,41 +67,41 @@ user_code.entry:
%1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
call void @is_spmd_helper2()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
ret void
}

define weak void @non_spmd() {
; CHECK-LABEL: define {{[^@]+}}@non_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
ret void
}

define weak void @will_not_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
ret void
}

Expand Down Expand Up @@ -197,8 +197,8 @@ entry:

declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable"
declare i8 @__kmpc_is_spmd_exec_mode()
declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext)
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext)
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext)
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
declare void @foo()
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/OpenMP/parallel_level_fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,46 +19,46 @@ target triple = "nvptx64"
;.
define weak void @none_spmd() {
; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
; CHECK-NEXT: call void @none_spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
call void @none_spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
ret void
}

define weak void @spmd() {
; CHECK-LABEL: define {{[^@]+}}@spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
call void @spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
ret void
}

define weak void @parallel() {
; CHECK-LABEL: define {{[^@]+}}@parallel() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* align 1073741824 null, i1 true, i1 false, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* align 1073741824 null, i8 2, i1 false, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noalias noundef align 1073741824 null, i32 noundef 0, i32 noundef 0, i32 noundef 0, i32 noundef 0, i8* noalias noundef align 1073741824 null, i8* noalias noundef align 1073741824 null, i8** noalias noundef align 1073741824 null, i64 noundef 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
%i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
call void @spmd_helper()
call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
ret void
}

Expand Down Expand Up @@ -130,8 +130,8 @@ define internal void @parallel_helper() {
declare void @foo()
declare void @bar()
declare i8 @__kmpc_parallel_level()
declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1
declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext, i1 zeroext) #1

!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2, !3, !4}
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/OpenMP/remove_globalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ target triple = "nvptx64"

%struct.ident_t = type { i32, i32, i32, i32, i8* }

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
Expand All @@ -25,28 +25,28 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
; CHECK-NEXT: call void @foo() #[[ATTR4:[0-9]+]]
; CHECK-NEXT: call void @bar() #[[ATTR4]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true)
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR4:[0-9]+]]
; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR4]]
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp()
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
; CHECK-DISABLED-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 true, i1 true)
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true, i1 true)
call void @foo()
call void @bar()
call void @unknown_no_openmp()
call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
ret void
}

Expand Down
32 changes: 16 additions & 16 deletions llvm/test/Transforms/OpenMP/replace_globalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,30 @@ target triple = "nvptx64"

define dso_local void @foo() {
entry:
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
%x = call i8* @__kmpc_alloc_shared(i64 4)
call void @unknown_no_openmp()
%x_on_stack = bitcast i8* %x to i32*
%0 = bitcast i32* %x_on_stack to i8*
call void @use(i8* %0)
call void @__kmpc_free_shared(i8* %x, i64 4)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
ret void
}

define void @bar() {
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
call void @unknown_no_openmp()
call void @baz()
call void @qux()
call void @negative_qux_spmd()
call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
ret void
}

define internal void @baz() {
entry:
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 true)
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 true)
call void @unknown_no_openmp()
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %master, label %exit
Expand All @@ -57,7 +57,7 @@ exit:

define internal void @qux() {
entry:
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true)
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true)
call void @unknown_no_openmp()
%0 = icmp eq i32 %call, -1
br i1 %0, label %master, label %exit
Expand All @@ -74,7 +74,7 @@ exit:

define internal void @negative_qux_spmd() {
entry:
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 true, i1 true, i1 true)
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 true, i1 true)
call void @unknown_no_openmp()
%0 = icmp eq i32 %call, -1
br i1 %0, label %master, label %exit
Expand Down Expand Up @@ -106,9 +106,9 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()

declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"

Expand Down Expand Up @@ -138,28 +138,28 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
;.
; CHECK-LABEL: define {{[^@]+}}@foo() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; CHECK-NEXT: [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR4]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bar() {
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @baz()
; CHECK-NEXT: call void @qux()
; CHECK-NEXT: call void @negative_qux_spmd()
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@baz() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i8 noundef 1, i1 noundef false, i1 noundef true)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER:%.*]], label [[EXIT:%.*]]
Expand All @@ -172,7 +172,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
;
; CHECK-LABEL: define {{[^@]+}}@qux() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true)
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i8 noundef 1, i1 noundef true, i1 noundef true)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]]
Expand All @@ -185,7 +185,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
;
; CHECK-LABEL: define {{[^@]+}}@negative_qux_spmd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef true, i1 noundef true, i1 noundef true)
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i8 noundef 2, i1 noundef true, i1 noundef true)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/OpenMP/single_threaded_execution.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
define void @kernel() {
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 false)
%call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
br label %if.end
if.else:
br label %if.end
if.end:
call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true)
ret void
}

Expand Down Expand Up @@ -105,9 +105,9 @@ declare i32 @llvm.amdgcn.workitem.id.x()

declare void @__kmpc_kernel_prepare_parallel(i8*)

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

attributes #0 = { cold noinline }

Expand Down
212 changes: 108 additions & 104 deletions llvm/test/Transforms/OpenMP/spmdization.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/Transforms/OpenMP/spmdization_assumes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
Expand All @@ -55,12 +55,12 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8
; CHECK: region.exit:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret

Expand All @@ -73,11 +73,11 @@ user_code.entry: ; preds = %entry
store double %call.i, double* %x, align 8, !tbaa !8
%2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0
call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 false, i1 true) #3
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
br label %common.ret
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr

; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 {
Expand Down Expand Up @@ -113,7 +113,7 @@ declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr

; Function Attrs: convergent
declare double @__nv_sin(double) local_unnamed_addr #5
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/OpenMP/spmdization_guarding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
; CHECK-SAME: (i32* [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false, i1 false) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
Expand Down Expand Up @@ -166,7 +166,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR4]]
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR4]]
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
Expand All @@ -176,7 +176,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 false, i1 false, i1 true) #[[ATTR4:[0-9]+]]
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false, i1 true) #[[ATTR4:[0-9]+]]
; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLED: worker_state_machine.begin:
Expand Down Expand Up @@ -245,14 +245,14 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 false, i1 true) #[[ATTR4]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 true) #[[ATTR4]]
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
entry:
%N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit

Expand Down Expand Up @@ -298,7 +298,7 @@ __omp_outlined__.exit: ; preds = %for.cond.i
%call14.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
%call15.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
%call16.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 false, i1 true) #3
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
ret void

worker.exit: ; preds = %entry
Expand Down Expand Up @@ -330,7 +330,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) {

declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

; Function Attrs: convergent
declare i32 @no_openmp(i32*) #1
Expand All @@ -341,7 +341,7 @@ declare void @pure() #2
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
declare void @llvm.experimental.noalias.scope.decl(metadata) #4
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/OpenMP/spmdization_remarks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ target triple = "nvptx64"
;; unknown();
;; }
;; }
;;
;;
;; void test_fallback(void) {
;; #pragma omp target teams
;; {
Expand Down Expand Up @@ -62,7 +62,7 @@ target triple = "nvptx64"
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3, !dbg !18
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18

Expand All @@ -79,11 +79,11 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
call void @unknown() #6, !dbg !27
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i1 false, i1 true) #3, !dbg !28
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28
br label %common.ret
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr

; Function Attrs: convergent
declare void @unknown() local_unnamed_addr #1
Expand All @@ -101,13 +101,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3

declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr

; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i1 false, i1 true, i1 true) #3, !dbg !33
%0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33

Expand All @@ -131,7 +131,7 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
call void @spmd_amenable()
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i1 false, i1 true) #3, !dbg !46
call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46
br label %common.ret
}

Expand Down
4 changes: 2 additions & 2 deletions openmp/libomptarget/DeviceRTL/include/Interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,10 @@ uint32_t __kmpc_get_hardware_thread_id_in_block();
///{
int8_t __kmpc_is_spmd_exec_mode();

int32_t __kmpc_target_init(IdentTy *Ident, bool IsSPMD,
int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
bool UseGenericStateMachine, bool);

void __kmpc_target_deinit(IdentTy *Ident, bool IsSPMD, bool);
void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool);

///}

Expand Down
5 changes: 5 additions & 0 deletions openmp/libomptarget/DeviceRTL/include/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ typedef enum omp_allocator_handle_t {
KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
} omp_allocator_handle_t;

/// Execution-mode flags stored in an 8-bit value. Each enumerator occupies
/// its own bit, so a mode value can be queried with a bitwise AND against the
/// flag of interest (e.g. `Mode & OMP_TGT_EXEC_MODE_SPMD`).
enum OMPTgtExecModeFlags : int8_t {
  OMP_TGT_EXEC_MODE_GENERIC = 0x01, // bit 0
  OMP_TGT_EXEC_MODE_SPMD = 0x02,    // bit 1
};

#define __PRAGMA(STR) _Pragma(#STR)
#define OMP_PRAGMA(STR) __PRAGMA(omp STR)

Expand Down
6 changes: 4 additions & 2 deletions openmp/libomptarget/DeviceRTL/src/Kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@ extern "C" {
///
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(IdentTy *Ident, bool IsSPMD,
int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
bool UseGenericStateMachine, bool) {
const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
if (IsSPMD) {
inititializeRuntime(/* IsSPMD */ true);
synchronize::threads();
Expand Down Expand Up @@ -96,7 +97,8 @@ int32_t __kmpc_target_init(IdentTy *Ident, bool IsSPMD,
///
/// \param Ident Source location identification, can be NULL.
///
void __kmpc_target_deinit(IdentTy *Ident, bool IsSPMD, bool) {
void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) {
const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
state::assumeInitialState(IsSPMD);
if (IsSPMD)
return;
Expand Down
8 changes: 4 additions & 4 deletions openmp/libomptarget/deviceRTLs/common/include/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ typedef struct ident ident_t;
///
/// \code
/// void kernel(...) {
/// ThreadKind = __kmpc_target_init(Ident, /* IsSPMD */ false,
/// ThreadKind = __kmpc_target_init(Ident, /* Mode */ 1,
/// /* UseGenericStateMachine */ true,
/// /* RequiresFullRuntime */ ... );
/// if (ThreadKind == -1) {
Expand All @@ -50,7 +50,7 @@ typedef struct ident ident_t;
///
/// \code
/// void kernel(...) {
/// ThreadKind = __kmpc_target_init(Ident, /* IsSPMD */ false,
/// ThreadKind = __kmpc_target_init(Ident, /* Mode */ 1,
/// /* UseGenericStateMachine */ false,
/// /* RequiresFullRuntime */ ... );
/// if (ThreadKind == -1) {
Expand All @@ -72,7 +72,7 @@ typedef struct ident ident_t;
///
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
bool UseGenericStateMachine,
bool RequiresFullRuntime);

Expand All @@ -86,7 +86,7 @@ int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
///
/// \param Ident Source location identification, can be NULL.
///
void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
bool RequiresFullRuntime);

///}
Expand Down
2 changes: 1 addition & 1 deletion openmp/libomptarget/deviceRTLs/common/omptarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ extern uint16_t EXTERN_SHARED(nThreads);
extern omptarget_nvptx_ThreadPrivateContext *
EXTERN_SHARED(omptarget_nvptx_threadPrivateContext);

extern uint32_t EXTERN_SHARED(execution_param);
extern int8_t EXTERN_SHARED(execution_param);
extern void *EXTERN_SHARED(ReductionScratchpadPtr);

////////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 1 addition & 1 deletion openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ omptarget_nvptx_WorkFn SHARED(omptarget_nvptx_workFn);
////////////////////////////////////////////////////////////////////////////////
// OpenMP kernel execution parameters
////////////////////////////////////////////////////////////////////////////////
uint32_t SHARED(execution_param);
int8_t SHARED(execution_param);

////////////////////////////////////////////////////////////////////////////////
// Scratchpad for teams reduction.
Expand Down
18 changes: 10 additions & 8 deletions openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ static void __kmpc_generic_kernel_init() {
if (threadIdInBlock != GetMasterThreadID())
return;

setExecutionParameters(Generic, RuntimeInitialized);
setExecutionParameters(OMP_TGT_EXEC_MODE_GENERIC, OMP_TGT_RUNTIME_INITIALIZED);
ASSERT0(LT_FUSSY, threadIdInBlock == GetMasterThreadID(),
"__kmpc_kernel_init() must be called by team master warp only!");
PRINT0(LD_IO, "call to __kmpc_kernel_init for master\n");
Expand Down Expand Up @@ -85,8 +85,9 @@ static void __kmpc_generic_kernel_deinit() {
static void __kmpc_spmd_kernel_init(bool RequiresFullRuntime) {
PRINT0(LD_IO, "call to __kmpc_spmd_kernel_init\n");

setExecutionParameters(Spmd, RequiresFullRuntime ? RuntimeInitialized
: RuntimeUninitialized);
setExecutionParameters(OMP_TGT_EXEC_MODE_SPMD,
RequiresFullRuntime ? OMP_TGT_RUNTIME_INITIALIZED
: OMP_TGT_RUNTIME_UNINITIALIZED);
int threadId = __kmpc_get_hardware_thread_id_in_block();
if (threadId == 0) {
usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
Expand Down Expand Up @@ -160,7 +161,7 @@ static void __kmpc_spmd_kernel_deinit(bool RequiresFullRuntime) {

// Return a nonzero value if the current target region is executed in SPMD mode.
EXTERN int8_t __kmpc_is_spmd_exec_mode() {
return (execution_param & ModeMask) == Spmd;
return execution_param & OMP_TGT_EXEC_MODE_SPMD;
}

EXTERN int8_t __kmpc_is_generic_main_thread(kmp_int32 Tid) {
Expand Down Expand Up @@ -202,9 +203,10 @@ static void __kmpc_target_region_state_machine(ident_t *Ident) {
}

EXTERN
int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
bool UseGenericStateMachine,
bool RequiresFullRuntime) {
const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
int TId = __kmpc_get_hardware_thread_id_in_block();
if (IsSPMD)
__kmpc_spmd_kernel_init(RequiresFullRuntime);
Expand All @@ -226,13 +228,13 @@ int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
}

EXTERN
void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
bool RequiresFullRuntime) {
void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
bool RequiresFullRuntime) {
const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
if (IsSPMD)
__kmpc_spmd_kernel_deinit(RequiresFullRuntime);
else
__kmpc_generic_kernel_deinit();
}


#pragma omp end declare target
11 changes: 5 additions & 6 deletions openmp/libomptarget/deviceRTLs/common/src/support.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,18 @@
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////

void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
void setExecutionParameters(OMPTgtExecModeFlags EMode,
OMPTgtRuntimeModeFlags RMode) {
execution_param = EMode;
execution_param |= RMode;
}

bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
bool isGenericMode() { return execution_param & OMP_TGT_EXEC_MODE_GENERIC; }

bool isRuntimeUninitialized() {
return (execution_param & RuntimeMask) == RuntimeUninitialized;
}
bool isRuntimeUninitialized() { return !isRuntimeInitialized(); }

bool isRuntimeInitialized() {
return (execution_param & RuntimeMask) == RuntimeInitialized;
return execution_param & OMP_TGT_RUNTIME_INITIALIZED;
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
17 changes: 8 additions & 9 deletions openmp/libomptarget/deviceRTLs/common/support.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,18 @@
////////////////////////////////////////////////////////////////////////////////
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////
enum ExecutionMode {
Spmd = 0x00u,
Generic = 0x01u,
ModeMask = 0x01u,
enum OMPTgtExecModeFlags : int8_t {
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
OMP_TGT_EXEC_MODE_SPMD = 1 << 1
};

enum RuntimeMode {
RuntimeInitialized = 0x00u,
RuntimeUninitialized = 0x02u,
RuntimeMask = 0x02u,
enum OMPTgtRuntimeModeFlags : int8_t {
OMP_TGT_RUNTIME_UNINITIALIZED = 0,
OMP_TGT_RUNTIME_INITIALIZED = 1 << 2
};

void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
void setExecutionParameters(OMPTgtExecModeFlags EMode,
OMPTgtRuntimeModeFlags RMode);
bool isGenericMode();
bool isRuntimeUninitialized();
bool isRuntimeInitialized();
Expand Down
4 changes: 2 additions & 2 deletions openmp/libomptarget/deviceRTLs/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,10 +416,10 @@ EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal);

// non standard
EXTERN int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
EXTERN int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
bool UseGenericStateMachine,
bool RequiresFullRuntime);
EXTERN void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
EXTERN void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
bool RequiresFullRuntime);
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn);
EXTERN bool __kmpc_kernel_parallel(void **WorkFn);
Expand Down