Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenMP][NFC] Add min/max threads/teams count into the KernelEnvironment #70257

Merged
merged 1 commit into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
__OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
__OMP_STRUCT_TYPE(Task, kmp_task_ompbuilder_t, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr)
__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
Int8, Int8, Int8)
Int8, Int8, Int8, Int32, Int32, Int32, Int32)
__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false,
ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr)
Expand Down
27 changes: 17 additions & 10 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4071,18 +4071,21 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD);
ConstantInt *MayUseNestedParallelismVal =
ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true);
ConstantInt *DebugIndentionLevelVal =
ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0);
Constant *IsSPMDVal = ConstantInt::getSigned(
Int8, IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
Constant *UseGenericStateMachineVal = ConstantInt::getSigned(Int8, !IsSPMD);
Constant *MayUseNestedParallelismVal = ConstantInt::getSigned(Int8, true);
Constant *DebugIndentionLevelVal = ConstantInt::getSigned(Int16, 0);

// We need to strip the debug prefix to get the correct kernel name.
Function *Kernel = Builder.GetInsertBlock()->getParent();
auto [MinThreadsVal, MaxThreadsVal] = readThreadBoundsForKernel(*Kernel);
auto [MinTeamsVal, MaxTeamsVal] = readTeamBoundsForKernel(*Kernel);
Constant *MinThreads = ConstantInt::getSigned(Int32, MinThreadsVal);
Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsVal);
Constant *MinTeams = ConstantInt::getSigned(Int32, MinTeamsVal);
Constant *MaxTeams = ConstantInt::getSigned(Int32, MaxTeamsVal);

// We need to strip the debug prefix to get the correct kernel name.
StringRef KernelName = Kernel->getName();
const std::string DebugPrefix = "_debug__";
if (KernelName.ends_with(DebugPrefix))
Expand Down Expand Up @@ -4113,6 +4116,10 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
UseGenericStateMachineVal,
MayUseNestedParallelismVal,
IsSPMDVal,
MinThreads,
MaxThreads,
MinTeams,
MaxTeams,
});
Constant *KernelEnvironmentInitializer = ConstantStruct::get(
KernelEnvironment, {
Expand Down
29 changes: 29 additions & 0 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ namespace KernelInfo {
// uint8_t UseGenericStateMachine;
// uint8_t MayUseNestedParallelism;
// llvm::omp::OMPTgtExecModeFlags ExecMode;
// int32_t MinThreads;
// int32_t MaxThreads;
// int32_t MinTeams;
// int32_t MaxTeams;
// };

// struct DynamicEnvironmentTy {
Expand All @@ -217,6 +221,10 @@ KERNEL_ENVIRONMENT_IDX(Ident, 1)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinThreads, 3)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxThreads, 4)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinTeams, 5)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxTeams, 6)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX

Expand All @@ -241,6 +249,10 @@ KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinTeams)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER

Expand Down Expand Up @@ -3636,6 +3648,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinTeams)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxTeams)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER

Expand Down Expand Up @@ -3723,6 +3739,19 @@ struct AAKernelInfoFunction : AAKernelInfo {
else
setExecModeOfKernelEnvironment(AssumedExecModeC);

auto *Int32Ty = Type::getInt32Ty(Fn->getContext());
auto [MinThreads, MaxThreads] =
OpenMPIRBuilder::readThreadBoundsForKernel(*Fn);
if (MinThreads)
setMinThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinThreads));
if (MaxThreads)
setMaxThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxThreads));
auto [MinTeams, MaxTeams] = OpenMPIRBuilder::readTeamBoundsForKernel(*Fn);
if (MinTeams)
setMinTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinTeams));
if (MaxTeams)
setMaxTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxTeams));

ConstantInt *MayUseNestedParallelismC =
KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/OpenMP/always_inline_device.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@G = external global i8

@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }

; Function Attrs: convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK: Function Attrs: norecurse nounwind
Expand Down