Skip to content

Commit

Permalink
[OpenMP][NFC] Add min/max threads/teams count into the KernelEnvironment (#70257)
Browse files Browse the repository at this point in the history

The runtime needs to know the acceptable launch bounds of a kernel,
especially if the compiler (middle end or backend) assumed those bounds.
This patch does not yet pass the bounds to the runtime, but it stores them
in the kernel environment, which is associated with the kernel and can be
read by the runtime.
  • Loading branch information
jdoerfert committed Oct 26, 2023
1 parent a002606 commit c2a1249
Show file tree
Hide file tree
Showing 31 changed files with 323 additions and 280 deletions.
2 changes: 1 addition & 1 deletion llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
__OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
__OMP_STRUCT_TYPE(Task, kmp_task_ompbuilder_t, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr)
__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
Int8, Int8, Int8)
Int8, Int8, Int8, Int32, Int32, Int32, Int32)
__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false,
ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr)
Expand Down
27 changes: 17 additions & 10 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4071,18 +4071,21 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD);
ConstantInt *MayUseNestedParallelismVal =
ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true);
ConstantInt *DebugIndentionLevelVal =
ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0);
Constant *IsSPMDVal = ConstantInt::getSigned(
Int8, IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
Constant *UseGenericStateMachineVal = ConstantInt::getSigned(Int8, !IsSPMD);
Constant *MayUseNestedParallelismVal = ConstantInt::getSigned(Int8, true);
Constant *DebugIndentionLevelVal = ConstantInt::getSigned(Int16, 0);

// We need to strip the debug prefix to get the correct kernel name.
Function *Kernel = Builder.GetInsertBlock()->getParent();
auto [MinThreadsVal, MaxThreadsVal] = readThreadBoundsForKernel(*Kernel);
auto [MinTeamsVal, MaxTeamsVal] = readTeamBoundsForKernel(*Kernel);
Constant *MinThreads = ConstantInt::getSigned(Int32, MinThreadsVal);
Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsVal);
Constant *MinTeams = ConstantInt::getSigned(Int32, MinTeamsVal);
Constant *MaxTeams = ConstantInt::getSigned(Int32, MaxTeamsVal);

// We need to strip the debug prefix to get the correct kernel name.
StringRef KernelName = Kernel->getName();
const std::string DebugPrefix = "_debug__";
if (KernelName.ends_with(DebugPrefix))
Expand Down Expand Up @@ -4113,6 +4116,10 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
UseGenericStateMachineVal,
MayUseNestedParallelismVal,
IsSPMDVal,
MinThreads,
MaxThreads,
MinTeams,
MaxTeams,
});
Constant *KernelEnvironmentInitializer = ConstantStruct::get(
KernelEnvironment, {
Expand Down
29 changes: 29 additions & 0 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ namespace KernelInfo {
// uint8_t UseGenericStateMachine;
// uint8_t MayUseNestedParallelism;
// llvm::omp::OMPTgtExecModeFlags ExecMode;
// int32_t MinThreads;
// int32_t MaxThreads;
// int32_t MinTeams;
// int32_t MaxTeams;
// };

// struct DynamicEnvironmentTy {
Expand All @@ -217,6 +221,10 @@ KERNEL_ENVIRONMENT_IDX(Ident, 1)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinThreads, 3)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxThreads, 4)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinTeams, 5)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxTeams, 6)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX

Expand All @@ -241,6 +249,10 @@ KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinTeams)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER

Expand Down Expand Up @@ -3636,6 +3648,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinTeams)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxTeams)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER

Expand Down Expand Up @@ -3723,6 +3739,19 @@ struct AAKernelInfoFunction : AAKernelInfo {
else
setExecModeOfKernelEnvironment(AssumedExecModeC);

auto *Int32Ty = Type::getInt32Ty(Fn->getContext());
auto [MinThreads, MaxThreads] =
OpenMPIRBuilder::readThreadBoundsForKernel(*Fn);
if (MinThreads)
setMinThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinThreads));
if (MaxThreads)
setMaxThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxThreads));
auto [MinTeams, MaxTeams] = OpenMPIRBuilder::readTeamBoundsForKernel(*Fn);
if (MinTeams)
setMinTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinTeams));
if (MaxTeams)
setMaxTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxTeams));

ConstantInt *MayUseNestedParallelismC =
KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/OpenMP/always_inline_device.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@G = external global i8

@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }

; Function Attrs: convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
;.
define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK: Function Attrs: norecurse nounwind
Expand Down

0 comments on commit c2a1249

Please sign in to comment.