-
Notifications
You must be signed in to change notification settings - Fork 11.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenMP][offload] Fix dynamic schedule tracking #97065
Conversation
@llvm/pr-subscribers-offload @llvm/pr-subscribers-clang Author: Gheorghe-Teodor Bercea (doru1004) ChangesThis patch fixes the dynamic schedule tracking. Patch is 788.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97065.diff 24 Files Affected:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..b47b521edd32c 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2553,6 +2553,15 @@ void CGOpenMPRuntime::emitForDispatchInit(
Args);
}
+void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+ CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
+}
+
static void emitForStaticInitCall(
CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
@@ -11996,6 +12005,11 @@ void CGOpenMPSIMDRuntime::emitForDispatchInit(
llvm_unreachable("Not supported in SIMD-only mode");
}
+void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
void CGOpenMPSIMDRuntime::emitForStaticInit(
CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 522ae3d35d22d..f65314d014c08 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -946,6 +946,14 @@ class CGOpenMPRuntime {
unsigned IVSize, bool IVSigned, bool Ordered,
const DispatchRTInput &DispatchValues);
+ /// This is used for non static scheduled types and when the ordered
+ /// clause is present on the loop construct.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ ///
+ virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc);
+
/// Struct with the values to be passed to the static runtime function
struct StaticRTInput {
/// Size of the iteration variable in bits.
@@ -1829,6 +1837,14 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
unsigned IVSize, bool IVSigned, bool Ordered,
const DispatchRTInput &DispatchValues) override;
+ /// This is used for non static scheduled types and when the ordered
+ /// clause is present on the loop construct.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ ///
+ void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override;
+
/// Call the appropriate runtime routine to initialize it before start
/// of loop.
///
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..3c88f4eb71572 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2985,12 +2985,14 @@ void CodeGenFunction::EmitOMPForOuterLoop(
// run-sched-var ICV. If the ICV is set to auto, the schedule is
// implementation defined
//
+ // __kmpc_dispatch_init();
// while(__kmpc_dispatch_next(&LB, &UB)) {
// idx = LB;
// while (idx <= UB) { BODY; ++idx;
// __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
// } // inner loop
// }
+ // __kmpc_dispatch_deinit();
//
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When schedule(static, chunk_size) is specified, iterations are divided into
@@ -3044,6 +3046,9 @@ void CodeGenFunction::EmitOMPForOuterLoop(
OuterLoopArgs.DKind = LoopArgs.DKind;
EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
+ if (DynamicOrOrdered) {
+ RT.emitForDispatchDeinit(*this, S.getBeginLoc());
+ }
}
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index a8892a06d4b30..109f1690e448b 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -2283,6 +2283,9 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -2533,6 +2536,9 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
+// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -4010,6 +4016,9 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
+// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -4253,6 +4262,9 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
+// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -6314,6 +6326,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -6554,6 +6569,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -8627,6 +8645,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -8867,6 +8888,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -10884,6 +10908,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -11117,6 +11144,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -13146,6 +13176,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -13379,6 +13412,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
index a23d538910edd..14dcb17358061 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -2415,12 +2415,15 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
+// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP38]], 0
// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
@@ -2689,12 +2692,15 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
-// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP39]], 0
// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
@@ -4310,12 +4316,15 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
+// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0
// CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
@@ -4577,12 +4586,15 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
-// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
+// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0
+// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP39]], 0
// CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
@@ -6836,12 +6848,15 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
-// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
-// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP34]], 0
// CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
@@ -7100,12 +7115,15 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
-// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
+// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
+// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0
+// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP35]], 0
// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1
// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
@@ -9302,12 +9320,15 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
-// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
-// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP34]], 0
// CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]],...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
4ea0981
to
a2798b5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you provide a more descriptive summary?
I thought we discussed that the dynamic support would just use the static scheduler, but this seems to implement it? I personally don't want to see more things in the OpenMP runtime relying on malloc
if we can avoid it.
I can add a summary. The problem with the dispatch loop right now is that it is just not testable/runnable. This patch fixes that aspect of it. So wherever the dispatch loop is used it has to be correct. We can re-evaluate the use of static schedule to implement dynamic schedule once this actually works. Malloc cannot be helped here if we want to have correctness. Currently it is just broken and not even runnable. |
I figured that all this code would go away if we just made all schedules static. |
74d567a
to
b07963f
Compare
398a7db
to
99aa391
Compare
4613db6
to
cdacec7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG, @shiltian, OK?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code changes look good now, but I'd prefer to have a non-SPMD mode test case.
cdacec7
to
c94dae9
Compare
|
All good @shiltian ? The test you requested was added Friday. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
This change broke Windows builds of OpenMP. Building succeed, but many build scenarios fail due to See e.g. https://github.com/mstorsjo/llvm-mingw/actions/runs/9753150440/job/26926642868#step:8:801 for an example of a failed OpenMP test run. |
This patch fixes the dynamic schedule tracking.
This patch fixes the dynamic schedule tracking.
This patch fixes the dynamic schedule tracking.