| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,410 @@ | ||
| // RUN: %libomp-compile-and-run | ||
| /* | ||
| Test for the 'schedule(simd:guided)' clause. | ||
| Compiler needs to generate a dynamic dispatching and pass the schedule | ||
| value 46 to the OpenMP RTL. Test uses numerous loop parameter combinations. | ||
| */ | ||
| #include <stdio.h> | ||
| #include <omp.h> | ||
|
|
||
// Short platform-specific sleep used while polling for another thread.
// The macro bodies carry no trailing semicolon, so `delay();` expands to
// exactly one statement and stays safe inside an unbraced if/else.
#if defined(WIN32) || defined(_WIN32)
#include <windows.h>
#define delay() Sleep(1)
#else
#include <unistd.h>
#define delay() usleep(10)
#endif
|
|
||
| // uncomment for debug diagnostics (the value is required by '#if DEBUG'): | ||
| //#define DEBUG 1 | ||
|
|
||
| #define SIMD_LEN 4 | ||
|
|
||
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47,
};
typedef unsigned u32;
typedef long long i64;
typedef unsigned long long u64;
// Descriptor passed as the first argument to the __kmpc_* entry points
// declared below.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  const char *psource; // initialised from a string literal, hence const
} id;

extern int __kmpc_global_thread_num(id*);
extern void __kmpc_barrier(id*, int gtid);
extern void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
extern void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
extern int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
extern int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
|
|
||
| // --------------------------------------------------------------------------- | ||
| int run_loop_64(i64 loop_lb, i64 loop_ub, i64 loop_st, int loop_chunk) { | ||
| int err = 0; | ||
| static int volatile loop_sync = 0; | ||
| i64 lb; // Chunk lower bound | ||
| i64 ub; // Chunk upper bound | ||
| i64 st; // Chunk stride | ||
| int rc; | ||
| int tid = omp_get_thread_num(); | ||
| int gtid = tid; | ||
| int last; | ||
| #if DEBUG | ||
| printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n", | ||
| (int)sizeof(i64), gtid, tid, | ||
| (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk); | ||
| #endif | ||
| // Don't test degenerate cases that should have been discovered by codegen | ||
| if (loop_st == 0) | ||
| return 0; | ||
| if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) | ||
| return 0; | ||
|
|
||
| __kmpc_dispatch_init_8(&loc, gtid, kmp_sch_guided_simd, | ||
| loop_lb, loop_ub, loop_st, loop_chunk); | ||
| if (tid == 0) { | ||
| // Let the master thread handle the chunks alone | ||
| int chunk; // No of current chunk | ||
| i64 next_lb; // Lower bound of the next chunk | ||
| i64 last_ub; // Upper bound of the last processed chunk | ||
| u64 cur; // Number of interations in current chunk | ||
| u64 max; // Max allowed iterations for current chunk | ||
| int undersized = 0; | ||
|
|
||
| chunk = 0; | ||
| next_lb = loop_lb; | ||
| max = (loop_ub - loop_lb) / loop_st + 1; | ||
| // The first chunk can consume all iterations | ||
| while (__kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st)) { | ||
| ++ chunk; | ||
| #if DEBUG | ||
| printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub); | ||
| #endif | ||
| // Check if previous chunk (it is not the final chunk) is undersized | ||
| if (undersized) { | ||
| printf("Error with chunk %d\n", chunk); | ||
| err++; | ||
| } | ||
| // Check lower and upper bounds | ||
| if (lb != next_lb) { | ||
| printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk); | ||
| err++; | ||
| } | ||
| if (loop_st > 0) { | ||
| if (!(ub <= loop_ub)) { | ||
| printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(lb <= ub)) { | ||
| printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); | ||
| err++; | ||
| } | ||
| } else { | ||
| if (!(ub >= loop_ub)) { | ||
| printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(lb >= ub)) { | ||
| printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); | ||
| err++; | ||
| } | ||
| }; // if | ||
| // Stride should not change | ||
| if (!(st == loop_st)) { | ||
| printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk); | ||
| err++; | ||
| } | ||
| cur = (ub - lb) / loop_st + 1; | ||
| // Guided scheduling uses FP computations, so current chunk may | ||
| // be a bit bigger (+1) than allowed maximum | ||
| if (!(cur <= max + 1)) { | ||
| printf("Error with iter %d, %d\n", cur, max); | ||
| err++; | ||
| } | ||
| // Update maximum for the next chunk | ||
| if (cur < max) | ||
| max = cur; | ||
| next_lb = ub + loop_st; | ||
| last_ub = ub; | ||
| undersized = (cur < loop_chunk); | ||
| }; // while | ||
| // Must have at least one chunk | ||
| if (!(chunk > 0)) { | ||
| printf("Error with chunk %d\n", chunk); | ||
| err++; | ||
| } | ||
| // Must have the right last iteration index | ||
| if (loop_st > 0) { | ||
| if (!(last_ub <= loop_ub)) { | ||
| printf("Error with last1 %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(last_ub + loop_st > loop_ub)) { | ||
| printf("Error with last2 %d, %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| } else { | ||
| if (!(last_ub >= loop_ub)) { | ||
| printf("Error with last1 %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(last_ub + loop_st < loop_ub)) { | ||
| printf("Error with last2 %d, %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| }; // if | ||
| // Let non-master threads go | ||
| loop_sync = 1; | ||
| } else { | ||
| int i; | ||
| // Workers wait for master thread to finish, then call __kmpc_dispatch_next | ||
| for (i = 0; i < 1000000; ++ i) { | ||
| if (loop_sync != 0) { | ||
| break; | ||
| }; // if | ||
| }; // for i | ||
| while (loop_sync == 0) { | ||
| delay(); | ||
| }; // while | ||
| // At this moment we do not have any more chunks -- all the chunks already | ||
| // processed by master thread | ||
| rc = __kmpc_dispatch_next_8(&loc, gtid, &last, &lb, &ub, &st); | ||
| if (rc) { | ||
| printf("Error return value\n"); | ||
| err++; | ||
| } | ||
| }; // if | ||
|
|
||
| __kmpc_barrier(&loc, gtid); | ||
| if (tid == 0) { | ||
| loop_sync = 0; // Restore original state | ||
| #if DEBUG | ||
| printf("run_loop_64(): at the end\n"); | ||
| #endif | ||
| }; // if | ||
| __kmpc_barrier(&loc, gtid); | ||
| return err; | ||
| } // run_loop | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| int run_loop_32(int loop_lb, int loop_ub, int loop_st, int loop_chunk) { | ||
| int err = 0; | ||
| static int volatile loop_sync = 0; | ||
| int lb; // Chunk lower bound | ||
| int ub; // Chunk upper bound | ||
| int st; // Chunk stride | ||
| int rc; | ||
| int tid = omp_get_thread_num(); | ||
| int gtid = tid; | ||
| int last; | ||
| #if DEBUG | ||
| printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n", | ||
| (int)sizeof(int), gtid, tid, | ||
| (int)loop_lb, (int)loop_ub, (int)loop_st, loop_chunk); | ||
| #endif | ||
| // Don't test degenerate cases that should have been discovered by codegen | ||
| if (loop_st == 0) | ||
| return 0; | ||
| if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) | ||
| return 0; | ||
|
|
||
| __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_guided_simd, | ||
| loop_lb, loop_ub, loop_st, loop_chunk); | ||
| if (tid == 0) { | ||
| // Let the master thread handle the chunks alone | ||
| int chunk; // No of current chunk | ||
| int next_lb; // Lower bound of the next chunk | ||
| int last_ub; // Upper bound of the last processed chunk | ||
| u64 cur; // Number of interations in current chunk | ||
| u64 max; // Max allowed iterations for current chunk | ||
| int undersized = 0; | ||
|
|
||
| chunk = 0; | ||
| next_lb = loop_lb; | ||
| max = (loop_ub - loop_lb) / loop_st + 1; | ||
| // The first chunk can consume all iterations | ||
| while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { | ||
| ++ chunk; | ||
| #if DEBUG | ||
| printf("chunk=%d, lb=%d, ub=%d\n", chunk, (int)lb, (int)ub); | ||
| #endif | ||
| // Check if previous chunk (it is not the final chunk) is undersized | ||
| if (undersized) { | ||
| printf("Error with chunk %d\n", chunk); | ||
| err++; | ||
| } | ||
| // Check lower and upper bounds | ||
| if (lb != next_lb) { | ||
| printf("Error with lb %d, %d, ch %d\n", (int)lb, (int)next_lb, chunk); | ||
| err++; | ||
| } | ||
| if (loop_st > 0) { | ||
| if (!(ub <= loop_ub)) { | ||
| printf("Error with ub %d, %d, ch %d\n", (int)ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(lb <= ub)) { | ||
| printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); | ||
| err++; | ||
| } | ||
| } else { | ||
| if (!(ub >= loop_ub)) { | ||
| printf("Error with ub %d, %d, %d\n", (int)ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(lb >= ub)) { | ||
| printf("Error with bounds %d, %d, %d\n", (int)lb, (int)ub, chunk); | ||
| err++; | ||
| } | ||
| }; // if | ||
| // Stride should not change | ||
| if (!(st == loop_st)) { | ||
| printf("Error with st %d, %d, ch %d\n", (int)st, (int)loop_st, chunk); | ||
| err++; | ||
| } | ||
| cur = (ub - lb) / loop_st + 1; | ||
| // Guided scheduling uses FP computations, so current chunk may | ||
| // be a bit bigger (+1) than allowed maximum | ||
| if (!(cur <= max + 1)) { | ||
| printf("Error with iter %d, %d\n", cur, max); | ||
| err++; | ||
| } | ||
| // Update maximum for the next chunk | ||
| if (cur < max) | ||
| max = cur; | ||
| next_lb = ub + loop_st; | ||
| last_ub = ub; | ||
| undersized = (cur < loop_chunk); | ||
| }; // while | ||
| // Must have at least one chunk | ||
| if (!(chunk > 0)) { | ||
| printf("Error with chunk %d\n", chunk); | ||
| err++; | ||
| } | ||
| // Must have the right last iteration index | ||
| if (loop_st > 0) { | ||
| if (!(last_ub <= loop_ub)) { | ||
| printf("Error with last1 %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(last_ub + loop_st > loop_ub)) { | ||
| printf("Error with last2 %d, %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| } else { | ||
| if (!(last_ub >= loop_ub)) { | ||
| printf("Error with last1 %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| if (!(last_ub + loop_st < loop_ub)) { | ||
| printf("Error with last2 %d, %d, %d, ch %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk); | ||
| err++; | ||
| } | ||
| }; // if | ||
| // Let non-master threads go | ||
| loop_sync = 1; | ||
| } else { | ||
| int i; | ||
| // Workers wait for master thread to finish, then call __kmpc_dispatch_next | ||
| for (i = 0; i < 1000000; ++ i) { | ||
| if (loop_sync != 0) { | ||
| break; | ||
| }; // if | ||
| }; // for i | ||
| while (loop_sync == 0) { | ||
| delay(); | ||
| }; // while | ||
| // At this moment we do not have any more chunks -- all the chunks already | ||
| // processed by the master thread | ||
| rc = __kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st); | ||
| if (rc) { | ||
| printf("Error return value\n"); | ||
| err++; | ||
| } | ||
| }; // if | ||
|
|
||
| __kmpc_barrier(&loc, gtid); | ||
| if (tid == 0) { | ||
| loop_sync = 0; // Restore original state | ||
| #if DEBUG | ||
| printf("run_loop<>(): at the end\n"); | ||
| #endif | ||
| }; // if | ||
| __kmpc_barrier(&loc, gtid); | ||
| return err; | ||
| } // run_loop | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| int run_64(int num_th) | ||
| { | ||
| int err = 0; | ||
| #pragma omp parallel num_threads(num_th) | ||
| { | ||
| int chunk; | ||
| i64 st, lb, ub; | ||
| for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) { | ||
| for (st = 1; st <= 3; ++ st) { | ||
| for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) { | ||
| for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) { | ||
| err += run_loop_64(lb, ub, st, chunk); | ||
| err += run_loop_64(ub, lb, -st, chunk); | ||
| }; // for ub | ||
| }; // for lb | ||
| }; // for st | ||
| }; // for chunk | ||
| } | ||
| return err; | ||
| } // run_all | ||
|
|
||
| int run_32(int num_th) | ||
| { | ||
| int err = 0; | ||
| #pragma omp parallel num_threads(num_th) | ||
| { | ||
| int chunk, st, lb, ub; | ||
| for (chunk = SIMD_LEN; chunk <= 3*SIMD_LEN; chunk += SIMD_LEN) { | ||
| for (st = 1; st <= 3; ++ st) { | ||
| for (lb = -3 * num_th * st; lb <= 3 * num_th * st; ++ lb) { | ||
| for (ub = lb; ub < lb + num_th * (chunk+1) * st; ++ ub) { | ||
| err += run_loop_32(lb, ub, st, chunk); | ||
| err += run_loop_32(ub, lb, -st, chunk); | ||
| }; // for ub | ||
| }; // for lb | ||
| }; // for st | ||
| }; // for chunk | ||
| } | ||
| return err; | ||
| } // run_all | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
// Runs the 32- and 64-bit checks on teams of 1 to 4 threads and reports the
// accumulated failure count. Exits with 0 on success and 1 on any failure.
int main(void)
{
  int n, err = 0;
  for (n = 1; n <= 4; ++n) {
    err += run_32(n);
    err += run_64(n);
  } // for n
  if (err)
    printf("failed with %d errors\n", err);
  else
    printf("passed\n");
  // Do not return the raw count: exit statuses are commonly truncated to
  // 8 bits, so a multiple of 256 failures would be reported as success.
  return err != 0;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,221 @@ | ||
| // RUN: %libomp-compile-and-run | ||
|
|
||
| // The test checks schedule(simd:runtime) | ||
| // in combination with omp_set_schedule() | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <omp.h> | ||
|
|
||
| #if defined(WIN32) || defined(_WIN32) | ||
| #include <windows.h> | ||
| #define delay() Sleep(1); | ||
| #define seten(a,b,c) _putenv_s((a),(b)) | ||
| #else | ||
| #include <unistd.h> | ||
| #define delay() usleep(10); | ||
| #define seten(a,b,c) setenv((a),(b),(c)) | ||
| #endif | ||
|
|
||
| #define SIMD_LEN 4 | ||
| int err = 0; | ||
|
|
||
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL.
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47,
};
typedef unsigned u32;
typedef long long i64;
typedef unsigned long long u64;
// Descriptor passed as the first argument to the __kmpc_* entry points
// declared below.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  // Initialised from a string literal, so it must be const (a plain char*
  // is rejected when this file is built as C++).
  const char *psource;
} id;

#ifdef __cplusplus
extern "C" {
#endif
  int __kmpc_global_thread_num(id*);
  void __kmpc_barrier(id*, int gtid);
  void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
  void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
  int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
  int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
#ifdef __cplusplus
} // extern "C"
#endif
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
|
|
||
| // --------------------------------------------------------------------------- | ||
| void | ||
| run_loop( | ||
| int loop_lb, // Loop lower bound. | ||
| int loop_ub, // Loop upper bound. | ||
| int loop_st, // Loop stride. | ||
| int lchunk | ||
| ) { | ||
| static int volatile loop_sync = 0; | ||
| int lb; // Chunk lower bound. | ||
| int ub; // Chunk upper bound. | ||
| int st; // Chunk stride. | ||
| int rc; | ||
| int tid = omp_get_thread_num(); | ||
| int gtid = __kmpc_global_thread_num(&loc); | ||
| int last; | ||
| int tc = (loop_ub - loop_lb) / loop_st + 1; | ||
| int ch; | ||
| int no_chunk = 0; | ||
| if (lchunk == 0) { | ||
| no_chunk = 1; | ||
| lchunk = 1; | ||
| } | ||
| ch = lchunk * SIMD_LEN; | ||
| #if _DEBUG > 1 | ||
| printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", | ||
| gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); | ||
| #endif | ||
| // Don't test degenerate cases that should have been discovered by codegen. | ||
| if (loop_st == 0) | ||
| return; | ||
| if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) | ||
| return; | ||
| __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, | ||
| loop_lb, loop_ub, loop_st, SIMD_LEN); | ||
| { | ||
| // Let the master thread handle the chunks alone. | ||
| int chunk; // No of current chunk. | ||
| int last_ub; // Upper bound of the last processed chunk. | ||
| u64 cur; // Number of interations in current chunk. | ||
| u64 max; // Max allowed iterations for current chunk. | ||
| int undersized = 0; | ||
| last_ub = loop_ub; | ||
| chunk = 0; | ||
| max = (loop_ub - loop_lb) / loop_st + 1; | ||
| // The first chunk can consume all iterations. | ||
| while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { | ||
| ++ chunk; | ||
| #if _DEBUG | ||
| printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", | ||
| tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); | ||
| #endif | ||
| // Check if previous chunk (it is not the final chunk) is undersized. | ||
| if (undersized) | ||
| printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); | ||
| if (loop_st > 0) { | ||
| if (!(ub <= loop_ub)) | ||
| printf("Error with ub %d, %d, ch %d, err %d\n", | ||
| (int)ub, (int)loop_ub, chunk, ++err); | ||
| if (!(lb <= ub)) | ||
| printf("Error with bounds %d, %d, %d, err %d\n", | ||
| (int)lb, (int)ub, chunk, ++err); | ||
| } else { | ||
| if (!(ub >= loop_ub)) | ||
| printf("Error with ub %d, %d, %d, err %d\n", | ||
| (int)ub, (int)loop_ub, chunk, ++err); | ||
| if (!(lb >= ub)) | ||
| printf("Error with bounds %d, %d, %d, err %d\n", | ||
| (int)lb, (int)ub, chunk, ++err); | ||
| }; // if | ||
| // Stride should not change. | ||
| if (!(st == loop_st)) | ||
| printf("Error with st %d, %d, ch %d, err %d\n", | ||
| (int)st, (int)loop_st, chunk, ++err); | ||
| cur = ( ub - lb ) / loop_st + 1; | ||
| // Guided scheduling uses FP computations, so current chunk may | ||
| // be a bit bigger (+1) than allowed maximum. | ||
| if (!( cur <= max + 1)) | ||
| printf("Error with iter %d, %d, err %d\n", cur, max, ++err); | ||
| // Update maximum for the next chunk. | ||
| if (last) { | ||
| if (!no_chunk && cur > ch) | ||
| printf("Error: too big last chunk %d (%d), tid %d, err %d\n", | ||
| (int)cur, ch, tid, ++err); | ||
| } else { | ||
| if (cur % ch) | ||
| printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", | ||
| chunk, (int)cur, ch, tid, ++err); | ||
| } | ||
| if (cur < max) | ||
| max = cur; | ||
| last_ub = ub; | ||
| undersized = (cur < ch); | ||
| #if _DEBUG > 1 | ||
| if (last) | ||
| printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", | ||
| undersized,cur,ch,tid,ub,lb,loop_st); | ||
| #endif | ||
| } // while | ||
| // Must have the right last iteration index. | ||
| if (loop_st > 0) { | ||
| if (!(last_ub <= loop_ub)) | ||
| printf("Error with last1 %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk, ++err); | ||
| if (last && !(last_ub + loop_st > loop_ub)) | ||
| printf("Error with last2 %d, %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); | ||
| } else { | ||
| if (!(last_ub >= loop_ub)) | ||
| printf("Error with last1 %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk, ++err); | ||
| if (last && !(last_ub + loop_st < loop_ub)) | ||
| printf("Error with last2 %d, %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); | ||
| } // if | ||
| } | ||
| __kmpc_barrier(&loc, gtid); | ||
| } // run_loop | ||
|
|
||
| int main(int argc, char *argv[]) | ||
| { | ||
| int chunk = 0; | ||
| // static (no chunk) | ||
| omp_set_schedule(omp_sched_static,0); | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
|
|
||
| // auto (chunk should be ignorted) | ||
| omp_set_schedule(omp_sched_auto,0); | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
|
|
||
| // static,1 | ||
| chunk = 1; | ||
| omp_set_schedule(omp_sched_static,1); | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
|
|
||
| // dynamic,1 | ||
| omp_set_schedule(omp_sched_dynamic,1); | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
|
|
||
| // guided,1 | ||
| omp_set_schedule(omp_sched_guided,1); | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
|
|
||
| // dynamic,0 - use default chunk size 1 | ||
| omp_set_schedule(omp_sched_dynamic,0); | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
|
|
||
| // guided,0 - use default chunk size 1 | ||
| omp_set_schedule(omp_sched_guided,0); | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
|
|
||
| if (err) { | ||
| printf("failed, err = %d\n", err); | ||
| return 1; | ||
| } else { | ||
| printf("passed\n"); | ||
| return 0; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,196 @@ | ||
| // RUN: %libomp-compile | ||
| // RUN: env OMP_SCHEDULE=guided %libomp-run | ||
| // RUN: env OMP_SCHEDULE=guided,1 %libomp-run 1 | ||
| // RUN: env OMP_SCHEDULE=guided,2 %libomp-run 2 | ||
| // RUN: env OMP_SCHEDULE=dynamic %libomp-run | ||
| // RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1 | ||
| // RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2 | ||
| // RUN: env OMP_SCHEDULE=auto %libomp-run | ||
|
|
||
| // The test checks schedule(simd:runtime) in combination with | ||
| // OMP_SCHEDULE set to guided, dynamic or auto (optionally with a chunk) | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <omp.h> | ||
|
|
||
| #if defined(WIN32) || defined(_WIN32) | ||
| #include <windows.h> | ||
| #define delay() Sleep(1); | ||
| #define seten(a,b,c) _putenv_s((a),(b)) | ||
| #else | ||
| #include <unistd.h> | ||
| #define delay() usleep(10); | ||
| #define seten(a,b,c) setenv((a),(b),(c)) | ||
| #endif | ||
|
|
||
| #define UBOUND 100 | ||
| #define SIMD_LEN 4 | ||
| int err = 0; | ||
|
|
||
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL.
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47,
};
typedef unsigned u32;
typedef long long i64;
typedef unsigned long long u64;
// Descriptor passed as the first argument to the __kmpc_* entry points
// declared below.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  // Initialised from a string literal, so it must be const (a plain char*
  // is rejected when this file is built as C++).
  const char *psource;
} id;

#ifdef __cplusplus
extern "C" {
#endif
  int __kmpc_global_thread_num(id*);
  void __kmpc_barrier(id*, int gtid);
  void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
  void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
  int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
  int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
#ifdef __cplusplus
} // extern "C"
#endif
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
|
|
||
| // --------------------------------------------------------------------------- | ||
| void | ||
| run_loop( | ||
| int loop_lb, // Loop lower bound. | ||
| int loop_ub, // Loop upper bound. | ||
| int loop_st, // Loop stride. | ||
| int lchunk | ||
| ) { | ||
| static int volatile loop_sync = 0; | ||
| int lb; // Chunk lower bound. | ||
| int ub; // Chunk upper bound. | ||
| int st; // Chunk stride. | ||
| int rc; | ||
| int tid = omp_get_thread_num(); | ||
| int gtid = __kmpc_global_thread_num(&loc); | ||
| int last; | ||
| int tc = (loop_ub - loop_lb) / loop_st + 1; | ||
| int ch; | ||
| int no_chunk = 0; | ||
| if (lchunk == 0) { | ||
| no_chunk = 1; | ||
| lchunk = 1; | ||
| } | ||
| ch = lchunk * SIMD_LEN; | ||
| #if _DEBUG > 1 | ||
| printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", | ||
| gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); | ||
| #endif | ||
| // Don't test degenerate cases that should have been discovered by codegen. | ||
| if (loop_st == 0) | ||
| return; | ||
| if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) | ||
| return; | ||
| __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, | ||
| loop_lb, loop_ub, loop_st, SIMD_LEN); | ||
| { | ||
| // Let the master thread handle the chunks alone. | ||
| int chunk; // No of current chunk. | ||
| int last_ub; // Upper bound of the last processed chunk. | ||
| u64 cur; // Number of interations in current chunk. | ||
| u64 max; // Max allowed iterations for current chunk. | ||
| int undersized = 0; | ||
| last_ub = loop_ub; | ||
| chunk = 0; | ||
| max = (loop_ub - loop_lb) / loop_st + 1; | ||
| // The first chunk can consume all iterations. | ||
| while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { | ||
| ++ chunk; | ||
| #if _DEBUG | ||
| printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", | ||
| tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); | ||
| #endif | ||
| // Check if previous chunk (it is not the final chunk) is undersized. | ||
| if (undersized) | ||
| printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); | ||
| if (loop_st > 0) { | ||
| if (!(ub <= loop_ub)) | ||
| printf("Error with ub %d, %d, ch %d, err %d\n", | ||
| (int)ub, (int)loop_ub, chunk, ++err); | ||
| if (!(lb <= ub)) | ||
| printf("Error with bounds %d, %d, %d, err %d\n", | ||
| (int)lb, (int)ub, chunk, ++err); | ||
| } else { | ||
| if (!(ub >= loop_ub)) | ||
| printf("Error with ub %d, %d, %d, err %d\n", | ||
| (int)ub, (int)loop_ub, chunk, ++err); | ||
| if (!(lb >= ub)) | ||
| printf("Error with bounds %d, %d, %d, err %d\n", | ||
| (int)lb, (int)ub, chunk, ++err); | ||
| }; // if | ||
| // Stride should not change. | ||
| if (!(st == loop_st)) | ||
| printf("Error with st %d, %d, ch %d, err %d\n", | ||
| (int)st, (int)loop_st, chunk, ++err); | ||
| cur = ( ub - lb ) / loop_st + 1; | ||
| // Guided scheduling uses FP computations, so current chunk may | ||
| // be a bit bigger (+1) than allowed maximum. | ||
| if (!( cur <= max + 1)) | ||
| printf("Error with iter %d, %d, err %d\n", cur, max, ++err); | ||
| // Update maximum for the next chunk. | ||
| if (!last && cur % ch) | ||
| printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", | ||
| chunk, (int)cur, ch, tid, ++err); | ||
| if (last && !no_chunk && cur > ch) | ||
| printf("Error: too big last chunk %d (%d), tid %d, err %d\n", | ||
| (int)cur, ch, tid, ++err); | ||
| if (cur < max) | ||
| max = cur; | ||
| last_ub = ub; | ||
| undersized = (cur < ch); | ||
| #if _DEBUG > 1 | ||
| if (last) | ||
| printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", | ||
| undersized,cur,ch,tid,ub,lb,loop_st); | ||
| #endif | ||
| } // while | ||
| // Must have the right last iteration index. | ||
| if (loop_st > 0) { | ||
| if (!(last_ub <= loop_ub)) | ||
| printf("Error with last1 %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk, ++err); | ||
| if (last && !(last_ub + loop_st > loop_ub)) | ||
| printf("Error with last2 %d, %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); | ||
| } else { | ||
| if (!(last_ub >= loop_ub)) | ||
| printf("Error with last1 %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk, ++err); | ||
| if (last && !(last_ub + loop_st < loop_ub)) | ||
| printf("Error with last2 %d, %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); | ||
| } // if | ||
| } | ||
| __kmpc_barrier(&loc, gtid); | ||
| } // run_loop | ||
|
|
||
| int main(int argc, char *argv[]) | ||
| { | ||
| int chunk = 0; | ||
| if (argc > 1) { | ||
| // expect chunk size as a parameter | ||
| chunk = atoi(argv[1]); | ||
| } | ||
| #pragma omp parallel //num_threads(num_th) | ||
| run_loop(0, UBOUND, 1, chunk); | ||
| if (err) { | ||
| printf("failed, err = %d\n", err); | ||
| return 1; | ||
| } else { | ||
| printf("passed\n"); | ||
| return 0; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,201 @@ | ||
| // RUN: %libomp-compile && %libomp-run | ||
| // RUN: %libomp-run 1 && %libomp-run 2 | ||
|
|
||
| // The test checks schedule(simd:runtime) | ||
| // in combination with OMP_SCHEDULE=static[,chunk] | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <omp.h> | ||
|
|
||
| #if defined(WIN32) || defined(_WIN32) | ||
| #include <windows.h> | ||
| #define delay() Sleep(1); | ||
| #define seten(a,b,c) _putenv_s((a),(b)) | ||
| #else | ||
| #include <unistd.h> | ||
| #define delay() usleep(10); | ||
| #define seten(a,b,c) setenv((a),(b),(c)) | ||
| #endif | ||
|
|
||
| #define SIMD_LEN 4 | ||
| int err = 0; | ||
|
|
||
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL.
enum sched {
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46,
  kmp_sch_runtime_simd = 47,
};
typedef unsigned u32;
typedef long long i64;
typedef unsigned long long u64;
// Descriptor passed as the first argument to the __kmpc_* entry points
// declared below.
typedef struct {
  int reserved_1;
  int flags;
  int reserved_2;
  int reserved_3;
  // Initialised from a string literal, so it must be const (a plain char*
  // is rejected when this file is built as C++).
  const char *psource;
} id;

#ifdef __cplusplus
extern "C" {
#endif
  int __kmpc_global_thread_num(id*);
  void __kmpc_barrier(id*, int gtid);
  void __kmpc_dispatch_init_4(id*, int, enum sched, int, int, int, int);
  void __kmpc_dispatch_init_8(id*, int, enum sched, i64, i64, i64, i64);
  int __kmpc_dispatch_next_4(id*, int, void*, void*, void*, void*);
  int __kmpc_dispatch_next_8(id*, int, void*, void*, void*, void*);
#ifdef __cplusplus
} // extern "C"
#endif
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
|
|
||
| // --------------------------------------------------------------------------- | ||
| void | ||
| run_loop( | ||
| int loop_lb, // Loop lower bound. | ||
| int loop_ub, // Loop upper bound. | ||
| int loop_st, // Loop stride. | ||
| int lchunk | ||
| ) { | ||
| static int volatile loop_sync = 0; | ||
| int lb; // Chunk lower bound. | ||
| int ub; // Chunk upper bound. | ||
| int st; // Chunk stride. | ||
| int rc; | ||
| int tid = omp_get_thread_num(); | ||
| int gtid = __kmpc_global_thread_num(&loc); | ||
| int last; | ||
| int tc = (loop_ub - loop_lb) / loop_st + 1; | ||
| int ch; | ||
| int no_chunk = 0; | ||
| if (lchunk == 0) { | ||
| no_chunk = 1; | ||
| lchunk = 1; | ||
| } | ||
| ch = lchunk * SIMD_LEN; | ||
| #if _DEBUG > 1 | ||
| printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n", | ||
| gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk); | ||
| #endif | ||
| // Don't test degenerate cases that should have been discovered by codegen. | ||
| if (loop_st == 0) | ||
| return; | ||
| if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub) | ||
| return; | ||
| __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd, | ||
| loop_lb, loop_ub, loop_st, SIMD_LEN); | ||
| { | ||
| // Let the master thread handle the chunks alone. | ||
| int chunk; // No of current chunk. | ||
| int last_ub; // Upper bound of the last processed chunk. | ||
| u64 cur; // Number of interations in current chunk. | ||
| u64 max; // Max allowed iterations for current chunk. | ||
| int undersized = 0; | ||
| last_ub = loop_ub; | ||
| chunk = 0; | ||
| max = (loop_ub - loop_lb) / loop_st + 1; | ||
| // The first chunk can consume all iterations. | ||
| while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) { | ||
| ++ chunk; | ||
| #if _DEBUG | ||
| printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n", | ||
| tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1)); | ||
| #endif | ||
| // Check if previous chunk (it is not the final chunk) is undersized. | ||
| if (undersized) | ||
| printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err); | ||
| if (loop_st > 0) { | ||
| if (!(ub <= loop_ub)) | ||
| printf("Error with ub %d, %d, ch %d, err %d\n", | ||
| (int)ub, (int)loop_ub, chunk, ++err); | ||
| if (!(lb <= ub)) | ||
| printf("Error with bounds %d, %d, %d, err %d\n", | ||
| (int)lb, (int)ub, chunk, ++err); | ||
| } else { | ||
| if (!(ub >= loop_ub)) | ||
| printf("Error with ub %d, %d, %d, err %d\n", | ||
| (int)ub, (int)loop_ub, chunk, ++err); | ||
| if (!(lb >= ub)) | ||
| printf("Error with bounds %d, %d, %d, err %d\n", | ||
| (int)lb, (int)ub, chunk, ++err); | ||
| }; // if | ||
| // Stride should not change. | ||
| if (!(st == loop_st)) | ||
| printf("Error with st %d, %d, ch %d, err %d\n", | ||
| (int)st, (int)loop_st, chunk, ++err); | ||
| cur = ( ub - lb ) / loop_st + 1; | ||
| // Guided scheduling uses FP computations, so current chunk may | ||
| // be a bit bigger (+1) than allowed maximum. | ||
| if (!( cur <= max + 1)) | ||
| printf("Error with iter %d, %d, err %d\n", cur, max, ++err); | ||
| // Update maximum for the next chunk. | ||
| if (last) { | ||
| if (!no_chunk && cur > ch) | ||
| printf("Error: too big last chunk %d (%d), tid %d, err %d\n", | ||
| (int)cur, ch, tid, ++err); | ||
| } else { | ||
| if (cur % ch) | ||
| printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", | ||
| chunk, (int)cur, ch, tid, ++err); | ||
| } | ||
| if (cur < max) | ||
| max = cur; | ||
| last_ub = ub; | ||
| undersized = (cur < ch); | ||
| #if _DEBUG > 1 | ||
| if (last) | ||
| printf("under%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n", | ||
| undersized,cur,ch,tid,ub,lb,loop_st); | ||
| #endif | ||
| } // while | ||
| // Must have the right last iteration index. | ||
| if (loop_st > 0) { | ||
| if (!(last_ub <= loop_ub)) | ||
| printf("Error with last1 %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk, ++err); | ||
| if (last && !(last_ub + loop_st > loop_ub)) | ||
| printf("Error with last2 %d, %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); | ||
| } else { | ||
| if (!(last_ub >= loop_ub)) | ||
| printf("Error with last1 %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_ub, chunk, ++err); | ||
| if (last && !(last_ub + loop_st < loop_ub)) | ||
| printf("Error with last2 %d, %d, %d, ch %d, err %d\n", | ||
| (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err); | ||
| } // if | ||
| } | ||
| __kmpc_barrier(&loc, gtid); | ||
| } // run_loop | ||
|
|
||
| int main(int argc, char *argv[]) | ||
| { | ||
| int chunk = 0; | ||
| if (argc > 1) { | ||
| char *buf = malloc(8 + strlen(argv[1])); | ||
| // expect chunk size as a parameter | ||
| chunk = atoi(argv[1]); | ||
| strcpy(buf,"static,"); | ||
| strcat(buf,argv[1]); | ||
| seten("OMP_SCHEDULE",buf,1); | ||
| printf("Testing schedule(simd:%s)\n", buf); | ||
| free(buf); | ||
| } else { | ||
| seten("OMP_SCHEDULE","static",1); | ||
| printf("Testing schedule(simd:static)\n"); | ||
| } | ||
| #pragma omp parallel// num_threads(num_th) | ||
| run_loop(0, 26, 1, chunk); | ||
| if (err) { | ||
| printf("failed, err = %d\n", err); | ||
| return 1; | ||
| } else { | ||
| printf("passed\n"); | ||
| return 0; | ||
| } | ||
| } |