-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OpenMP] add loop collapse tests (#86243)
This PR adds loop collapse tests ported from MSVC. --------- Co-authored-by: Vadim Paretsky <b-vadipa@microsoft.com>
- Loading branch information
1 parent
b1a633b
commit 7db4046
Showing
7 changed files
with
511 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
#include <omp.h> | ||
#include <malloc.h> | ||
#include <stdio.h> | ||
#include <memory.h> | ||
|
||
#define LOOP_IV_TYPE0 LOOP_TYPES | ||
#define LOOP_TYPE0 LOOP_TYPES | ||
#define LOOP_STYPE0 LOOP_TYPES | ||
|
||
#define LOOP_IV_TYPE1 LOOP_TYPES | ||
#define LOOP_TYPE1 LOOP_TYPES | ||
#define LOOP_STYPE1 LOOP_TYPES | ||
|
||
#define LOOP_IV_TYPE2 LOOP_TYPES | ||
#define LOOP_TYPE2 LOOP_TYPES | ||
#define LOOP_STYPE2 LOOP_TYPES | ||
|
||
#define MAX_THREADS 256 | ||
|
||
#if defined VERBOSE | ||
#define PRINTF printf | ||
#else | ||
#define PRINTF | ||
#endif | ||
|
||
LOOP_TYPE0 iLB, iUB; | ||
LOOP_TYPE1 jA0, jB0; | ||
LOOP_TYPE2 kA0, kB0; | ||
|
||
LOOP_STYPE0 iStep; | ||
LOOP_STYPE1 jA1, jB1, jStep; | ||
LOOP_STYPE2 kA1, kB1, kStep; | ||
|
||
// We can check <=, <, >=, > (!= has different pattern) | ||
// Additional definition of LOOP_LEi, LOOP_LTi, etc. is helpful to build calls | ||
// of the test from main | ||
|
||
#if defined LOOP_LE0 | ||
#define COMPARE0 <= | ||
#elif defined LOOP_LT0 | ||
#define COMPARE0 < | ||
#elif defined LOOP_GE0 | ||
#define COMPARE0 >= | ||
#elif defined LOOP_GT0 | ||
#define COMPARE0 > | ||
#endif | ||
|
||
#if defined LOOP_LE1 | ||
#define COMPARE1 <= | ||
#elif defined LOOP_LT1 | ||
#define COMPARE1 < | ||
#elif defined LOOP_GE1 | ||
#define COMPARE1 >= | ||
#elif defined LOOP_GT1 | ||
#define COMPARE1 > | ||
#endif | ||
|
||
#if defined LOOP_LE2 | ||
#define COMPARE2 <= | ||
#elif defined LOOP_LT2 | ||
#define COMPARE2 < | ||
#elif defined LOOP_GE2 | ||
#define COMPARE2 >= | ||
#elif defined LOOP_GT2 | ||
#define COMPARE2 > | ||
#endif | ||
|
||
typedef struct { | ||
LOOP_IV_TYPE0 i; | ||
LOOP_IV_TYPE1 j; | ||
LOOP_IV_TYPE2 k; | ||
} spaceType; | ||
|
||
spaceType *AllocSpace(unsigned size) { | ||
|
||
spaceType *p = (spaceType *)malloc(size * sizeof(spaceType)); | ||
memset(p, 0, size * sizeof(spaceType)); | ||
return p; | ||
} | ||
|
||
void FreeSpace(spaceType *space) { free(space); } | ||
|
||
// record an iteration | ||
void Set(spaceType *space, unsigned count, unsigned trueCount, LOOP_IV_TYPE0 i, | ||
LOOP_IV_TYPE1 j, LOOP_IV_TYPE0 k) { | ||
if (count > trueCount) { | ||
// number of iterations exceeded | ||
// will be reported with checks | ||
return; | ||
} | ||
space[count - 1].i = i; | ||
space[count - 1].j = j; | ||
space[count - 1].k = k; | ||
} | ||
int test() { | ||
int pass = 1; | ||
LOOP_IV_TYPE0 i; | ||
LOOP_IV_TYPE1 j; | ||
LOOP_IV_TYPE2 k; | ||
|
||
spaceType *openmpSpace; | ||
spaceType *scalarSpace; | ||
|
||
unsigned trueCount = 0; | ||
unsigned openmpCount = 0; | ||
unsigned scalarCount = 0; | ||
unsigned uselessThreadsOpenMP = 0; | ||
unsigned usefulThreadsOpenMP = 0; | ||
unsigned chunkSizesOpenmp[MAX_THREADS] = {0}; | ||
|
||
unsigned num_threads = omp_get_max_threads(); | ||
if (num_threads > MAX_THREADS) | ||
num_threads = MAX_THREADS; | ||
omp_set_num_threads(num_threads); | ||
|
||
// count iterations and allocate space | ||
LOOP { ++trueCount; } | ||
|
||
openmpSpace = AllocSpace(trueCount); | ||
scalarSpace = AllocSpace(trueCount); | ||
|
||
// fill the scalar (compare) space | ||
LOOP { | ||
++scalarCount; | ||
Set(scalarSpace, scalarCount, trueCount, i, j, k); | ||
} | ||
|
||
// test run body: | ||
// perform and record OpenMP iterations and thread use | ||
#pragma omp parallel num_threads(num_threads) | ||
{ | ||
#pragma omp for collapse(3) private(i, j, k) | ||
LOOP { | ||
unsigned count; | ||
unsigned gtid = omp_get_thread_num(); | ||
#pragma omp atomic update | ||
++chunkSizesOpenmp[gtid]; | ||
#pragma omp atomic capture | ||
count = ++openmpCount; | ||
Set(openmpSpace, count, trueCount, i, j, k); | ||
} | ||
} | ||
|
||
// check for the right number of iterations processed | ||
// (only need to check for less, greater is checked when recording) | ||
if (openmpCount < trueCount) { | ||
PRINTF("OpenMP FAILURE: Openmp processed fewer iterations: %d vs %d\n", | ||
openmpCount, trueCount); | ||
pass = 0; | ||
} else if (openmpCount > trueCount) { | ||
PRINTF("OpenMP FAILURE: Openmp processed more iterations: %d vs %d\n", | ||
openmpCount, trueCount); | ||
pass = 0; | ||
} | ||
|
||
// check openMP for iteration correctnes against scalar | ||
for (unsigned i = 0; i < trueCount; i++) { | ||
unsigned j; | ||
for (j = 0; j < openmpCount; j++) { | ||
if ((scalarSpace[i].i == openmpSpace[j].i) && | ||
(scalarSpace[i].j == openmpSpace[j].j) && | ||
(scalarSpace[i].k == openmpSpace[j].k)) { | ||
break; | ||
} | ||
} | ||
if (j == openmpCount) { | ||
PRINTF("OpenMP FAILURE: (%d %d %d) not processed\n", scalarSpace[i].i, | ||
scalarSpace[i].j, scalarSpace[i].k); | ||
pass = 0; | ||
} | ||
} | ||
|
||
// check for efficient thread use | ||
for (unsigned i = 0; i < num_threads; ++i) { | ||
if (chunkSizesOpenmp[i] == 0) { | ||
++uselessThreadsOpenMP; | ||
} | ||
} | ||
|
||
// a check to see if at least more than one thread was used (weakish) | ||
if ((uselessThreadsOpenMP == num_threads - 1) && (trueCount > 1)) { | ||
PRINTF("OpenMP FAILURE: threads are not used\n"); | ||
pass = 0; | ||
} | ||
|
||
#if 0 | ||
// a check to see if the load was spread more or less evenly so that | ||
// when there was more work than threads each one got at least something | ||
// (stronger, but may currently fail for a general collapse case) | ||
if ((trueCount >= num_threads) && (uselessThreadsOpenMP > 0)) { | ||
PRINTF("OpenMP FAILURE: %d threads not used with %d iterations\n", | ||
uselessThreadsOpenMP, openmpCount); | ||
pass = 0; | ||
} | ||
#endif | ||
|
||
// clean up space | ||
FreeSpace(openmpSpace); | ||
FreeSpace(scalarSpace); | ||
return pass; | ||
} |
65 changes: 65 additions & 0 deletions
65
openmp/runtime/test/worksharing/for/omp_collapse_many_GELTGT_int.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// RUN: %libomp-compile-and-run | ||
|
||
// Non-rectangular loop collapsing. | ||
// | ||
// Nested loops conform to OpenMP 5.2 standard, | ||
// inner loops bounds may depend on outer loops induction variables. | ||
|
||
#define LOOP_TYPES int | ||
#define COMPARE0 >= | ||
#define COMPARE1 < | ||
#define COMPARE2 > | ||
#define LOOP \ | ||
for (i = iLB; i COMPARE0 iUB; i += iStep) \ | ||
for (j = jA0; j COMPARE1 jB0; j += jStep) \ | ||
for (k = kA0; k COMPARE2 kB0; k += kStep) | ||
#include "collapse_test.inc" | ||
|
||
int main() { | ||
int fail; | ||
|
||
iLB = 3; | ||
iUB = -2; | ||
jA0 = -3; | ||
jA1 = 0; | ||
jB0 = -6; | ||
jB1 = 0; | ||
kA0 = -2; | ||
kA1 = 0; | ||
kB0 = -4; | ||
kB1 = 0; | ||
iStep = -1; | ||
jStep = -1; | ||
kStep = -4; | ||
PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; " | ||
"kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n", | ||
iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep); | ||
fail = (test() == 0); | ||
|
||
if (!fail) { | ||
for (iStep = -3; iStep >= -6; iStep -= 2) { | ||
for (jA0 = -6; jA0 <= 6; jA0 += 3) { | ||
for (jB0 = -3; jB0 <= 10; jB0 += 3) { | ||
for (jStep = 1; jStep <= 10; jStep += 2) { | ||
for (kA0 = -2; kA0 <= 4; ++kA0) { | ||
for (kB0 = -4; kB0 <= 2; ++kB0) { | ||
for (kStep = -2; kStep >= -10; kStep -= 4) { | ||
{ | ||
PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; " | ||
"jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; " | ||
"jStep=%d; kStep=%d;\n", | ||
iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, | ||
iStep, jStep, kStep); | ||
fail = fail || (test() == 0); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return fail; | ||
} |
71 changes: 71 additions & 0 deletions
71
openmp/runtime/test/worksharing/for/omp_collapse_many_GTGEGT_int.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// RUN: %libomp-compile-and-run | ||
|
||
// Non-rectangular loop collapsing. | ||
// | ||
// Nested loops conform to OpenMP 5.2 standard, | ||
// inner loops bounds may depend on outer loops induction variables. | ||
|
||
#define LOOP_TYPES int | ||
#define COMPARE0 > | ||
#define COMPARE1 >= | ||
#define COMPARE2 > | ||
|
||
#define DLOOP_GT0 | ||
#define DLOOP_GE1 | ||
#define DLOOP_GT2 | ||
|
||
#define LOOP \ | ||
for (i = iLB; i COMPARE0 iUB; i += iStep) \ | ||
for (j = jA0; j COMPARE1 jB0; j += jStep) \ | ||
for (k = kA0; k COMPARE2 kB0; k += kStep) | ||
#include "collapse_test.inc" | ||
|
||
int main() { | ||
int fail; | ||
|
||
iLB = 3; | ||
iUB = -2; | ||
jA0 = -3; | ||
jA1 = 0; | ||
jB0 = -6; | ||
jB1 = 0; | ||
kA0 = -2; | ||
kA1 = 0; | ||
kB0 = -4; | ||
kB1 = 0; | ||
iStep = -1; | ||
jStep = -1; | ||
kStep = -4; | ||
PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; " | ||
"kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n", | ||
iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep); | ||
fail = (test() == 0); | ||
|
||
if (!fail) { | ||
|
||
for (iStep = -3; iStep >= -6; iStep -= 2) { | ||
for (jA0 = -3; jA0 <= 10; jA0 += 3) { | ||
for (jB0 = -6; jB0 <= 6; jB0 += 3) { | ||
for (jStep = -1; jStep >= -10; jStep -= 2) { | ||
for (kA0 = -2; kA0 <= 4; ++kA0) { | ||
for (kB0 = -4; kB0 <= 2; ++kB0) { | ||
for (kStep = -2; kStep >= -10; kStep -= 4) { | ||
{ | ||
PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; " | ||
"jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; " | ||
"jStep=%d; kStep=%d;\n", | ||
iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, | ||
iStep, jStep, kStep); | ||
fail = fail || (test() == 0); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return fail; | ||
} |
Oops, something went wrong.