diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 6e83e20d96d59..0b6f5be9f0447 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2551,7 +2551,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, } } } - if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { + if (isOpenMPLoopDirective(DSAStack->getCurrentDirective()) && + !isOpenMPLoopTransformationDirective(DSAStack->getCurrentDirective())) { if (DSAStack->getAssociatedLoops() > 0 && !DSAStack->isLoopStarted()) { DSAStack->resetPossibleLoopCounter(D); DSAStack->loopStart(); diff --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp index 44127525b2527..a710d889a0b6d 100644 --- a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp +++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp @@ -21,7 +21,7 @@ extern "C" void body(...) {} // IR-NEXT: store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4 // IR-NEXT: store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4 // IR-NEXT: store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[END_ADDR]], ptr %[[STEP_ADDR]], ptr %[[START_ADDR]]) +// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[START_ADDR]], ptr %[[END_ADDR]], ptr %[[STEP_ADDR]]) // IR-NEXT: ret void // IR-NEXT: } extern "C" void func(int start, int end, int step) { @@ -36,9 +36,9 @@ extern "C" void func(int start, int end, int step) { // IR-NEXT: [[ENTRY:.*]]: // IR-NEXT: %[[DOTGLOBAL_TID__ADDR:.+]] = alloca ptr, align 8 // IR-NEXT: %[[DOTBOUND_TID__ADDR:.+]] = alloca ptr, align 8 +// IR-NEXT: %[[START_ADDR:.+]] = alloca ptr, align 8 // IR-NEXT: %[[END_ADDR:.+]] = alloca ptr, align 8 // IR-NEXT: %[[STEP_ADDR:.+]] = alloca ptr, align 8 -// IR-NEXT: %[[START_ADDR:.+]] = alloca ptr, align 8 // IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4 // IR-NEXT: %[[TMP:.+]] = alloca i32, align 4 // IR-NEXT: %[[I:.+]] = alloca i32, align 4 @@ -57,12 +57,12 @@ extern "C" void func(int start, int end, int step) { // IR-NEXT: %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4 // IR-NEXT: store ptr %[[DOTGLOBAL_TID_:.+]], ptr %[[DOTGLOBAL_TID__ADDR]], align 8 // IR-NEXT: store ptr %[[DOTBOUND_TID_:.+]], ptr %[[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store ptr %[[START:.+]], ptr %[[START_ADDR]], align 8 // IR-NEXT: store ptr %[[END:.+]], ptr %[[END_ADDR]], align 8 // IR-NEXT: store ptr %[[STEP:.+]], ptr %[[STEP_ADDR]], align 8 -// IR-NEXT: store ptr %[[START:.+]], ptr %[[START_ADDR]], align 8 +// IR-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[START_ADDR]], align 8 // IR-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[END_ADDR]], align 8 // IR-NEXT: %[[TMP1:.+]] = load ptr, ptr %[[STEP_ADDR]], align 8 -// IR-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[START_ADDR]], align 8 // IR-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 // IR-NEXT: store i32 %[[TMP3]], ptr %[[I]], align 4 // IR-NEXT: %[[TMP4:.+]] = load i32, ptr %[[TMP2]], align 4 diff --git a/openmp/libomptarget/test/offloading/target-tile.c b/openmp/libomptarget/test/offloading/target-tile.c new file mode 100644 index 0000000000000..8460b43b6f9c7 --- /dev/null +++ b/openmp/libomptarget/test/offloading/target-tile.c @@ -0,0 +1,62 @@ +// Check that omp tile (introduced in OpenMP 5.1) is permitted and behaves when +// strictly nested within omp target. + +// RUN: %libomptarget-compile-generic -fopenmp-version=51 +// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic + +#include + +#define I_NTILES 8 +#define J_NTILES 9 +#define I_NELEMS 2 +#define J_NELEMS 3 + +int main() { + int order[I_NTILES][J_NTILES][I_NELEMS][J_NELEMS]; + int i, j; + #pragma omp target map(tofrom: i, j) + { + int next = 0; + #pragma omp tile sizes(I_NELEMS, J_NELEMS) + for (i = 0; i < I_NTILES * I_NELEMS; ++i) { + for (j = 0; j < J_NTILES * J_NELEMS; ++j) { + int iTile = i / I_NELEMS; + int jTile = j / J_NELEMS; + int iElem = i % I_NELEMS; + int jElem = j % J_NELEMS; + order[iTile][jTile][iElem][jElem] = next++; + } + } + } + int expected = 0; + for (int iTile = 0; iTile < I_NTILES; ++iTile) { + for (int jTile = 0; jTile < J_NTILES; ++jTile) { + for (int iElem = 0; iElem < I_NELEMS; ++iElem) { + for (int jElem = 0; jElem < J_NELEMS; ++jElem) { + int actual = order[iTile][jTile][iElem][jElem]; + if (expected != actual) { + printf("error: order[%d][%d][%d][%d] = %d, expected %d\n", + iTile, jTile, iElem, jElem, actual, expected); + return 1; + } + ++expected; + } + } + } + } + // Tiling leaves the loop variables with their values from the final iteration + // rather than with the usual +1. + expected = I_NTILES * I_NELEMS - 1; + if (i != expected) { + printf("error: i = %d, expected %d\n", i, expected); + return 1; + } + expected = J_NTILES * J_NELEMS - 1; + if (j != expected) { + printf("error: j = %d, expected %d\n", j, expected); + return 1; + } + // CHECK: success + printf("success\n"); + return 0; +}