Skip to content

Commit

Permalink
[OpenMP] Fix transformed loop's var privacy
Browse files Browse the repository at this point in the history
Without this patch, the following example crashes Clang:

```
 #pragma omp target map(i)
 #pragma omp tile sizes(2)
 for (i = 0; i < N; ++i)
   ;
```

This patch fixes the crash by changing `Sema::isOpenMPPrivateDecl` not
to identify `i` as private just because it's the loop variable of a
`tile` construct.

While OpenMP TR11 and earlier do specify privacy for loop variables of
loops *generated* from a `tile` construct, I haven't found text
stating that the original loop variable must be private in the above
example, so this patch leaves it shared.  Even so, it is a bit
unexpected that value of `i` after the loop is `N - 1` instead of `N`.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D151356
  • Loading branch information
jdenny-ornl committed Jun 2, 2023
1 parent 726835c commit 19841e4
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 5 deletions.
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaOpenMP.cpp
Expand Up @@ -2551,7 +2551,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level,
}
}
}
if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) {
if (isOpenMPLoopDirective(DSAStack->getCurrentDirective()) &&
!isOpenMPLoopTransformationDirective(DSAStack->getCurrentDirective())) {
if (DSAStack->getAssociatedLoops() > 0 && !DSAStack->isLoopStarted()) {
DSAStack->resetPossibleLoopCounter(D);
DSAStack->loopStart();
Expand Down
8 changes: 4 additions & 4 deletions clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
Expand Up @@ -21,7 +21,7 @@ extern "C" void body(...) {}
// IR-NEXT: store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4
// IR-NEXT: store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4
// IR-NEXT: store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[END_ADDR]], ptr %[[STEP_ADDR]], ptr %[[START_ADDR]])
// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[START_ADDR]], ptr %[[END_ADDR]], ptr %[[STEP_ADDR]])
// IR-NEXT: ret void
// IR-NEXT: }
extern "C" void func(int start, int end, int step) {
Expand All @@ -36,9 +36,9 @@ extern "C" void func(int start, int end, int step) {
// IR-NEXT: [[ENTRY:.*]]:
// IR-NEXT: %[[DOTGLOBAL_TID__ADDR:.+]] = alloca ptr, align 8
// IR-NEXT: %[[DOTBOUND_TID__ADDR:.+]] = alloca ptr, align 8
// IR-NEXT: %[[START_ADDR:.+]] = alloca ptr, align 8
// IR-NEXT: %[[END_ADDR:.+]] = alloca ptr, align 8
// IR-NEXT: %[[STEP_ADDR:.+]] = alloca ptr, align 8
// IR-NEXT: %[[START_ADDR:.+]] = alloca ptr, align 8
// IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4
// IR-NEXT: %[[TMP:.+]] = alloca i32, align 4
// IR-NEXT: %[[I:.+]] = alloca i32, align 4
Expand All @@ -57,12 +57,12 @@ extern "C" void func(int start, int end, int step) {
// IR-NEXT: %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
// IR-NEXT: store ptr %[[DOTGLOBAL_TID_:.+]], ptr %[[DOTGLOBAL_TID__ADDR]], align 8
// IR-NEXT: store ptr %[[DOTBOUND_TID_:.+]], ptr %[[DOTBOUND_TID__ADDR]], align 8
// IR-NEXT: store ptr %[[START:.+]], ptr %[[START_ADDR]], align 8
// IR-NEXT: store ptr %[[END:.+]], ptr %[[END_ADDR]], align 8
// IR-NEXT: store ptr %[[STEP:.+]], ptr %[[STEP_ADDR]], align 8
// IR-NEXT: store ptr %[[START:.+]], ptr %[[START_ADDR]], align 8
// IR-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[START_ADDR]], align 8
// IR-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[END_ADDR]], align 8
// IR-NEXT: %[[TMP1:.+]] = load ptr, ptr %[[STEP_ADDR]], align 8
// IR-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[START_ADDR]], align 8
// IR-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4
// IR-NEXT: store i32 %[[TMP3]], ptr %[[I]], align 4
// IR-NEXT: %[[TMP4:.+]] = load i32, ptr %[[TMP2]], align 4
Expand Down
62 changes: 62 additions & 0 deletions openmp/libomptarget/test/offloading/target-tile.c
@@ -0,0 +1,62 @@
// Check that omp tile (introduced in OpenMP 5.1) is permitted and behaves when
// strictly nested within omp target.

// RUN: %libomptarget-compile-generic -fopenmp-version=51
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic

#include <stdio.h>

#define I_NTILES 8
#define J_NTILES 9
#define I_NELEMS 2
#define J_NELEMS 3

int main() {
int order[I_NTILES][J_NTILES][I_NELEMS][J_NELEMS];
int i, j;
#pragma omp target map(tofrom: i, j)
{
int next = 0;
#pragma omp tile sizes(I_NELEMS, J_NELEMS)
for (i = 0; i < I_NTILES * I_NELEMS; ++i) {
for (j = 0; j < J_NTILES * J_NELEMS; ++j) {
int iTile = i / I_NELEMS;
int jTile = j / J_NELEMS;
int iElem = i % I_NELEMS;
int jElem = j % J_NELEMS;
order[iTile][jTile][iElem][jElem] = next++;
}
}
}
int expected = 0;
for (int iTile = 0; iTile < I_NTILES; ++iTile) {
for (int jTile = 0; jTile < J_NTILES; ++jTile) {
for (int iElem = 0; iElem < I_NELEMS; ++iElem) {
for (int jElem = 0; jElem < J_NELEMS; ++jElem) {
int actual = order[iTile][jTile][iElem][jElem];
if (expected != actual) {
printf("error: order[%d][%d][%d][%d] = %d, expected %d\n",
iTile, jTile, iElem, jElem, actual, expected);
return 1;
}
++expected;
}
}
}
}
// Tiling leaves the loop variables with their values from the final iteration
// rather than with the usual +1.
expected = I_NTILES * I_NELEMS - 1;
if (i != expected) {
printf("error: i = %d, expected %d\n", i, expected);
return 1;
}
expected = J_NTILES * J_NELEMS - 1;
if (j != expected) {
printf("error: j = %d, expected %d\n", j, expected);
return 1;
}
// CHECK: success
printf("success\n");
return 0;
}

0 comments on commit 19841e4

Please sign in to comment.