New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][OpenMP] Fix ordering of processing of map clauses when mapping a struct. #72410
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Gheorghe-Teodor Bercea (doru1004) ChangesMapping a struct, if done in the wrong order, can overwrite the pointer attachment details. This fixes this problem. Original failing example:
Previous attempt at fixing this: #70821 Full diff: https://github.com/llvm/llvm-project/pull/72410.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index d2be8141a3a4b31..50518c46152bbaf 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -7731,6 +7731,8 @@ class MappableExprsHandler {
IsImplicit, Mapper, VarRef, ForDeviceAddr);
};
+ // Iterate over all non-section maps first to avoid overwriting pointer
+ // attachment.
for (const auto *Cl : Clauses) {
const auto *C = dyn_cast<OMPMapClause>(Cl);
if (!C)
@@ -7742,15 +7744,42 @@ class MappableExprsHandler {
else if (C->getMapType() == OMPC_MAP_alloc)
Kind = Allocs;
const auto *EI = C->getVarRefs().begin();
- for (const auto L : C->component_lists()) {
- const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
- InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
- C->getMapTypeModifiers(), std::nullopt,
- /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
- E);
- ++EI;
+ if (*EI && !isa<OMPArraySectionExpr>(*EI)) {
+ for (const auto L : C->component_lists()) {
+ const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
+ InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
+ C->getMapTypeModifiers(), std::nullopt,
+ /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
+ E);
+ ++EI;
+ }
+ }
+ }
+
+ // Process the maps with sections.
+ for (const auto *Cl : Clauses) {
+ const auto *C = dyn_cast<OMPMapClause>(Cl);
+ if (!C)
+ continue;
+ MapKind Kind = Other;
+ if (llvm::is_contained(C->getMapTypeModifiers(),
+ OMPC_MAP_MODIFIER_present))
+ Kind = Present;
+ else if (C->getMapType() == OMPC_MAP_alloc)
+ Kind = Allocs;
+ const auto *EI = C->getVarRefs().begin();
+ if (*EI && isa<OMPArraySectionExpr>(*EI)) {
+ for (const auto L : C->component_lists()) {
+ const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
+ InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
+ C->getMapTypeModifiers(), std::nullopt,
+ /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
+ E);
+ ++EI;
+ }
}
}
+
for (const auto *Cl : Clauses) {
const auto *C = dyn_cast<OMPToClause>(Cl);
if (!C)
@@ -7767,6 +7796,7 @@ class MappableExprsHandler {
++EI;
}
}
+
for (const auto *Cl : Clauses) {
const auto *C = dyn_cast<OMPFromClause>(Cl);
if (!C)
diff --git a/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp b/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp
new file mode 100644
index 000000000000000..c7ce4bade8de9a2
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp
@@ -0,0 +1,114 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+
+struct Descriptor {
+ int *datum;
+ long int x;
+ int *more_datum;
+ int xi;
+ int val_datum, val_more_datum;
+ long int arr[1][30];
+ int val_arr;
+};
+
+int main() {
+ Descriptor dat = Descriptor();
+ dat.datum = (int *)malloc(sizeof(int) * 10);
+ dat.more_datum = (int *)malloc(sizeof(int) * 20);
+ dat.xi = 3;
+ dat.arr[0][0] = 1;
+
+ dat.datum[7] = 7;
+ dat.more_datum[17] = 17;
+
+ /// The struct is mapped with type 0x0 when the pointer fields are mapped.
+ /// The struct is also map explicitely by the user. The second mapping by
+ /// the user must not overwrite the mapping set up for the pointer fields
+ /// when mapping the struct happens after the mapping of the pointers.
+
+ // clang-format off
+ // CHECK: Libomptarget --> Entry 0: Base=[[DAT_HST_PTR_BASE:0x.*]], Begin=[[DAT_HST_PTR_BASE]], Size=288, Type=0x0, Name=unknown
+ // CHECK: Libomptarget --> Entry 1: Base=[[DAT_HST_PTR_BASE]], Begin=[[DAT_HST_PTR_BASE]], Size=288, Type=0x1000000000001, Name=unknown
+ // CHECK: Libomptarget --> Entry 2: Base=[[DAT_HST_PTR_BASE]], Begin=[[DATUM_HST_PTR_BASE:0x.*]], Size=40, Type=0x1000000000011, Name=unknown
+ // CHECK: Libomptarget --> Entry 3: Base=[[MORE_DATUM_HST_PTR_BASE:0x.*]], Begin=[[MORE_DATUM_HST_PTR_BEGIN:0x.*]], Size=80, Type=0x1000000000011, Name=unknown
+ // clang-format on
+
+ /// The struct will be mapped in the same order as the above entries.
+
+ /// First argument is the struct itself and it will be mapped once.
+
+ // clang-format off
+ // CHECK: Libomptarget --> Looking up mapping(HstPtrBegin=[[DAT_HST_PTR_BASE]], Size=288)...
+ // CHECK: PluginInterface --> MemoryManagerTy::allocate: size 288 with host pointer [[DAT_HST_PTR_BASE]].
+ // CHECK: Libomptarget --> Creating new map entry with HstPtrBase=[[DAT_HST_PTR_BASE]], HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtAllocBegin=[[DAT_DEVICE_PTR_BASE:0x.*]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=1, HoldRefCount=0, Name=unknown
+ // CHECK: Libomptarget --> Moving 288 bytes (hst:[[DAT_HST_PTR_BASE]]) -> (tgt:[[DAT_DEVICE_PTR_BASE]])
+ // clang-format on
+
+ /// Second argument is dat.datum:
+ // clang-format off
+ // CHECK: Libomptarget --> Looking up mapping(HstPtrBegin=[[DATUM_HST_PTR_BASE]], Size=40)...
+ // CHECK: PluginInterface --> MemoryManagerTy::allocate: size 40 with host pointer [[DATUM_HST_PTR_BASE]].
+ // CHECK: Libomptarget --> Creating new map entry with HstPtrBase=[[DATUM_HST_PTR_BASE]], HstPtrBegin=[[DATUM_HST_PTR_BASE]], TgtAllocBegin=[[DATUM_DEVICE_PTR_BASE:0x.*]], TgtPtrBegin=[[DATUM_DEVICE_PTR_BASE]], Size=40, DynRefCount=1, HoldRefCount=0, Name=unknown
+ // CHECK: Libomptarget --> Moving 40 bytes (hst:[[DATUM_HST_PTR_BASE]]) -> (tgt:[[DATUM_DEVICE_PTR_BASE]])
+ // clang-format on
+
+ /// Third argument is dat.more_datum:
+ // clang-format off
+ // CHECK: Libomptarget --> Looking up mapping(HstPtrBegin=[[MORE_DATUM_HST_PTR_BEGIN]], Size=80)...
+ // CHECK: PluginInterface --> MemoryManagerTy::allocate: size 80 with host pointer [[MORE_DATUM_HST_PTR_BEGIN]].
+ // CHECK: Libomptarget --> Creating new map entry with HstPtrBase=[[MORE_DATUM_HST_PTR_BEGIN]], HstPtrBegin=[[MORE_DATUM_HST_PTR_BEGIN]], TgtAllocBegin=[[MORE_DATUM_DEVICE_PTR_BEGIN:0x.*]], TgtPtrBegin=[[MORE_DATUM_DEVICE_PTR_BEGIN]], Size=80, DynRefCount=1, HoldRefCount=0, Name=unknown
+ // CHECK: Libomptarget --> Moving 80 bytes (hst:[[MORE_DATUM_HST_PTR_BEGIN]]) -> (tgt:[[MORE_DATUM_DEVICE_PTR_BEGIN]])
+ // clang-format on
+
+#pragma omp target enter data map(to : dat.datum[ : 10]) \
+ map(to : dat.more_datum[ : 20]) map(to : dat)
+
+ /// Checks induced by having a target region:
+ // clang-format off
+ // CHECK: Libomptarget --> Entry 0: Base=[[DAT_HST_PTR_BASE]], Begin=[[DAT_HST_PTR_BASE]], Size=288, Type=0x223, Name=unknown
+ // CHECK: Libomptarget --> Mapping exists (implicit) with HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=2 (incremented), HoldRefCount=0, Name=unknown
+ // CHECK: Libomptarget --> Obtained target argument [[DAT_DEVICE_PTR_BASE]] from host pointer [[DAT_HST_PTR_BASE]]
+ // clang-format on
+
+#pragma omp target
+ {
+ dat.xi = 4;
+ dat.datum[7]++;
+ dat.more_datum[17]++;
+ dat.val_datum = dat.datum[7];
+ dat.val_more_datum = dat.more_datum[17];
+ dat.datum[dat.arr[0][0]] = dat.xi;
+ dat.val_arr = dat.datum[dat.arr[0][0]];
+ }
+
+ /// Post-target region checks:
+ // clang-format off
+ // CHECK: Libomptarget --> Mapping exists with HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=1 (decremented), HoldRefCount=0
+ // clang-format on
+
+#pragma omp target exit data map(from : dat)
+
+ /// Target data end checks:
+ // clang-format off
+ // CHECK: Libomptarget --> Mapping exists with HstPtrBegin=[[DAT_HST_PTR_BASE]], TgtPtrBegin=[[DAT_DEVICE_PTR_BASE]], Size=288, DynRefCount=0 (decremented, delayed deletion), HoldRefCount=0
+ // CHECK: Libomptarget --> Moving 288 bytes (tgt:[[DAT_DEVICE_PTR_BASE]]) -> (hst:[[DAT_HST_PTR_BASE]])
+ // clang-format on
+
+ // CHECK: dat.xi = 4
+ // CHECK: dat.val_datum = 8
+ // CHECK: dat.val_more_datum = 18
+ // CHECK: dat.datum[dat.arr[0][0]] = 0
+ // CHECK: dat.val_arr = 4
+
+ printf("dat.xi = %d\n", dat.xi);
+ printf("dat.val_datum = %d\n", dat.val_datum);
+ printf("dat.val_more_datum = %d\n", dat.val_more_datum);
+ printf("dat.datum[dat.arr[0][0]] = %d\n", dat.datum[dat.arr[0][0]]);
+ printf("dat.val_arr = %d\n", dat.val_arr);
+
+ return 0;
+}
|
6f9450b
to
ed9d505
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This being in clang instead seems like a good change. Are there no CodeGen tests changed? We should add one if so. Probably just take your libomptarget
test and run update_cc_test_checks
on it with the arguments found in other test files.
No code gen test changes. Happy to add one no problem. |
ed9d505
to
a16ffab
Compare
Just added the test. |
/*ReturnDevicePointer=*/false, C->isImplicit(), | ||
std::get<2>(L), E); | ||
++EI; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This duplicates the loop nest, which is very unfortunate. Why not actually sort the clause list? That will also make it easier to add/change things in the future, e.g., we simply modify the comparator.
for (const auto L : C->component_lists()) { | ||
const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr; | ||
InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), | ||
C->getMapTypeModifiers(), std::nullopt, | ||
/*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), | ||
E); | ||
++EI; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's strange that array sections are not handled correctly here, need to check why componen_list() does not return it correctly. Or just InfoGen does not process it correctly.
d292290
to
2ea93a7
Compare
2ea93a7
to
6712acd
Compare
@@ -7731,10 +7731,30 @@ class MappableExprsHandler { | |||
IsImplicit, Mapper, VarRef, ForDeviceAddr); | |||
}; | |||
|
|||
// Sort all map clauses and make sure all the maps containing array | |||
// sections are processed last. | |||
llvm::SmallVector<const OMPMapClause *, 16> SortedMapClauses; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What implies that this is sorted?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand the question.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you asking what is the sorting criteria?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's called "SortedMapClauses" but I don't see any sorting, we just push back into the vector.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, so I just moved all the maps containing sections at the end of the clause list. I want those maps to happen last after all the structs and other maps have happened.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well I don't see anything other that's wrong other than the order and the order comes from how the user wrote the code so I am not sure how else to fix it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't find any "bug" in the existing code. It works as intended. The problem is that it doesn't handle these types of situations and I don't see how else to fix an ordering problem other than by re-ordering. If you have a different solution in mind please let me know.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@alexey-bataev I have looked at the code again and I really can't see another solution to this problem. If you have a different fix in mind please let me know.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will try to investigate it tomorrow, probably
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, looks like we emit something like this as maptypes:
0x0
0x1000000000011 - MEMBER_OF_1 | MAP_PTR_AND_OBJ | MAP_TO - array section
0x1000000000001 - MEMBER_OF_1 | MAP_TO - whole struct
I think the whole struct info can be copied to the very first element instead of the placeholder, if we see that the whole struct is mapped, and the corresponding element can be removed out of the list. I think this can be done in emitCombinedEntry function
Fix mapping of structs to device. The following example fails: ``` #include <stdio.h> #include <stdlib.h> struct Descriptor { int *datum; long int x; int xi; long int arr[1][30]; }; int main() { Descriptor dat = Descriptor(); dat.datum = (int *)malloc(sizeof(int)*10); dat.xi = 3; dat.arr[0][0] = 1; #pragma omp target enter data map(to: dat.datum[:10]) map(to: dat) #pragma omp target { dat.xi = 4; dat.datum[dat.arr[0][0]] = dat.xi; } #pragma omp target exit data map(from: dat) return 0; } ``` This is a rework of the previous attempt: #72410
Fix mapping of structs to device. The following example fails: ``` #include <stdio.h> #include <stdlib.h> struct Descriptor { int *datum; long int x; int xi; long int arr[1][30]; }; int main() { Descriptor dat = Descriptor(); dat.datum = (int *)malloc(sizeof(int)*10); dat.xi = 3; dat.arr[0][0] = 1; #pragma omp target enter data map(to: dat.datum[:10]) map(to: dat) #pragma omp target { dat.xi = 4; dat.datum[dat.arr[0][0]] = dat.xi; } #pragma omp target exit data map(from: dat) return 0; } ``` This is a rework of the previous attempt: llvm/llvm-project#72410
Mapping a struct, if done in the wrong order, can overwrite the pointer attachment details. This fixes this problem.
Original failing example:
Previous attempt at fixing this: #70821