Skip to content

Commit

Permalink
OpenMP: Fix for PR46868: Incorrect target map
Browse files Browse the repository at this point in the history
  • Loading branch information
zmodem committed Aug 25, 2020
1 parent 1274d83 commit 4557452
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 50 deletions.
93 changes: 70 additions & 23 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Expand Up @@ -7265,6 +7265,8 @@ class MappableExprsHandler {
// &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
//
// map(p[1:24])
// &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
// in unified shared memory mode or for local pointers
// p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
//
// map(s)
Expand Down Expand Up @@ -7400,6 +7402,7 @@ class MappableExprsHandler {
// Track if the map information being generated is the first for a list of
// components.
bool IsExpressionFirstInfo = true;
bool FirstPointerInComplexData = false;
Address BP = Address::invalid();
const Expr *AssocExpr = I->getAssociatedExpression();
const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
Expand Down Expand Up @@ -7442,10 +7445,15 @@ class MappableExprsHandler {
QualType Ty =
I->getAssociatedDeclaration()->getType().getNonReferenceType();
if (Ty->isAnyPointerType() && std::next(I) != CE) {
BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

// We do not need to generate individual map information for the
// pointer, it can be associated with the combined storage.
// No need to generate individual map information for the pointer, it
// can be associated with the combined storage if shared memory mode is
// active or the base declaration is not global variable.
const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
!VD || VD->hasLocalStorage())
BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
else
FirstPointerInComplexData = IsCaptureFirstInfo;
++I;
}
}
Expand Down Expand Up @@ -7481,8 +7489,19 @@ class MappableExprsHandler {
EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
// If we encounter a PTR_AND_OBJ entry from now on it should be marked
// as MEMBER_OF the parent struct.
if (EncounteredME)
if (EncounteredME) {
ShouldBeMemberOf = true;
// Do not emit as complex pointer if this is actually not array-like
// expression.
if (FirstPointerInComplexData) {
QualType Ty = std::prev(I)
->getAssociatedDeclaration()
->getType()
.getNonReferenceType();
BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
FirstPointerInComplexData = false;
}
}
}

auto Next = std::next(I);
Expand Down Expand Up @@ -7615,10 +7634,11 @@ class MappableExprsHandler {
// same expression except for the first one. We also need to signal
// this map is the first one that relates with the current capture
// (there is a set of entries for each capture).
OpenMPOffloadMappingFlags Flags = getMapTypeBits(
MapType, MapModifiers, IsImplicit,
!IsExpressionFirstInfo || RequiresReference,
IsCaptureFirstInfo && !RequiresReference);
OpenMPOffloadMappingFlags Flags =
getMapTypeBits(MapType, MapModifiers, IsImplicit,
!IsExpressionFirstInfo || RequiresReference ||
FirstPointerInComplexData,
IsCaptureFirstInfo && !RequiresReference);

if (!IsExpressionFirstInfo) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
Expand Down Expand Up @@ -7676,6 +7696,7 @@ class MappableExprsHandler {

IsExpressionFirstInfo = false;
IsCaptureFirstInfo = false;
FirstPointerInComplexData = false;
}
}
}
Expand Down Expand Up @@ -7906,6 +7927,10 @@ class MappableExprsHandler {
// emission of that entry until the whole struct has been processed.
llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
DeferredInfo;
MapBaseValuesArrayTy UseDevicePtrBasePointers;
MapValuesArrayTy UseDevicePtrPointers;
MapValuesArrayTy UseDevicePtrSizes;
MapFlagsArrayTy UseDevicePtrTypes;

for (const auto *C :
CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
Expand All @@ -7922,15 +7947,27 @@ class MappableExprsHandler {
// We potentially have map information for this declaration already.
// Look for the first set of components that refer to it.
if (It != Info.end()) {
auto CI = std::find_if(
It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
return MI.Components.back().getAssociatedDeclaration() == VD;
});
auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
return MI.Components.back().getAssociatedDeclaration() == VD;
});
// If we found a map entry, signal that the pointer has to be returned
// and move on to the next declaration.
// Exclude cases where the base pointer is mapped as array subscript,
// array section or array shaping. The base address is passed as a
// pointer to base in this case and cannot be used as a base for
// use_device_ptr list item.
if (CI != It->second.end()) {
CI->ReturnDevicePointer = true;
continue;
auto PrevCI = std::next(CI->Components.rbegin());
const auto *VarD = dyn_cast<VarDecl>(VD);
if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
isa<MemberExpr>(IE) ||
!VD->getType().getNonReferenceType()->isPointerType() ||
PrevCI == CI->Components.rend() ||
isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
VarD->hasLocalStorage()) {
CI->ReturnDevicePointer = true;
continue;
}
}
}

Expand All @@ -7951,10 +7988,12 @@ class MappableExprsHandler {
} else {
llvm::Value *Ptr =
CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
BasePointers.emplace_back(Ptr, VD);
Pointers.push_back(Ptr);
Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
UseDevicePtrBasePointers.emplace_back(Ptr, VD);
UseDevicePtrPointers.push_back(Ptr);
UseDevicePtrSizes.push_back(
llvm::Constant::getNullValue(CGF.Int64Ty));
UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
OMP_MAP_TARGET_PARAM);
}
}
}
Expand Down Expand Up @@ -8015,10 +8054,12 @@ class MappableExprsHandler {
Ptr = CGF.EmitLValue(IE).getPointer(CGF);
else
Ptr = CGF.EmitScalarExpr(IE);
BasePointers.emplace_back(Ptr, VD);
Pointers.push_back(Ptr);
Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
UseDevicePtrBasePointers.emplace_back(Ptr, VD);
UseDevicePtrPointers.push_back(Ptr);
UseDevicePtrSizes.push_back(
llvm::Constant::getNullValue(CGF.Int64Ty));
UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
OMP_MAP_TARGET_PARAM);
}
}
}
Expand Down Expand Up @@ -8108,6 +8149,12 @@ class MappableExprsHandler {
Sizes.append(CurSizes.begin(), CurSizes.end());
Types.append(CurTypes.begin(), CurTypes.end());
}
// Append data for use_device_ptr clauses.
BasePointers.append(UseDevicePtrBasePointers.begin(),
UseDevicePtrBasePointers.end());
Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end());
Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end());
Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end());
}

/// Generate all the base pointers, section pointers, sizes and map types for
Expand Down
14 changes: 7 additions & 7 deletions clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
Expand Up @@ -22,18 +22,18 @@
double *g;

// CK1: @g = global double*
// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i64] [i64 99]
// CK1: [[MTYPE00:@.+]] = {{.*}}constant [2 x i64] [i64 51, i64 96]
// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i64] [i64 99]
// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i64] [i64 99]
// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i64] [i64 99]
// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i64] [i64 99]
// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i64] [i64 99]
// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i64] [i64 99]
// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i64] [{{i64 35, i64 99|i64 99, i64 35}}]
// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 35]
// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99]
// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99]
// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35]
// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35]
// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 96]
// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 96]

// CK1-LABEL: @_Z3foo
template<typename T>
Expand All @@ -42,7 +42,7 @@ void foo(float *&lr, T *&tr) {
T *t;

// CK1: [[T:%.+]] = load double*, double** [[DECL:@g]],
// CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to double**
// CK1: store double* [[T]], double** [[CBP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]]
Expand Down Expand Up @@ -280,7 +280,7 @@ void foo(float *&lr, T *&tr) {
++l; ++t;

// CK1: [[T1:%.+]] = load i32*, i32** [[DECL:%.+]],
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
// CK1: store i32* [[T1]], i32** [[CBP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE11]]
Expand All @@ -300,7 +300,7 @@ void foo(float *&lr, T *&tr) {

// CK1: [[T2:%.+]] = load i32**, i32*** [[DECL:%.+]],
// CK1: [[T1:%.+]] = load i32*, i32** [[T2]],
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0
// CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1
// CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32**
// CK1: store i32* [[T1]], i32** [[CBP]],
// CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE12]]
Expand Down
21 changes: 9 additions & 12 deletions clang/test/OpenMP/target_map_codegen.cpp
Expand Up @@ -3195,7 +3195,7 @@ int explicit_maps_template_args_and_members(int a){

// CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
// CK22: [[SIZE04:@.+]] = private {{.*}}constant [1 x i64] [i64 20]
// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 51]

// CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
// CK22: [[SIZE05:@.+]] = private {{.*}}constant [1 x i64] [i64 4]
Expand All @@ -3215,7 +3215,7 @@ int explicit_maps_template_args_and_members(int a){

// CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
// CK22: [[SIZE09:@.+]] = private {{.*}}constant [1 x i64] [i64 20]
// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 51]

// CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
// CK22: [[SIZE10:@.+]] = private {{.*}}constant [1 x i64] [i64 4]
Expand All @@ -3235,7 +3235,7 @@ int explicit_maps_template_args_and_members(int a){

// CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
// CK22: [[SIZE14:@.+]] = private {{.*}}constant [1 x i64] [i64 20]
// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 51]

int a;
int c[100];
Expand Down Expand Up @@ -3331,11 +3331,10 @@ int explicit_maps_globals(void){

// CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
// CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
// CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32**
// CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32***
// CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32**
// CK22-DAG: store i32* [[RVAR0:%.+]], i32** [[CBP0]]
// CK22-DAG: store i32** @d, i32*** [[CBP0]]
// CK22-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]]
// CK22-DAG: [[RVAR0]] = load i32*, i32** @d
// CK22-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 2
// CK22-DAG: [[RVAR00]] = load i32*, i32** @d

Expand Down Expand Up @@ -3414,11 +3413,10 @@ int explicit_maps_globals(void){

// CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
// CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
// CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
// CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]***
// CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[ST]]**
// CK22-DAG: store [[ST]]* [[RVAR0:%.+]], [[ST]]** [[CBP0]]
// CK22-DAG: store [[ST]]** @sd, [[ST]]*** [[CBP0]]
// CK22-DAG: store [[ST]]* [[SEC0:%.+]], [[ST]]** [[CP0]]
// CK22-DAG: [[RVAR0]] = load [[ST]]*, [[ST]]** @sd
// CK22-DAG: [[SEC0]] = getelementptr {{.*}}[[ST]]* [[RVAR00:%.+]], i{{.+}} 2
// CK22-DAG: [[RVAR00]] = load [[ST]]*, [[ST]]** @sd

Expand Down Expand Up @@ -3497,11 +3495,10 @@ int explicit_maps_globals(void){

// CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
// CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
// CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]**
// CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]***
// CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[STT]]**
// CK22-DAG: store [[STT]]* [[RVAR0:%.+]], [[STT]]** [[CBP0]]
// CK22-DAG: store [[STT]]** @std, [[STT]]*** [[CBP0]]
// CK22-DAG: store [[STT]]* [[SEC0:%.+]], [[STT]]** [[CP0]]
// CK22-DAG: [[RVAR0]] = load [[STT]]*, [[STT]]** @std
// CK22-DAG: [[SEC0]] = getelementptr {{.*}}[[STT]]* [[RVAR00:%.+]], i{{.+}} 2
// CK22-DAG: [[RVAR00]] = load [[STT]]*, [[STT]]** @std

Expand Down
3 changes: 2 additions & 1 deletion clang/test/OpenMP/target_update_codegen.cpp
Expand Up @@ -737,7 +737,7 @@ void lvalue(int **BB, int a, int b) {
// CK13-64-DAG: [[ADD_PTR]] = getelementptr inbounds i32*, i32** [[B_VAL:%.+]], i64 [[IDX_EXT:%.+]]
// CK13-32-DAG: [[ADD_PTR]] = getelementptr inbounds i32*, i32** [[B_VAL:%.+]], i32 [[A_ADDR:%.+]]
// CK13-64-DAG: [[IDX_EXT]] = sext i32 [[TWO:%.+]] to i64
// CK13-DAG: [[B_VAL]] = load i32**, i32*** [[BB_ADDR]]
// CK13-DAG: [[B_VAL]] = load i32**, i32*** [[BB_ADDR:%.+]]
#pragma omp target update to(*(*(BB+a)+b))
*(*(BB+a)+b) = 1;
#pragma omp target update from(*(*(BB+a)+b))
Expand Down Expand Up @@ -978,6 +978,7 @@ void lvalue_find_base(float **f, SSA *sa) {
// CK17-DAG: [[FIVE]] = load i32, i32* [[I_2:%.+]],
// CK17-DAG: [[I_2]] = getelementptr inbounds [[SSA:%.+]], [[SSA]]* [[FOUR:%.+]], i32 0, i32 0
// CK17-DAG: [[FOUR]] = load [[SSA]]*, [[SSA]]** [[SSA_ADDR:%.+]],
// CK17-DAG: [[F]] = load float**, float*** [[F_ADDR:%.+]],

#pragma omp target update to(*(sa->sa->i+*(1+sa->i+f)))
*(sa->sa->i+*(1+sa->i+f)) = 1;
Expand Down
9 changes: 2 additions & 7 deletions openmp/libomptarget/src/omptarget.cpp
Expand Up @@ -746,14 +746,9 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
return OFFLOAD_FAIL;
}
}
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
false, IsHostPtr);
TgtBaseOffset = 0; // no offset for ptrs.
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
"object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
DPxPTR(HstPtrBase));
} else {
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
false, IsHostPtr);
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Expand Down
47 changes: 47 additions & 0 deletions openmp/libomptarget/test/env/base_ptr_ref_count.c
@@ -0,0 +1,47 @@
// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda
// REQUIRES: libomptarget-debug

#include <stdlib.h>
#include <stdio.h>

int *allocate(size_t n) {
int *ptr = malloc(sizeof(int) * n);
#pragma omp target enter data map(to : ptr[:n])
return ptr;
}

void deallocate(int *ptr, size_t n) {
#pragma omp target exit data map(delete : ptr[:n])
free(ptr);
}

#pragma omp declare target
int *cnt;
void foo() {
++(*cnt);
}
#pragma omp end declare target

int main(void) {
int *A = allocate(10);
int *V = allocate(10);
deallocate(A, 10);
deallocate(V, 10);
// CHECK-NOT: RefCount=2
cnt = malloc(sizeof(int));
*cnt = 0;
#pragma omp target data map(cnt[:1])
#pragma omp target
foo();
printf("Cnt = %d.\n", *cnt);
// CHECK: Cnt = 1.
free(cnt);

return 0;
}


0 comments on commit 4557452

Please sign in to comment.