From 9dae79ddd306585275c5061f2a4af5a485008d77 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Mon, 13 Oct 2025 20:43:06 +0100 Subject: [PATCH 1/4] [Flang][OpenMP] Fix USM `close` semantics and `use_device_ptr` - Add CLOSE map flag when USM is required. - use_device_ptr: prevent implicitly expanding member operands. - Fixes test offload/test/offloading/fortran/usm_map_close.f90. --- .../Optimizer/OpenMP/MapInfoFinalization.cpp | 149 +++++++++++++++--- 1 file changed, 124 insertions(+), 25 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 260e5256f1520..8fe4f1bab9ddf 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -40,6 +40,7 @@ #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSet.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -398,24 +399,18 @@ class MapInfoFinalizationPass baseAddrIndex); } - /// Adjusts the descriptor's map type. The main alteration that is done - /// currently is transforming the map type to `OMP_MAP_TO` where possible. - /// This is because we will always need to map the descriptor to device - /// (or at the very least it seems to be the case currently with the - /// current lowered kernel IR), as without the appropriate descriptor - /// information on the device there is a risk of the kernel IR - /// requesting for various data that will not have been copied to - /// perform things like indexing. This can cause segfaults and - /// memory access errors. However, we do not need this data mapped - /// back to the host from the device, as per the OpenMP spec we cannot alter - /// the data via resizing or deletion on the device. Discarding any - /// descriptor alterations via no map back is reasonable (and required - /// for certain segments of descriptor data like the type descriptor that are - /// global constants). This alteration is only inapplicable to `target exit` - /// and `target update` currently, and that's due to `target exit` not - /// allowing `to` mappings, and `target update` not allowing both `to` and - /// `from` simultaneously. We currently try to maintain the `implicit` flag - /// where necessary, although it does not seem strictly required. + /// Adjust the descriptor's map type such that we ensure the descriptor + /// itself is present on device when needed, without changing the user's + /// requested data mapping semantics for the underlying data. + /// + /// We conservatively transform descriptor mappings to `OMP_MAP_TO` (and + /// preserve `IMPLICIT`/`ALWAYS` when present) for structured regions. The + /// descriptor should live on device for indexing, bounds, etc., but we do + /// not require, nor want, additional mapping semantics like `CLOSE` for the + /// descriptor entry itself. `CLOSE` (and other user-provided flags) should + /// apply to the base data entry that actually carries the pointee, which is + /// generated separately as a member map. For `target exit`/`target update` + /// we keep the original map type unchanged. unsigned long getDescriptorMapType(unsigned long mapTypeFlag, mlir::Operation *target) { using mapFlags = llvm::omp::OpenMPOffloadMappingFlags; @@ -425,8 +420,24 @@ class MapInfoFinalizationPass mapFlags flags = mapFlags::OMP_MAP_TO | (mapFlags(mapTypeFlag) & - (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE | - mapFlags::OMP_MAP_ALWAYS)); + (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS)); + + // For unified_shared_memory, we additionally add `CLOSE` on the descriptor + // to ensure device-local placement where required by tests relying on USM + + // close semantics. + if (target) { + if (auto mod = target->getParentOfType()) { + if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) { + if (auto req = + mlir::dyn_cast(reqAttr)) { + if (mlir::omp::bitEnumContainsAll( + req.getValue(), + mlir::omp::ClauseRequires::unified_shared_memory)) + flags |= mapFlags::OMP_MAP_CLOSE; + } + } + } + } return llvm::to_underlying(flags); } @@ -518,6 +529,75 @@ class MapInfoFinalizationPass return newMapInfoOp; } + // Expand mappings of type(C_PTR) to map their `__address` field explicitly + // as a single pointer-sized member (USM-gated at callsite). This helps in + // USM scenarios to ensure the pointer-sized mapping is used. + mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op, + fir::FirOpBuilder &builder) { + if (!op.getMembers().empty()) + return op; + + mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType()); + if (!mlir::isa(varTy)) + return op; + auto recTy = mlir::cast(varTy); + // If not a builtin C_PTR record, skip. + if (!recTy.getName().ends_with("__builtin_c_ptr")) + return op; + + // Find the index of the c_ptr address component named "__address". + int32_t fieldIdx = recTy.getFieldIndex("__address"); + if (fieldIdx < 0) + return op; + + mlir::Location loc = op.getVarPtr().getLoc(); + mlir::Type memTy = recTy.getType(fieldIdx); + fir::IntOrValue idxConst = + mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx); + mlir::Value coord = fir::CoordinateOp::create( + builder, loc, builder.getRefType(memTy), op.getVarPtr(), + llvm::SmallVector{idxConst}); + + // Child for the `__address` member. + llvm::SmallVector> memberIdx = {{0}}; + mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx); + // Force CLOSE in USM paths so the pointer gets device-local placement + // when required by tests relying on USM + close semantics. + uint64_t mapTypeVal = + op.getMapType() | + llvm::to_underlying( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); + mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr( + builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal); + + mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create( + builder, loc, coord.getType(), coord, + mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr, + builder.getAttr( + mlir::omp::VariableCaptureKind::ByRef), + /*varPtrPtr=*/mlir::Value{}, + /*members=*/llvm::SmallVector{}, + /*member_index=*/mlir::ArrayAttr{}, + /*bounds=*/op.getBounds(), + /*mapperId=*/mlir::FlatSymbolRefAttr(), + /*name=*/op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + + // Rebuild the parent as a container with the `__address` member. + mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create( + builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(), + op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(), + /*varPtrPtr=*/mlir::Value{}, + /*members=*/llvm::SmallVector{memberMap}, + /*member_index=*/newMembersAttr, + /*bounds=*/llvm::SmallVector{}, + /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + op.replaceAllUsesWith(newParent.getResult()); + op->erase(); + return newParent; + } + mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op, fir::FirOpBuilder &builder, mlir::Operation *target) { @@ -727,11 +807,6 @@ class MapInfoFinalizationPass argIface.getUseDeviceAddrBlockArgsStart() + argIface.numUseDeviceAddrBlockArgs()); - mlir::MutableOperandRange useDevPtrMutableOpRange = - targetDataOp.getUseDevicePtrVarsMutable(); - addOperands(useDevPtrMutableOpRange, target, - argIface.getUseDevicePtrBlockArgsStart() + - argIface.numUseDevicePtrBlockArgs()); } else if (auto targetOp = llvm::dyn_cast(target)) { mlir::MutableOperandRange hasDevAddrMutableOpRange = targetOp.getHasDeviceAddrVarsMutable(); @@ -1169,6 +1244,30 @@ class MapInfoFinalizationPass genBoxcharMemberMap(op, builder); }); + // Expand type(C_PTR) only when unified_shared_memory is required, + // to ensure device-visible pointer size/behavior in USM scenarios + // without changing default expectations elsewhere. + func->walk([&](mlir::omp::MapInfoOp op) { + // Check module requires USM; otherwise, leave mappings untouched. + auto mod = func->getParentOfType(); + bool hasUSM = false; + if (mod) { + if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) { + if (auto req = + mlir::dyn_cast(reqAttr)) { + hasUSM = mlir::omp::bitEnumContainsAll( + req.getValue(), + mlir::omp::ClauseRequires::unified_shared_memory); + } + } + } + if (!hasUSM) + return; + + builder.setInsertionPoint(op); + genCptrMemberMap(op, builder); + }); + func->walk([&](mlir::omp::MapInfoOp op) { // TODO: Currently only supports a single user for the MapInfoOp. This // is fine for the moment, as the Fortran frontend will generate a From 04e64fcb4f70172012b2a19a5481ef2880a2de8e Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Mon, 13 Oct 2025 20:58:32 +0100 Subject: [PATCH 2/4] Remove whitespace. --- .../Optimizer/OpenMP/MapInfoFinalization.cpp | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 8fe4f1bab9ddf..2549745aa9b86 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -399,18 +399,24 @@ class MapInfoFinalizationPass baseAddrIndex); } - /// Adjust the descriptor's map type such that we ensure the descriptor - /// itself is present on device when needed, without changing the user's - /// requested data mapping semantics for the underlying data. - /// - /// We conservatively transform descriptor mappings to `OMP_MAP_TO` (and - /// preserve `IMPLICIT`/`ALWAYS` when present) for structured regions. The - /// descriptor should live on device for indexing, bounds, etc., but we do - /// not require, nor want, additional mapping semantics like `CLOSE` for the - /// descriptor entry itself. `CLOSE` (and other user-provided flags) should - /// apply to the base data entry that actually carries the pointee, which is - /// generated separately as a member map. For `target exit`/`target update` - /// we keep the original map type unchanged. + /// Adjusts the descriptor's map type. The main alteration that is done + /// currently is transforming the map type to `OMP_MAP_TO` where possible. + /// This is because we will always need to map the descriptor to device + /// (or at the very least it seems to be the case currently with the + /// current lowered kernel IR), as without the appropriate descriptor + /// information on the device there is a risk of the kernel IR + /// requesting for various data that will not have been copied to + /// perform things like indexing. This can cause segfaults and + /// memory access errors. However, we do not need this data mapped + /// back to the host from the device, as per the OpenMP spec we cannot alter + /// the data via resizing or deletion on the device. Discarding any + /// descriptor alterations via no map back is reasonable (and required + /// for certain segments of descriptor data like the type descriptor that are + /// global constants). This alteration is only inapplicable to `target exit` + /// and `target update` currently, and that's due to `target exit` not + /// allowing `to` mappings, and `target update` not allowing both `to` and + /// `from` simultaneously. We currently try to maintain the `implicit` flag + /// where necessary, although it does not seem strictly required. unsigned long getDescriptorMapType(unsigned long mapTypeFlag, mlir::Operation *target) { using mapFlags = llvm::omp::OpenMPOffloadMappingFlags; @@ -421,7 +427,6 @@ class MapInfoFinalizationPass mapFlags flags = mapFlags::OMP_MAP_TO | (mapFlags(mapTypeFlag) & (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS)); - // For unified_shared_memory, we additionally add `CLOSE` on the descriptor // to ensure device-local placement where required by tests relying on USM + // close semantics. From 6f95445c7ae16c0b2cb4873e68e00cd22ef165f4 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Tue, 14 Oct 2025 16:01:16 +0100 Subject: [PATCH 3/4] Kept blockArgs for consistency. Added lowering test. --- .../Optimizer/OpenMP/MapInfoFinalization.cpp | 5 +++++ .../cptr-usm-close-and-use-device-ptr.f90 | 21 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 2549745aa9b86..2e3af3bc6c2d2 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -812,6 +812,11 @@ class MapInfoFinalizationPass argIface.getUseDeviceAddrBlockArgsStart() + argIface.numUseDeviceAddrBlockArgs()); + mlir::MutableOperandRange useDevPtrMutableOpRange = + targetDataOp.getUseDevicePtrVarsMutable(); + addOperands(useDevPtrMutableOpRange, target, + argIface.getUseDevicePtrBlockArgsStart() + + argIface.numUseDevicePtrBlockArgs()); } else if (auto targetOp = llvm::dyn_cast(target)) { mlir::MutableOperandRange hasDevAddrMutableOpRange = targetOp.getHasDeviceAddrVarsMutable(); diff --git a/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 new file mode 100644 index 0000000000000..7fc30b431ad49 --- /dev/null +++ b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 @@ -0,0 +1,21 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! +! Checks: +! - C_PTR mappings expand to `__address` member with CLOSE under USM paths. +! - use_device_ptr does not implicitly expand member operands in the clause. + +subroutine only_cptr_use_device_ptr + use iso_c_binding + type(c_ptr) :: cptr + integer :: i + + !$omp target data use_device_ptr(cptr) map(tofrom: i) + !$omp end target data +end subroutine + +! CHECK-LABEL: func.func @_QPonly_cptr_use_device_ptr() +! CHECK: %[[I_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "i"} +! CHECK: %[[CP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>, !fir.type<{{.*}}__builtin_c_ptr{{.*}}>) map_clauses(return_param) capture(ByRef) -> !fir.ref> +! CHECK: omp.target_data map_entries(%[[I_MAP]] : !fir.ref) use_device_ptr(%[[CP_MAP]] -> %{{.*}} : !fir.ref>) { +! CHECK: omp.terminator +! CHECK: } From 04cb5e87a0b89537b6adbadfa1dbb1cb3404c6b5 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Tue, 14 Oct 2025 16:21:57 +0100 Subject: [PATCH 4/4] Refactor to add utility function moduleRequiresUSM. --- .../Optimizer/OpenMP/MapInfoFinalization.cpp | 43 +++++++------------ 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 2e3af3bc6c2d2..2bbd8034fa523 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -129,6 +129,17 @@ class MapInfoFinalizationPass } } + /// Return true if the module has an OpenMP requires clause that includes + /// unified_shared_memory. + static bool moduleRequiresUSM(mlir::ModuleOp module) { + assert(module && "invalid module"); + if (auto req = module->getAttrOfType( + "omp.requires")) + return mlir::omp::bitEnumContainsAll( + req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory); + return false; + } + /// Create the member map for coordRef and append it (and its index /// path) to the provided new* vectors, if it is not already present. void appendMemberMapIfNew( @@ -430,19 +441,8 @@ class MapInfoFinalizationPass // For unified_shared_memory, we additionally add `CLOSE` on the descriptor // to ensure device-local placement where required by tests relying on USM + // close semantics. - if (target) { - if (auto mod = target->getParentOfType()) { - if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) { - if (auto req = - mlir::dyn_cast(reqAttr)) { - if (mlir::omp::bitEnumContainsAll( - req.getValue(), - mlir::omp::ClauseRequires::unified_shared_memory)) - flags |= mapFlags::OMP_MAP_CLOSE; - } - } - } - } + if (moduleRequiresUSM(target->getParentOfType())) + flags |= mapFlags::OMP_MAP_CLOSE; return llvm::to_underlying(flags); } @@ -1258,22 +1258,9 @@ class MapInfoFinalizationPass // to ensure device-visible pointer size/behavior in USM scenarios // without changing default expectations elsewhere. func->walk([&](mlir::omp::MapInfoOp op) { - // Check module requires USM; otherwise, leave mappings untouched. - auto mod = func->getParentOfType(); - bool hasUSM = false; - if (mod) { - if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) { - if (auto req = - mlir::dyn_cast(reqAttr)) { - hasUSM = mlir::omp::bitEnumContainsAll( - req.getValue(), - mlir::omp::ClauseRequires::unified_shared_memory); - } - } - } - if (!hasUSM) + // Only expand C_PTR members when unified_shared_memory is required. + if (!moduleRequiresUSM(func->getParentOfType())) return; - builder.setInsertionPoint(op); genCptrMemberMap(op, builder); });