diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp index bfb0daeacb8c3..35badb6eadb1c 100644 --- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp +++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp @@ -53,21 +53,26 @@ static void processAddrOfOp(fir::AddrOfOp addrOfOp, } } +static void processTypeDescriptor(fir::RecordType recTy, + mlir::SymbolTable &symbolTable, + llvm::DenseSet &candidates) { + if (auto globalOp = symbolTable.lookup( + fir::NameUniquer::getTypeDescriptorName(recTy.getName()))) { + if (!candidates.contains(globalOp)) { + globalOp.walk([&](fir::AddrOfOp op) { + processAddrOfOp(op, symbolTable, candidates, + /*recurseInGlobal=*/true); + }); + candidates.insert(globalOp); + } + } +} + static void processEmboxOp(fir::EmboxOp emboxOp, mlir::SymbolTable &symbolTable, llvm::DenseSet &candidates) { if (auto recTy = mlir::dyn_cast( - fir::unwrapRefType(emboxOp.getMemref().getType()))) { - if (auto globalOp = symbolTable.lookup( - fir::NameUniquer::getTypeDescriptorName(recTy.getName()))) { - if (!candidates.contains(globalOp)) { - globalOp.walk([&](fir::AddrOfOp op) { - processAddrOfOp(op, symbolTable, candidates, - /*recurseInGlobal=*/true); - }); - candidates.insert(globalOp); - } - } - } + fir::unwrapRefType(emboxOp.getMemref().getType()))) + processTypeDescriptor(recTy, symbolTable, candidates); } static void @@ -85,6 +90,17 @@ prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp, } } +static void +processPotentialTypeDescriptor(mlir::Type candidateType, + mlir::SymbolTable &symbolTable, + llvm::DenseSet &candidates) { + if (auto boxTy = mlir::dyn_cast(candidateType)) + candidateType = boxTy.getEleTy(); + candidateType = fir::unwrapSequenceType(fir::unwrapRefType(candidateType)); + if (auto recTy = mlir::dyn_cast(candidateType)) + processTypeDescriptor(recTy, symbolTable, candidates); +} + class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase { public: void 
runOnOperation() override { @@ -115,6 +131,8 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase { for (auto globalOp : mod.getOps()) { if (cuf::isRegisteredDeviceGlobal(globalOp)) { candidates.insert(globalOp); + processPotentialTypeDescriptor(globalOp.getType(), parentSymTable, + candidates); } else if (globalOp.getConstant() && mlir::isa( fir::unwrapRefType(globalOp.resultType()))) { diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90 index 4c634513745fd..35c025dad3000 100644 --- a/flang/test/Fir/CUDA/cuda-device-global.f90 +++ b/flang/test/Fir/CUDA/cuda-device-global.f90 @@ -24,3 +24,26 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.conta // CHECK: gpu.module @cuda_device_mod // CHECK-DAG: fir.global @_QMm2ECc // CHECK-DAG: fir.global @_QMm1ECb + +// ----- + +module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} { + fir.global @_QMmEddarrays {data_attr = #cuf.cuda} : !fir.box>>,phi_i:!fir.box>>,phi0_r:!fir.box>>,phi0_i:!fir.box>>,buf_r:!fir.box>>,buf_i:!fir.box>>}>>>> { + %c0 = arith.constant 0 : index + %0 = fir.zero_bits !fir.heap>>,phi_i:!fir.box>>,phi0_r:!fir.box>>,phi0_i:!fir.box>>,buf_r:!fir.box>>,buf_i:!fir.box>>}>>> + %1 = fir.shape %c0 : (index) -> !fir.shape<1> + %2 = fir.embox %0(%1) {allocator_idx = 3 : i32} : (!fir.heap>>,phi_i:!fir.box>>,phi0_r:!fir.box>>,phi0_i:!fir.box>>,buf_r:!fir.box>>,buf_i:!fir.box>>}>>>, !fir.shape<1>) -> !fir.box>>,phi_i:!fir.box>>,phi0_r:!fir.box>>,phi0_i:!fir.box>>,buf_r:!fir.box>>,buf_i:!fir.box>>}>>>> + fir.has_value %2 : !fir.box>>,phi_i:!fir.box>>,phi0_r:!fir.box>>,phi0_i:!fir.box>>,buf_r:!fir.box>>,buf_i:!fir.box>>}>>>> + } + fir.global linkonce_odr @_QMmE.dt.devicearrays constant target : 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}> { + %0 = fir.undefined !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}> + fir.has_value %0 : 
!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box,name:!fir.box>>}>>>>,name:!fir.box>>,sizeinbytes:i64,uninstantiated:!fir.box>>,kindparameter:!fir.box>>,lenparameterkind:!fir.box>>,component:!fir.box>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box>>,lenvalue:!fir.box,value:i64}>>>>,bounds:!fir.box,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}> + } +} + + +// CHECK-DAG: fir.global @_QMmEddarrays +// CHECK-DAG: fir.global linkonce_odr @_QMmE.dt.devicearrays +// CHECK: gpu.module @cuda_device_mod +// CHECK-DAG: fir.global @_QMmEddarrays +// CHECK-DAG: fir.global linkonce_odr @_QMmE.dt.devicearrays