diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp index d3cc6c2c99e79..aa75d4eff0511 100644 --- a/flang-rt/lib/cuda/descriptor.cpp +++ b/flang-rt/lib/cuda/descriptor.cpp @@ -62,15 +62,6 @@ void RTDEF(CUFDescriptorCheckSection)( } } -void RTDEF(CUFSetAllocatorIndex)( - Descriptor *desc, int index, const char *sourceFile, int sourceLine) { - if (!desc) { - Terminator terminator{sourceFile, sourceLine}; - terminator.Crash("descriptor is null"); - } - desc->SetAllocIdx(index); -} - RT_EXT_API_GROUP_END } } // namespace Fortran::runtime::cuda diff --git a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp index 4b22e29cc8172..f1f931e87a86e 100644 --- a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp +++ b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -72,14 +72,3 @@ TEST(AllocatableCUFTest, DescriptorAllocationTest) { EXPECT_TRUE(desc != nullptr); RTNAME(CUFFreeDescriptor)(desc); } - -TEST(AllocatableCUFTest, CUFSetAllocatorIndex) { - using Fortran::common::TypeCategory; - RTNAME(CUFRegisterAllocator)(); - // REAL(4), DEVICE, ALLOCATABLE :: a(:) - auto a{createAllocatable(TypeCategory::Real, 4)}; - EXPECT_EQ((int)kDefaultAllocator, a->GetAllocIdx()); - RTNAME(CUFSetAllocatorIndex)( - a.get(), kDeviceAllocatorPos, __FILE__, __LINE__); - EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx()); -} diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h index 4a831fd502af4..ab9dde8ad5198 100644 --- a/flang/include/flang/Lower/CUDA.h +++ b/flang/include/flang/Lower/CUDA.h @@ -47,10 +47,6 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) { return kDefaultAllocator; } -void initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box); - mlir::Type gatherDeviceComponentCoordinatesAndType( fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::semantics::Symbol &sym, fir::RecordType recTy, diff --git a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h index 43dca65322a62..bdeb7574012c6 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h @@ -31,10 +31,6 @@ void genSyncGlobalDescriptor(fir::FirOpBuilder &builder, mlir::Location loc, void genDescriptorCheckSection(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value desc); -/// Generate runtime call to set the allocator index in the descriptor. -void genSetAllocatorIndex(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value desc, mlir::Value index); - } // namespace fir::runtime::cuda #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CUDA_DESCRIPTOR_H_ diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 23ab882615209..e38738230ffbc 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -388,25 +388,4 @@ def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> { let hasVerifier = 1; } -def cuf_SetAllocatorIndexOp : cuf_Op<"set_allocator_idx", []> { - let summary = "Set the allocator index in a descriptor"; - - let description = [{ - Allocator index in the Fortran descriptor is used to retrived the correct - CUDA allocator to allocate the memory on the device. - In many cases the allocator index is set when the descriptor is created. For - device components, the descriptor is part of the derived-type itself and - needs to be set after the derived-type is allocated in managed memory. - }]; - - let arguments = (ins Arg:$box, - cuf_DataAttributeAttr:$data_attr); - - let assemblyFormat = [{ - $box `:` qualified(type($box)) attr-dict - }]; - - let hasVerifier = 1; -} - #endif // FORTRAN_DIALECT_CUF_CUF_OPS diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h index 7555f276ac1de..06e4a4649db1b 100644 --- a/flang/include/flang/Runtime/CUDA/descriptor.h +++ b/flang/include/flang/Runtime/CUDA/descriptor.h @@ -41,10 +41,6 @@ void RTDECL(CUFSyncGlobalDescriptor)( void RTDECL(CUFDescriptorCheckSection)( const Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0); -/// Set the allocator index with the provided value. -void RTDECL(CUFSetAllocatorIndex)(Descriptor *, int index, - const char *sourceFile = nullptr, int sourceLine = 0); - } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 444b5b6c7c4b1..53239cb83c6c0 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -450,9 +450,6 @@ class AllocateStmtHelper { if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare)) Fortran::lower::attachDeclarePostAllocAction(converter, builder, alloc.getSymbol()); - if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol())) - Fortran::lower::initializeDeviceComponentAllocator( - converter, alloc.getSymbol(), box); } void setPinnedToFalse() { diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp index 1293d2c5bd3ae..bb4bdee78f97d 100644 --- a/flang/lib/Lower/CUDA.cpp +++ b/flang/lib/Lower/CUDA.cpp @@ -17,95 +17,6 @@ #define DEBUG_TYPE "flang-lower-cuda" -void Fortran::lower::initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) { - if (const auto *details{ - sym.GetUltimate() - .detailsIf()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType()); - - // Only pointer and allocatable needs post allocation initialization - // of components descriptors. - if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy)) - return; - - // Extract the derived type. - mlir::Type ty = fir::getDerivedType(baseTy); - auto recTy = mlir::dyn_cast(ty); - assert(recTy && "expected fir::RecordType"); - - if (auto boxTy = mlir::dyn_cast(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr()); - mlir::Value addr; - if (auto seqTy = mlir::dyn_cast(baseTy)) { - mlir::Type idxTy = builder.getIndexType(); - mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); - mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); - llvm::SmallVector loops; - llvm::SmallVector indices; - llvm::SmallVector extents; - for (unsigned i = 0; i < seqTy.getDimension(); ++i) { - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy, - idxTy, loadedBox, dim); - mlir::Value lbub = mlir::arith::AddIOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1)); - mlir::Value ext = - mlir::arith::SubIOp::create(builder, loc, lbub, one); - mlir::Value cmp = mlir::arith::CmpIOp::create( - builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero); - ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero); - extents.push_back(ext); - - auto loop = fir::DoLoopOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1), - dimInfo.getResult(2), /*isUnordered=*/true, - /*finalCount=*/false, mlir::ValueRange{}); - loops.push_back(loop); - indices.push_back(loop.getInductionVar()); - builder.setInsertionPointToStart(loop.getBody()); - } - mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox); - auto shape = fir::ShapeOp::create(builder, loc, extents); - addr = fir::ArrayCoorOp::create( - builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape, - /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{}); - } else { - addr = fir::BoxAddrOp::create(builder, loc, loadedBox); - } - for (const auto &compSym : components) { - if (Fortran::semantics::IsDeviceAllocatable(compSym)) { - llvm::SmallVector coord; - mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType( - builder, loc, compSym, recTy, coord); - assert(coord.size() == 1 && "expect one coordinate"); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), addr, coord[0]); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), compSym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType( fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::semantics::Symbol &sym, fir::RecordType recTy, diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index c79c9b1ab0f51..b1cd14c56969a 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -786,62 +786,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter, return res; } -/// Device allocatable components in a derived-type don't have the correct -/// allocator index in their descriptor when they are created. After -/// initialization, cuf.set_allocator_idx operations are inserted to set the -/// correct allocator index for each device component. -static void -initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &symbol, - Fortran::lower::SymMap &symMap) { - if (const auto *details{ - symbol.GetUltimate() - .detailsIf()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - fir::ExtendedValue exv = - converter.getSymbolExtendedValue(symbol.GetUltimate(), &symMap); - mlir::Type baseTy = fir::unwrapRefType(fir::getBase(exv).getType()); - if (auto boxTy = mlir::dyn_cast(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - if (fir::isAllocatableType(fir::getBase(exv).getType()) || - fir::isPointerType(fir::getBase(exv).getType())) - return; // Allocator index need to be set after allocation. - - auto recTy = - mlir::dyn_cast(fir::unwrapSequenceType(baseTy)); - assert(recTy && "expected fir::RecordType"); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - for (const auto &sym : components) { - if (Fortran::semantics::IsDeviceAllocatable(sym)) { - llvm::SmallVector coord; - mlir::Type fieldTy = - Fortran::lower::gatherDeviceComponentCoordinatesAndType( - builder, loc, sym, recTy, coord); - mlir::Value base = fir::getBase(exv); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), base, coord); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), sym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - /// Must \p var be default initialized at runtime when entering its scope. static bool mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) { @@ -1164,9 +1108,6 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); auto *builder = &converter.getFirOpBuilder(); if (needCUDAAlloc(var.getSymbol()) && !cuf::isCUDADeviceContext(builder->getRegion())) { @@ -1426,9 +1367,6 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); } //===--------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp index a6ee98685f3c9..37e4c5a706df2 100644 --- a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp @@ -47,18 +47,3 @@ void fir::runtime::cuda::genDescriptorCheckSection(fir::FirOpBuilder &builder, builder, loc, fTy, desc, sourceFile, sourceLine)}; fir::CallOp::create(builder, loc, func, args); } - -void fir::runtime::cuda::genSetAllocatorIndex(fir::FirOpBuilder &builder, - mlir::Location loc, - mlir::Value desc, - mlir::Value index) { - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc(loc, builder); - auto fTy = func.getFunctionType(); - mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(3)); - llvm::SmallVector args{fir::runtime::createArguments( - builder, loc, fTy, desc, index, sourceFile, sourceLine)}; - fir::CallOp::create(builder, loc, func, args); -} diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index ade80716f2561..687007d957225 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -345,17 +345,6 @@ llvm::LogicalResult cuf::StreamCastOp::verify() { return checkStreamType(*this); } -//===----------------------------------------------------------------------===// -// SetAllocatorOp -//===----------------------------------------------------------------------===// - -llvm::LogicalResult cuf::SetAllocatorIndexOp::verify() { - if (!mlir::isa(fir::unwrapRefType(getBox().getType()))) - return emitOpError( - "expect box to be a reference to class or box type value"); - return mlir::success(); -} - // Tablegen operators #define GET_OP_CLASSES diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 9834b0499b930..4fe83d41c3b96 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -928,34 +928,6 @@ struct CUFSyncDescriptorOpConversion } }; -struct CUFSetAllocatorIndexOpConversion - : public mlir::OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - mlir::LogicalResult - matchAndRewrite(cuf::SetAllocatorIndexOp op, - mlir::PatternRewriter &rewriter) const override { - auto mod = op->getParentOfType(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); - int idx = kDefaultAllocator; - if (op.getDataAttr() == cuf::DataAttribute::Device) { - idx = kDeviceAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Managed) { - idx = kManagedAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Unified) { - idx = kUnifiedAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Pinned) { - idx = kPinnedAllocatorPos; - } - mlir::Value index = - builder.createIntegerConstant(loc, builder.getI32Type(), idx); - fir::runtime::cuda::genSetAllocatorIndex(builder, loc, op.getBox(), index); - op.erase(); - return mlir::success(); - } -}; - class CUFOpConversion : public fir::impl::CUFOpConversionBase { public: void runOnOperation() override { @@ -1017,8 +989,8 @@ void cuf::populateCUFToFIRConversionPatterns( const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { patterns.insert(patterns.getContext(), &dl, &converter); patterns.insert(patterns.getContext()); + CUFFreeOpConversion, CUFSyncDescriptorOpConversion>( + patterns.getContext()); patterns.insert(patterns.getContext(), symtab, &dl, &converter); patterns.insert( diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir index 8b6e7d67931df..31f2ed022b6c4 100644 --- a/flang/test/Fir/CUDA/cuda-alloc-free.fir +++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir @@ -94,19 +94,4 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} { // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 // CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda} : (i64, i32, !fir.ref, i32) -> !fir.llvm_ptr - -func.func @_QQsetalloc() { - %0 = cuf.alloc !fir.type<_QMm1Tdt1{a2:!fir.box>>}> {bindc_name = "d1", data_attr = #cuf.cuda, uniq_name = "_QFEd1"} -> !fir.ref>>}>> - %1 = fir.coordinate_of %0, a2 : (!fir.ref>>}>>) -> !fir.ref>>> - cuf.set_allocator_idx %1 : !fir.ref>>> {data_attr = #cuf.cuda} - return -} - -// CHECK-LABEL: func.func @_QQsetalloc() { -// CHECK: %[[DT:.*]] = fir.call @_FortranACUFMemAlloc -// CHECK: %[[CONV:.*]] = fir.convert %[[DT]] : (!fir.llvm_ptr) -> !fir.ref>>}>> -// CHECK: %[[COMP:.*]] = fir.coordinate_of %[[CONV]], a2 : (!fir.ref>>}>>) -> !fir.ref>>> -// CHECK: %[[DESC:.*]] = fir.convert %[[COMP]] : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFSetAllocatorIndex(%[[DESC]], %c2{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i32, !fir.ref, i32) -> () - } // end module diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf deleted file mode 100644 index d783f340fe9a4..0000000000000 --- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf +++ /dev/null @@ -1,66 +0,0 @@ -! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s - -module m1 - type ty_device - integer, device, allocatable, dimension(:) :: x - integer :: y - integer, device, allocatable, dimension(:) :: z - end type -contains - subroutine sub1() - type(ty_device) :: a - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub1() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.type<_QMm1Tty_device{x:!fir.box>>,y:i32,z:!fir.box>>}> {bindc_name = "a", data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub1Ea"} -> !fir.ref>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[DT:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub1Ea"} : (!fir.ref>>,y:i32,z:!fir.box>>}>>) -> (!fir.ref>>,y:i32,z:!fir.box>>}>>, !fir.ref>>,y:i32,z:!fir.box>>}>>) -! CHECK: fir.address_of(@_QQ_QMm1Tty_device.DerivedInit) -! CHECK: fir.copy -! CHECK: %[[X:.*]] = fir.coordinate_of %[[DT]]#0, x : (!fir.ref>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[X]] : !fir.ref>>> {data_attr = #cuf.cuda} -! CHECK: %[[Z:.*]] = fir.coordinate_of %[[DT]]#0, z : (!fir.ref>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[Z]] : !fir.ref>>> {data_attr = #cuf.cuda} - - subroutine sub2() - type(ty_device), pointer :: d1 - allocate(d1) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub2() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box>>,y:i32,z:!fir.box>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref>>,y:i32,z:!fir.box>>}>>>>) -> (!fir.ref>>,y:i32,z:!fir.box>>}>>>>, !fir.ref>>,y:i32,z:!fir.box>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.ptr>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref>>> {data_attr = #cuf.cuda} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref>>> {data_attr = #cuf.cuda} - - subroutine sub3() - type(ty_device), allocatable :: d1 - allocate(d1) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub3() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box>>,y:i32,z:!fir.box>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref>>,y:i32,z:!fir.box>>}>>>>) -> (!fir.ref>>,y:i32,z:!fir.box>>}>>>>, !fir.ref>>,y:i32,z:!fir.box>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.heap>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref>>> {data_attr = #cuf.cuda} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref>>> {data_attr = #cuf.cuda} - - subroutine sub4() - type(ty_device), allocatable :: d1(:,:) - allocate(d1(10, 10)) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub4() -! CHECK: cuf.allocate -! CHECK-COUNT-2: fir.do_loop -! CHECK-COUNT-2: cuf.set_allocator_idx - -end module