diff --git a/flang-rt/lib/cuda/allocatable.cpp b/flang-rt/lib/cuda/allocatable.cpp index ff1a225d66ce9..60a755aeb34eb 100644 --- a/flang-rt/lib/cuda/allocatable.cpp +++ b/flang-rt/lib/cuda/allocatable.cpp @@ -23,11 +23,11 @@ namespace Fortran::runtime::cuda { extern "C" { RT_EXT_API_GROUP_BEGIN -int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream, - bool *pinned, bool hasStat, const Descriptor *errMsg, +int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int32_t allocIdx, + int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFAllocatableAllocate)( - desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; + desc, allocIdx, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; #ifndef RT_DEVICE_COMPILATION // Descriptor synchronization is only done when the allocation is done // from the host. @@ -41,9 +41,14 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream, return stat; } -int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream, - bool *pinned, bool hasStat, const Descriptor *errMsg, +int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int32_t allocIdx, + int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { +#if !defined(RT_DEVICE_COMPILATION) + if (desc.GetAllocIdx() == kDefaultAllocator) { + desc.SetAllocIdx(allocIdx); + } +#endif // Perform the standard allocation. int stat{RTNAME(AllocatableAllocate)( desc, stream, hasStat, errMsg, sourceFile, sourceLine)}; @@ -56,10 +61,11 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream, } int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, - const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { - int stat{RTNAME(CUFAllocatableAllocate)( - alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; + const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned, + bool hasStat, const Descriptor *errMsg, const char *sourceFile, + int sourceLine) { + int stat{RTNAME(CUFAllocatableAllocate)(alloc, allocIdx, stream, pinned, + hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; Fortran::runtime::DoFromSourceAssign( @@ -69,10 +75,11 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, } int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, - const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { - int stat{RTNAME(CUFAllocatableAllocateSync)( - alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; + const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned, + bool hasStat, const Descriptor *errMsg, const char *sourceFile, + int sourceLine) { + int stat{RTNAME(CUFAllocatableAllocateSync)(alloc, allocIdx, stream, pinned, + hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; Fortran::runtime::DoFromSourceAssign( diff --git a/flang-rt/lib/cuda/pointer.cpp b/flang-rt/lib/cuda/pointer.cpp index d3f5cfe8e96a1..03187f788638d 100644 --- a/flang-rt/lib/cuda/pointer.cpp +++ b/flang-rt/lib/cuda/pointer.cpp @@ -22,9 +22,14 @@ namespace Fortran::runtime::cuda { extern "C" { RT_EXT_API_GROUP_BEGIN -int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t *stream, bool *pinned, - bool hasStat, const Descriptor *errMsg, const char *sourceFile, - int sourceLine) { +int RTDEF(CUFPointerAllocate)(Descriptor &desc, int32_t allocIdx, + int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine) { +#if !defined(RT_DEVICE_COMPILATION) + if (desc.GetAllocIdx() == kDefaultAllocator) { + desc.SetAllocIdx(allocIdx); + } +#endif // Perform the standard allocation. int stat{ RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)}; @@ -36,11 +41,11 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t *stream, bool *pinned, return stat; } -int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream, - bool *pinned, bool hasStat, const Descriptor *errMsg, +int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int32_t allocIdx, + int64_t *stream, bool *pinned, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(CUFPointerAllocate)( - desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; + desc, allocIdx, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; #ifndef RT_DEVICE_COMPILATION // Descriptor synchronization is only done when the allocation is done // from the host. @@ -55,10 +60,11 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream, } int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, - const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { - int stat{RTNAME(CUFPointerAllocate)( - pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; + const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned, + bool hasStat, const Descriptor *errMsg, const char *sourceFile, + int sourceLine) { + int stat{RTNAME(CUFPointerAllocate)(pointer, allocIdx, stream, pinned, + hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; Fortran::runtime::DoFromSourceAssign( @@ -68,10 +74,11 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, } int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer, - const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat, - const Descriptor *errMsg, const char *sourceFile, int sourceLine) { - int stat{RTNAME(CUFPointerAllocateSync)( - pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)}; + const Descriptor &source, int32_t allocIdx, int64_t *stream, bool *pinned, + bool hasStat, const Descriptor *errMsg, const char *sourceFile, + int sourceLine) { + int stat{RTNAME(CUFPointerAllocateSync)(pointer, allocIdx, stream, pinned, + hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; Fortran::runtime::DoFromSourceAssign( diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h index 4a831fd502af4..0a085f47327f2 100644 --- a/flang/include/flang/Lower/CUDA.h +++ b/flang/include/flang/Lower/CUDA.h @@ -31,21 +31,7 @@ namespace Fortran::lower { class AbstractConverter; -static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) { - std::optional cudaAttr = - Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate()); - if (cudaAttr) { - if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned) - return kPinnedAllocatorPos; - if (*cudaAttr == Fortran::common::CUDADataAttr::Device) - return kDeviceAllocatorPos; - if (*cudaAttr == Fortran::common::CUDADataAttr::Managed) - return kManagedAllocatorPos; - if (*cudaAttr == Fortran::common::CUDADataAttr::Unified) - return kUnifiedAllocatorPos; - } - return kDefaultAllocator; -} +unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym); void initializeDeviceComponentAllocator( Fortran::lower::AbstractConverter &converter, diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h index 4a250d1cc6c54..c00f9e718ad18 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h +++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h @@ -112,6 +112,8 @@ cuf::DataAttributeAttr getDataAttr(mlir::Operation *op); /// Returns true if the operation has a data attribute with the given value. bool hasDataAttr(mlir::Operation *op, cuf::DataAttribute value); +unsigned getAllocatorIdx(cuf::DataAttribute dataAttr); + } // namespace cuf #endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h index 6c97afa9e10e8..43b45cff9a1f5 100644 --- a/flang/include/flang/Runtime/CUDA/allocatable.h +++ b/flang/include/flang/Runtime/CUDA/allocatable.h @@ -17,31 +17,33 @@ namespace Fortran::runtime::cuda { extern "C" { /// Perform allocation of the descriptor. -int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t *stream = nullptr, - bool *pinned = nullptr, bool hasStat = false, +int RTDECL(CUFAllocatableAllocate)(Descriptor &, int32_t allocIdx, + int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. -int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t *stream = nullptr, - bool *pinned = nullptr, bool hasStat = false, +int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int32_t allocIdx, + int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor without synchronization. Assign data /// from source. int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, - const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr, + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. Assign data from source. int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, - const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr, + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); /// Perform deallocation of the descriptor with synchronization of it when /// necessary. diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h index bdfc3268e0814..64698370534ce 100644 --- a/flang/include/flang/Runtime/CUDA/pointer.h +++ b/flang/include/flang/Runtime/CUDA/pointer.h @@ -17,31 +17,33 @@ namespace Fortran::runtime::cuda { extern "C" { /// Perform allocation of the descriptor. -int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t *stream = nullptr, - bool *pinned = nullptr, bool hasStat = false, +int RTDECL(CUFPointerAllocate)(Descriptor &, int32_t allocIdx, + int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. -int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t *stream = nullptr, - bool *pinned = nullptr, bool hasStat = false, +int RTDECL(CUFPointerAllocateSync)(Descriptor &, int32_t allocIdx, + int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor without synchronization. Assign data /// from source. int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, - const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr, + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. Assign data from source. int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer, - const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); + const Descriptor &source, int32_t allocIdx, int64_t *stream = nullptr, + bool *pinned = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); } // extern "C" diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp index 1293d2c5bd3ae..5bb0a11e4fa56 100644 --- a/flang/lib/Lower/CUDA.cpp +++ b/flang/lib/Lower/CUDA.cpp @@ -165,3 +165,20 @@ bool Fortran::lower::isTransferWithConversion(mlir::Value rhs) { return true; return false; } + +unsigned +Fortran::lower::getAllocatorIdx(const Fortran::semantics::Symbol &sym) { + std::optional cudaAttr = + Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate()); + if (cudaAttr) { + if (*cudaAttr == Fortran::common::CUDADataAttr::Pinned) + return kPinnedAllocatorPos; + if (*cudaAttr == Fortran::common::CUDADataAttr::Device) + return kDeviceAllocatorPos; + if (*cudaAttr == Fortran::common::CUDADataAttr::Managed) + return kManagedAllocatorPos; + if (*cudaAttr == Fortran::common::CUDADataAttr::Unified) + return kUnifiedAllocatorPos; + } + return kDefaultAllocator; +} diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 80af7f4c1aaad..6e9518a0f3349 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -478,20 +478,6 @@ createGlobalInitialization(fir::FirOpBuilder &builder, fir::GlobalOp global, builder.restoreInsertionPoint(insertPt); } -static unsigned getAllocatorIdxFromDataAttr(cuf::DataAttributeAttr dataAttr) { - if (dataAttr) { - if (dataAttr.getValue() == cuf::DataAttribute::Pinned) - return kPinnedAllocatorPos; - if (dataAttr.getValue() == cuf::DataAttribute::Device) - return kDeviceAllocatorPos; - if (dataAttr.getValue() == cuf::DataAttribute::Managed) - return kManagedAllocatorPos; - if (dataAttr.getValue() == cuf::DataAttribute::Unified) - return kUnifiedAllocatorPos; - } - return kDefaultAllocator; -} - /// Create the global op and its init if it has one fir::GlobalOp Fortran::lower::defineGlobal( Fortran::lower::AbstractConverter &converter, @@ -554,7 +540,9 @@ fir::GlobalOp Fortran::lower::defineGlobal( mlir::Value box = fir::factory::createUnallocatedBox( b, loc, symTy, /*nonDeferredParams=*/{}, - /*typeSourceBox=*/{}, getAllocatorIdxFromDataAttr(dataAttr)); + /*typeSourceBox=*/{}, + dataAttr ? cuf::getAllocatorIdx(dataAttr.getValue()) + : kDefaultAllocator); fir::HasValueOp::create(b, loc, box); }); } diff --git a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp index bd0499f406c18..fd5dd555c04cd 100644 --- a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp @@ -12,6 +12,7 @@ #include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" #include "flang/Optimizer/Dialect/CUF/CUFDialect.h" +#include "flang/Runtime/allocator-registry-consts.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectImplementation.h" @@ -52,4 +53,16 @@ bool hasDataAttr(mlir::Operation *op, cuf::DataAttribute value) { return false; } +unsigned getAllocatorIdx(cuf::DataAttribute dataAttr) { + if (dataAttr == cuf::DataAttribute::Pinned) + return kPinnedAllocatorPos; + if (dataAttr == cuf::DataAttribute::Device) + return kDeviceAllocatorPos; + if (dataAttr == cuf::DataAttribute::Managed) + return kManagedAllocatorPos; + if (dataAttr == cuf::DataAttribute::Unified) + return kUnifiedAllocatorPos; + return kDefaultAllocator; +} + } // namespace cuf diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 9834b0499b930..9021c5d982321 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -106,7 +106,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op, mlir::Value sourceLine; if constexpr (std::is_same_v) sourceLine = fir::factory::locationToLineNo( - builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6)); + builder, loc, op.getSource() ? fTy.getInput(8) : fTy.getInput(7)); else sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); @@ -122,6 +122,8 @@ static mlir::LogicalResult convertOpToCall(OpTy op, } llvm::SmallVector args; if constexpr (std::is_same_v) { + mlir::Value allocIdx = builder.createIntegerConstant( + loc, builder.getI32Type(), cuf::getAllocatorIdx(op.getDataAttr())); mlir::Value pinned = op.getPinned() ? op.getPinned() @@ -133,15 +135,15 @@ static mlir::LogicalResult convertOpToCall(OpTy op, op.getStream() ? op.getStream() : builder.createNullConstant(loc, fTy.getInput(2)); args = fir::runtime::createArguments( - builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned, - hasStat, errmsg, sourceFile, sourceLine); + builder, loc, fTy, op.getBox(), op.getSource(), allocIdx, stream, + pinned, hasStat, errmsg, sourceFile, sourceLine); } else { mlir::Value stream = op.getStream() ? op.getStream() : builder.createNullConstant(loc, fTy.getInput(1)); args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), - stream, pinned, hasStat, errmsg, - sourceFile, sourceLine); + allocIdx, stream, pinned, hasStat, + errmsg, sourceFile, sourceLine); } } else { args = diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index ea7890c9aac52..799d9991dfa83 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -19,7 +19,7 @@ func.func @_QPsub1() { // CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref>) -> !fir.ref>>> // CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %c2{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i32, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -47,7 +47,7 @@ func.func @_QPsub3() { // CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %c2{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i32, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -87,7 +87,7 @@ func.func @_QPsub5() { } // CHECK-LABEL: func.func @_QPsub5() -// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref>, i32, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -118,7 +118,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} { // CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMdataEb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: _FortranAAllocatableSetBounds // CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %c2{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i32, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 func.func @_QPallocate_source() { @@ -142,7 +142,7 @@ func.func @_QPallocate_source() { // CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref>>> // CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box>>) -> !fir.box -// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %c2{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i32, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda} : !fir.box>> { @@ -177,7 +177,7 @@ func.func @_QQallocate_stream() { // CHECK-LABEL: func.func @_QQallocate_stream() // CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"} // CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref) -> !fir.ref -// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %c2{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i32, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 func.func @_QPp_alloc() { @@ -266,6 +266,6 @@ func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} { // CHECK: %[[PINNED:.*]] = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"} // CHECK: %[[DECL_PINNED:.*]] = fir.declare %[[PINNED]] {uniq_name = "_QFEpinnedflag"} : (!fir.ref>) -> !fir.ref> // CHECK: %[[CONV_PINNED:.*]] = fir.convert %[[DECL_PINNED]] : (!fir.ref>) -> !fir.ref -// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %c1{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i32, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 } // end of module