diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 766a0d6bb8ee0..636879f28a2fb 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -101,7 +101,7 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments, Arg, "", [MemWrite]>:$pinned, Arg, "", [MemRead]>:$source, cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat, - UnitAttr:$hasDoubleDescriptor); + UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer); let results = (outs AnyIntegerType:$stat); @@ -129,7 +129,7 @@ def cuf_DeallocateOp : cuf_Op<"deallocate", let arguments = (ins Arg:$box, Arg, "", [MemWrite]>:$errmsg, cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat, - UnitAttr:$hasDoubleDescriptor); + UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer); let results = (outs AnyIntegerType:$stat); diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 2ae13e2bd73fb..c9a9d935bd615 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -803,7 +803,8 @@ class AllocateStmtHelper { builder, loc, retTy, box.getAddr(), errmsg, stream, pinned, source, cudaAttr, errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr, - doubleDescriptors ? builder.getUnitAttr() : nullptr) + doubleDescriptors ? builder.getUnitAttr() : nullptr, + box.isPointer() ? builder.getUnitAttr() : nullptr) .getResult(); } @@ -873,7 +874,8 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder, return cuf::DeallocateOp::create( builder, loc, retTy, box.getAddr(), errmsg, cudaAttr, errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr, - doubleDescriptors ? builder.getUnitAttr() : nullptr) + doubleDescriptors ? builder.getUnitAttr() : nullptr, + box.isPointer() ? builder.getUnitAttr() : nullptr) .getResult(); } diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp index 2c40991580c2e..6579c2362cd87 100644 --- a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp @@ -322,15 +322,7 @@ struct CUFAllocateOpConversion fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); - bool isPointer = false; - - if (auto declareOp = - mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) - if (declareOp.getFortranAttrs() && - bitEnumContainsAny(*declareOp.getFortranAttrs(), - fir::FortranVariableFlagsEnum::pointer)) - isPointer = true; - + bool isPointer = op.getPointer(); if (op.getHasDoubleDescriptor()) { // Allocation for module variable are done with custom runtime entry point // so the descriptors can be synchronized. diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index eb2816145c77a..9d0d181609ada 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -183,7 +183,7 @@ func.func @_QQallocate_stream() { func.func @_QPp_alloc() { %0 = cuf.alloc !fir.box>>> {bindc_name = "complex_array", data_attr = #cuf.cuda, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref>>>> %4 = fir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref>>>>) -> !fir.ref>>>> - %9 = cuf.allocate %4 : !fir.ref>>>> {data_attr = #cuf.cuda} -> i32 + %9 = cuf.allocate %4 : !fir.ref>>>> {data_attr = #cuf.cuda, pointer} -> i32 return } @@ -201,7 +201,7 @@ func.func @_QPpointer_source() { %5 = cuf.alloc !fir.box>> {bindc_name = "a_d", data_attr = #cuf.cuda, uniq_name = "_QFpointer_sourceEa_d"} -> !fir.ref>>> %7 = fir.declare %5 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFpointer_sourceEa_d"} : (!fir.ref>>>) -> !fir.ref>>> %8 = fir.load %4 : !fir.ref>>> - %22 = cuf.allocate %7 : !fir.ref>>> source(%8 : !fir.box>>) {data_attr = #cuf.cuda} -> i32 + %22 = cuf.allocate %7 : !fir.ref>>> source(%8 : !fir.box>>) {data_attr = #cuf.cuda, pointer} -> i32 return } @@ -226,7 +226,7 @@ func.func @_QQpointer_sync() attributes {fir.bindc_name = "test"} { %3 = fir.convert %c1 : (index) -> i64 %4 = fir.convert %c10_i32 : (i32) -> i64 fir.call @_FortranAAllocatableSetBounds(%2, %c0_i32, %3, %4) fastmath : (!fir.ref>, i32, i64, i64) -> () - %6 = cuf.allocate %1 : !fir.ref>>> {data_attr = #cuf.cuda, hasDoubleDescriptor} -> i32 + %6 = cuf.allocate %1 : !fir.ref>>> {data_attr = #cuf.cuda, hasDoubleDescriptor, pointer} -> i32 return } @@ -246,7 +246,7 @@ func.func @_QMmod1Ppointer_source_global() { %2 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"} %6 = fir.declare %2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref>>>) -> !fir.ref>>> %7 = fir.load %6 : !fir.ref>>> - %21 = cuf.allocate %1 : !fir.ref>>> source(%7 : !fir.box>>) {data_attr = #cuf.cuda, hasDoubleDescriptor} -> i32 + %21 = cuf.allocate %1 : !fir.ref>>> source(%7 : !fir.box>>) {data_attr = #cuf.cuda, hasDoubleDescriptor, pointer} -> i32 return } diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf index 393faff6046bc..43e716532ecca 100644 --- a/flang/test/Lower/CUDA/cuda-allocatable.cuf +++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf @@ -227,6 +227,14 @@ end ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref> +subroutine devicepointer() + integer, device, pointer :: i(:) + allocate(i(10)) +end + +! CHECK-LABEL: func.func @_QPdevicepointer() +! CHECK: cuf.allocate{{.*}}pointer + subroutine cuda_component() use globals type(t1), pointer, dimension(:) :: d