diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h index 3c3ae73d4ad7a..fb48152d70718 100644 --- a/flang/include/flang/Runtime/CUDA/memory.h +++ b/flang/include/flang/Runtime/CUDA/memory.h @@ -36,19 +36,18 @@ void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a pointer to a descriptor. -void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src, +void RTDECL(CUFDataTransferDescPtr)(Descriptor *dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a descriptor to a pointer. -void RTDECL(CUFDataTransferPtrDesc)(void *dst, const Descriptor &src, +void RTDECL(CUFDataTransferPtrDesc)(void *dst, Descriptor *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a descriptor to a descriptor. -void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dst, - const Descriptor &src, unsigned mode, const char *sourceFile = nullptr, - int sourceLine = 0); +void RTDECL(CUFDataTransferDescDesc)(Descriptor *dst, Descriptor *src, + unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index f1f3a95b220df..e3e441360e949 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -529,8 +529,8 @@ struct CUFDataTransferOpConversion mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); mlir::Value sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); - mlir::Value dst = builder.loadIfRef(loc, op.getDst()); - mlir::Value src = builder.loadIfRef(loc, op.getSrc()); + mlir::Value dst = op.getDst(); + mlir::Value src = op.getSrc(); llvm::SmallVector args{fir::runtime::createArguments( builder, loc, fTy, dst, src, modeValue, sourceFile, sourceLine)}; builder.create(loc, func, args); @@ -603,11 +603,8 @@ struct CUFDataTransferOpConversion mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); mlir::Value sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(5)); - mlir::Value dst = - dstIsDesc ? builder.loadIfRef(loc, op.getDst()) : op.getDst(); - mlir::Value src = mlir::isa(srcTy) - ? builder.loadIfRef(loc, op.getSrc()) - : op.getSrc(); + mlir::Value dst = op.getDst(); + mlir::Value src = op.getSrc(); llvm::SmallVector args{ fir::runtime::createArguments(builder, loc, fTy, dst, src, bytes, modeValue, sourceFile, sourceLine)}; diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index fc48b4343eea9..4778a4ae77683 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -73,23 +73,22 @@ void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); } -void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr, +void RTDEF(CUFDataTransferDescPtr)(Descriptor *desc, void *addr, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a pointer to a descriptor"); } -void RTDEF(CUFDataTransferPtrDesc)(void *addr, const Descriptor &desc, +void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a descriptor to a pointer"); } -void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dstDesc, - const Descriptor &srcDesc, unsigned mode, const char *sourceFile, - int sourceLine) { +void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, + unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer between two descriptors"); diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index c33c50115b9fc..b99e09fb76468 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -15,11 +15,9 @@ func.func @_QPsub1() { // CHECK-LABEL: func.func @_QPsub1() // CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Eadev"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Eahost"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) -// CHECK: %[[AHOST_LOAD:.*]] = fir.load %[[AHOST]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[AHOST_BOX:.*]] = fir.convert %[[AHOST_LOAD]] : (!fir.box>>) -> !fir.box -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box -// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[AHOST_BOX]], %[[ADEV_BOX]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.box, i32, !fir.ref, i32) -> none +// CHECK: %[[AHOST_BOX:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> +// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[AHOST_BOX]], %[[ADEV_BOX]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref>, i32, !fir.ref, i32) -> none func.func @_QPsub2() { %0 = cuf.alloc !fir.box>> {bindc_name = "adev", data_attr = #cuf.cuda, uniq_name = "_QFsub2Eadev"} -> !fir.ref>>> @@ -76,19 +74,17 @@ func.func @_QPsub4() { // CHECK: %[[NBELEM:.*]] = arith.constant 10 : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>) -> !fir.llvm_ptr // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none // CHECK: %[[NBELEM:.*]] = arith.constant 10 : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>) -> !fir.llvm_ptr -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.box, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.ref>, i64, i32, !fir.ref, i32) -> none func.func @_QPsub5(%arg0: !fir.ref {fir.bindc_name = "n"}) { %0 = fir.dummy_scope : !fir.dscope @@ -122,19 +118,17 @@ func.func @_QPsub5(%arg0: !fir.ref {fir.bindc_name = "n"}) { // CHECK: %[[NBELEM:.*]] = arith.muli %[[I1]], %[[I2]] : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#1 : (!fir.ref>) -> !fir.llvm_ptr // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none // CHECK: %[[NBELEM:.*]] = arith.muli %[[I1]], %[[I2]] : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#1 : (!fir.ref>) -> !fir.llvm_ptr -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.box, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.ref>, i64, i32, !fir.ref, i32) -> none func.func @_QPsub6() { %0 = cuf.alloc i32 {bindc_name = "idev", data_attr = #cuf.cuda, uniq_name = "_QFsub6Eidev"} -> !fir.ref