diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 2ac4af9e66aa8..f8b3fb861cc62 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -137,4 +137,20 @@ def fir_CUDAClusterDimsAttr : fir_Attr<"CUDAClusterDims"> { let assemblyFormat = "`<` struct(params) `>`"; } +def fir_CUDADataTransferKind : I32EnumAttr< + "CUDADataTransferKind", "CUDA Fortran data transfer kind", + [ + I32EnumAttrCase<"DeviceHost", 0, "device_host">, + I32EnumAttrCase<"HostDevice", 1, "host_device">, + I32EnumAttrCase<"DeviceDevice", 2, "device_device">, + ]> { + let genSpecializedAttr = 0; + let cppNamespace = "::fir"; +} + +def fir_CUDADataTransferKindAttr : + EnumAttr { + let assemblyFormat = [{ ```<` $value `>` }]; +} + #endif // FIR_DIALECT_FIR_ATTRS diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 6e520d111701f..3a1af1258aff2 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -3165,4 +3165,29 @@ def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments, let hasVerifier = 1; } +def fir_CUDADataTransferOp : fir_Op<"cuda_data_transfer", []> { + let summary = "Represent a data transfer between host and device memory"; + + let description = [{ + CUDA Fortran allows data transfer to be done via intrinsic assignment + between a host and a device variable. This operation is used to materialize + the data transfer between the lhs and rhs memory references. + The kind of transfer is specified in the attribute. + + ``` + adev = a ! transfer host to device + a = adev ! transfer device to host + bdev = adev ! 
transfer device to device + ``` + }]; + + let arguments = (ins Arg:$src, + Arg:$dst, + fir_CUDADataTransferKindAttr:$transfer_kind); + + let assemblyFormat = [{ + $src `to` $dst attr-dict `:` type(operands) + }]; +} + #endif diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index c3cb9ba6a47e3..9cd1dac8b4fee 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3706,15 +3706,39 @@ class FirConverter : public Fortran::lower::AbstractConverter { return false; } + static void genCUDADataTransfer(fir::FirOpBuilder &builder, + mlir::Location loc, bool lhsIsDevice, + hlfir::Entity &lhs, bool rhsIsDevice, + hlfir::Entity &rhs) { + if (rhs.isBoxAddressOrValue() || lhs.isBoxAddressOrValue()) + TODO(loc, "CUDA data transfer with descriptors"); + if (lhsIsDevice && !rhsIsDevice) { + auto transferKindAttr = fir::CUDADataTransferKindAttr::get( + builder.getContext(), fir::CUDADataTransferKind::HostDevice); + // device = host; a non-variable rhs goes through a host temporary. + if (!rhs.isVariable()) { + auto associate = hlfir::genAssociateExpr( + loc, builder, rhs, rhs.getType(), ".cuf_host_tmp"); + builder.create(loc, associate.getBase(), lhs, + transferKindAttr); + builder.create(loc, associate); + } else { + builder.create(loc, rhs, lhs, + transferKindAttr); + } + return; + } + TODO(loc, "Assignement with CUDA Fortran variables"); + } + void genDataAssignment( const Fortran::evaluate::Assignment &assign, const Fortran::evaluate::ProcedureRef *userDefinedAssignment) { mlir::Location loc = getCurrentLocation(); fir::FirOpBuilder &builder = getFirOpBuilder(); - if (Fortran::evaluate::HasCUDAAttrs(assign.lhs) || - Fortran::evaluate::HasCUDAAttrs(assign.rhs)) - TODO(loc, "Assignement with CUDA Fortran variables"); + bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs); + bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs); // Gather some information about the assignment that will impact how it is // lowered. 
@@ -3772,9 +3796,13 @@ class FirConverter : public Fortran::lower::AbstractConverter { Fortran::lower::StatementContext localStmtCtx; hlfir::Entity rhs = evaluateRhs(localStmtCtx); hlfir::Entity lhs = evaluateLhs(localStmtCtx); - builder.create(loc, rhs, lhs, - isWholeAllocatableAssignment, - keepLhsLengthInAllocatableAssignment); + if (lhsIsDevice || rhsIsDevice) { + genCUDADataTransfer(builder, loc, lhsIsDevice, lhs, rhsIsDevice, rhs); + } else { + builder.create(loc, rhs, lhs, + isWholeAllocatableAssignment, + keepLhsLengthInAllocatableAssignment); + } return; } // Assignments inside Forall, Where, or assignments to a vector subscripted diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp index 0cf8dfb9f784c..e43710f5627ee 100644 --- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp +++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp @@ -299,5 +299,6 @@ void FIROpsDialect::registerAttributes() { addAttributes(); + CUDALaunchBoundsAttr, CUDAClusterDimsAttr, + CUDADataTransferKindAttr>(); } diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf new file mode 100644 index 0000000000000..54226b8623e6a --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -0,0 +1,57 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +! Test CUDA Fortran data transfer using assignment statements. + +subroutine sub1() + integer, device :: m + integer, device :: adev(10) + integer :: i, ahost(10), bhost(10) + + m = 1 + i + + m = 1 + + adev = ahost + + adev = ahost + 1 + + adev(1:5) = ahost(1:5) + + adev = ahost + bhost + +end + +! CHECK-LABEL: func.func @_QPsub1() + +! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub1Eadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! 
CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QFsub1Em"} : (!fir.ref) -> (!fir.ref, !fir.ref) + +! CHECK: %[[C1:.*]] = arith.constant 1 : i32 +! CHECK: %[[LOADED_I:.*]] = fir.load %[[I]]#0 : !fir.ref +! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[LOADED_I]] : i32 +! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ADD]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref, !fir.ref, i1) +! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref, !fir.ref +! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref, i1 + +! CHECK: %[[C1:.*]] = arith.constant 1 : i32 +! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[C1]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref, !fir.ref, i1) +! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref, !fir.ref +! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref, i1 + +! CHECK: fir.cuda_data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> + +! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> { +! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) +! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref>, i1 + +! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> +! 
CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> +! CHECK: fir.cuda_data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> + +! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> +! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) +! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref>, i1