diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 8b62fe8c022f8..ecad5eb665e01 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3810,12 +3810,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::Location loc = getCurrentLocation(); fir::FirOpBuilder &builder = getFirOpBuilder(); + bool isInDeviceContext = + builder.getRegion().getParentOfType(); bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) || Fortran::evaluate::HasCUDAAttrs(assign.rhs); bool hasCUDAImplicitTransfer = Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs); llvm::SmallVector implicitTemps; - if (hasCUDAImplicitTransfer) + if (hasCUDAImplicitTransfer && !isInDeviceContext) implicitTemps = genCUDAImplicitDataTransfer(builder, loc, assign); // Gather some information about the assignment that will impact how it is @@ -3874,13 +3876,13 @@ class FirConverter : public Fortran::lower::AbstractConverter { Fortran::lower::StatementContext localStmtCtx; hlfir::Entity rhs = evaluateRhs(localStmtCtx); hlfir::Entity lhs = evaluateLhs(localStmtCtx); - if (isCUDATransfer && !hasCUDAImplicitTransfer) + if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext) genCUDADataTransfer(builder, loc, assign, lhs, rhs); else builder.create(loc, rhs, lhs, isWholeAllocatableAssignment, keepLhsLengthInAllocatableAssignment); - if (hasCUDAImplicitTransfer) { + if (hasCUDAImplicitTransfer && !isInDeviceContext) { localSymbols.popScope(); for (mlir::Value temp : implicitTemps) builder.create(loc, temp); diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 4ebd736315bcb..025d8147e5392 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -119,3 +119,25 @@ end ! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> + + +! Check that fir.cuda_data_transfer are not generated within cuf kernel +subroutine sub4() + integer, parameter :: n = 10 + real, device :: adev(n) + real :: ahost(n) + real :: b + integer :: i + + adev = ahost + !$cuf kernel do <<<*,*>>> + do i = 1, n + adev(i) = adev(i) + b + enddo +end subroutine + +! CHECK-LABEL: func.func @_QPsub4() +! CHECK: fir.cuda_data_transfer +! CHECK: fir.cuda_kernel<<<*, *>>> +! CHECK-NOT: fir.cuda_data_transfer +! CHECK: hlfir.assign