diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index f4792637f481c..6e520d111701f 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -3131,6 +3131,16 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> { def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments, DeclareOpInterfaceMethods]> { + let description = [{ + Represent the CUDA Fortran kernel directive. The operation is a loop like + operation that represents the iteration range of the embedded loop nest. + + When grid or block variadic operands are empty, a `*` only syntax was used + in the Fortran code. + If the `*` is mixed with values for either grid or block, these are + represented by a 0 constant value. + }]; + let arguments = (ins Variadic:$grid, // empty means `*` Variadic:$block, // empty means `*` diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 650ec5db2d0cc..1b9a8a867b080 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2529,23 +2529,42 @@ class FirConverter : public Fortran::lower::AbstractConverter { const std::optional &stream = std::get<3>(dir.t); + auto isOnlyStars = + [&](const std::list + &list) -> bool { + for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : + list) { + if (expr.v) + return false; + } + return true; + }; + + mlir::Value zero = + builder->createIntegerConstant(loc, builder->getI32Type(), 0); + llvm::SmallVector gridValues; - for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : grid) { - if (expr.v) { - gridValues.push_back(fir::getBase( - genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx))); - } else { - // TODO: '*' + if (!isOnlyStars(grid)) { + for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : + grid) { + if (expr.v) { + gridValues.push_back(fir::getBase( + genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx))); + } else { + gridValues.push_back(zero); + } } } llvm::SmallVector blockValues; - for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : - block) { - if (expr.v) { - blockValues.push_back(fir::getBase( - genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx))); - } else { - // TODO: '*' + if (!isOnlyStars(block)) { + for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : + block) { + if (expr.v) { + blockValues.push_back(fir::getBase( + genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx))); + } else { + blockValues.push_back(zero); + } } } mlir::Value streamValue; diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf index c017561447f85..6179e609db383 100644 --- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf +++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf @@ -42,7 +42,20 @@ subroutine sub1() ! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) ! CHECK: {n = 2 : i64} -! TODO: lowering for these cases -! !$cuf kernel do(2) <<< (1,*), (256,1) >>> -! !$cuf kernel do(2) <<< (*,*), (32,4) >>> + !$cuf kernel do(2) <<< (1,*), (256,1) >>> + do i = 1, n + do j = 1, n + c(i,j) = c(i,j) * d(i,j) + end do + end do +! CHECK: fir.cuda_kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) + +!$cuf kernel do(2) <<< (*,*), (32,4) >>> + do i = 1, n + do j = 1, n + c(i,j) = c(i,j) * d(i,j) + end do + end do + +! CHECK: fir.cuda_kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index) end