diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index f9b9b850ad839..4a9e49435a907 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -2222,6 +2222,9 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter, addOperands(operands, operandSegments, tileOperands); addOperands(operands, operandSegments, cacheOperands); addOperands(operands, operandSegments, privateOperands); + // fill empty firstprivate operands since they are not permitted + // from OpenACC language perspective. + addOperands(operands, operandSegments, {}); addOperands(operands, operandSegments, reductionOperands); auto loopOp = createRegionOp( diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 01ab6df8f6c72..77e833f8f9492 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2383,15 +2383,38 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", let summary = "loop construct"; let description = [{ - The "acc.loop" operation represents the OpenACC loop construct. The lower - and upper bounds specify a half-open range: the range includes the lower - bound but does not include the upper bound. If the `inclusive` attribute is - set then the upper bound is included. + The `acc.loop` operation represents the OpenACC loop construct and when + bounds are included, the associated source language loop iterators. The + lower and upper bounds specify a half-open range: the range includes the + lower bound but does not include the upper bound. If the `inclusive` + attribute is set then the upper bound is included. + + In cases where the OpenACC loop directive needs to capture multiple + source language loops, such as in the case of `collapse` or `tile`, + the multiple induction arguments are used to capture each case. 
Having + such a representation makes sure no intermediate transformation such + as Loop Invariant Code Motion breaks the property requested by the + clause on the loop constructs. + + Each `acc.loop` holds private and reduction operands which are the + SSA values from the corresponding `acc.private` or `acc.reduction` + operations. Additionally, firstprivate operands are supported to + represent cases where privatization is needed with initialization + from an original value. While the OpenACC specification does not + explicitly support firstprivate on loop constructs, this extension + enables representing privatization scenarios that arise from an + optimization and codegen pipeline operating on the acc dialect. + + The operation supports capturing the fact that it comes from combined + constructs (e.g., `parallel loop`, `kernels loop`, `serial loop`) + through the `combined` attribute despite requiring the `acc.loop` + to be decomposed from the compute operation representing the compute + construct. Example: ```mlir - acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = + acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = (%c0, %c0, %c0 : index, index, index) to (%c10, %c10, %c10 : index, index, index) step (%c1, %c1, %c1 : index, index, index) { @@ -2400,10 +2423,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", } attributes { collapse = [3] } ``` - `collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto` and - `tile` operands are supported with `device_type` information. They should - only be accessed by the extra provided getters. If modified, the - corresponding `device_type` attributes must be modified as well. + `collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto`, + `cache`, and `tile` operands are supported with `device_type` + information. These clauses should only be accessed through the provided + device-type-aware getter methods. 
When modifying these operands, the + corresponding `device_type` attributes must be updated to maintain + consistency between operands and their target device types. }]; let arguments = (ins @@ -2433,6 +2458,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", Variadic:$cacheOperands, Variadic:$privateOperands, OptionalAttr:$privatizationRecipes, + Variadic:$firstprivateOperands, + OptionalAttr:$firstprivatizationRecipes, Variadic:$reductionOperands, OptionalAttr:$reductionRecipes, OptionalAttr:$combined @@ -2589,6 +2616,10 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", /// Adds a private clause variable to this operation, including its recipe. void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op, mlir::acc::PrivateRecipeOp recipe); + /// Adds a firstprivate clause variable to this operation, including its + /// recipe. + void addFirstPrivatization(MLIRContext *, mlir::acc::FirstprivateOp op, + mlir::acc::FirstprivateRecipeOp recipe); /// Adds a reduction clause variable to this operation, including its /// recipe. 
void addReduction(MLIRContext *, mlir::acc::ReductionOp op, @@ -2609,6 +2640,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", type($vectorOperands), $vectorOperandsDeviceType, $vector) | `private` `(` custom( $privateOperands, type($privateOperands), $privatizationRecipes) `)` + | `firstprivate` `(` custom($firstprivateOperands, + type($firstprivateOperands), $firstprivatizationRecipes) `)` | `tile` `(` custom($tileOperands, type($tileOperands), $tileOperandsDeviceType, $tileOperandsSegments) `)` @@ -2665,6 +2698,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", /*cacheOperands=*/{}, /*privateOperands=*/{}, /*privatizationRecipes=*/nullptr, + /*firstprivateOperands=*/{}, + /*firstprivatizationRecipes=*/nullptr, /*reductionOperands=*/{}, /*reductionRecipes=*/nullptr, /*combined=*/nullptr); diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index ee3e4029abfb2..6598ac141008f 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -2674,6 +2674,11 @@ LogicalResult acc::LoopOp::verify() { "privatizations", false))) return failure(); + if (failed(checkSymOperandList( + *this, getFirstprivatizationRecipes(), getFirstprivateOperands(), + "firstprivate", "firstprivatizations", /*checkOperandType=*/false))) + return failure(); + if (failed(checkSymOperandList( *this, getReductionRecipes(), getReductionOperands(), "reduction", "reductions", false))) @@ -2737,7 +2742,8 @@ LogicalResult acc::LoopOp::verify() { } unsigned LoopOp::getNumDataOperands() { - return getReductionOperands().size() + getPrivateOperands().size(); + return getReductionOperands().size() + getPrivateOperands().size() + + getFirstprivateOperands().size(); } Value LoopOp::getDataOperand(unsigned i) { @@ -3117,6 +3123,21 @@ void acc::LoopOp::addPrivatization(MLIRContext *context, setPrivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes)); } +void acc::LoopOp::addFirstPrivatization( + MLIRContext *context, 
mlir::acc::FirstprivateOp op, + mlir::acc::FirstprivateRecipeOp recipe) { + getFirstprivateOperandsMutable().append(op.getResult()); + + llvm::SmallVector recipes; + + if (getFirstprivatizationRecipesAttr()) + llvm::copy(getFirstprivatizationRecipesAttr(), std::back_inserter(recipes)); + + recipes.push_back( + mlir::SymbolRefAttr::get(context, recipe.getSymName().str())); + setFirstprivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes)); +} + void acc::LoopOp::addReduction(MLIRContext *context, mlir::acc::ReductionOp op, mlir::acc::ReductionRecipeOp recipe) { getReductionOperandsMutable().append(op.getResult()); diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index cb69058268172..1484d7efd87c2 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -358,6 +358,41 @@ func.func @acc_loop_multiple_block() { // ----- +acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init { +^bb0(%arg0: memref<10xf32>): + %0 = memref.alloca() : memref<10xf32> + acc.yield %0 : memref<10xf32> +} copy { +^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>): + memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32> + acc.terminator +} destroy { +^bb0(%arg0: memref<10xf32>): + acc.terminator +} + +func.func @testloopfirstprivate(%a: memref<10xf32>, %b: memref<10xf32>) -> () { + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index + %firstprivate = acc.firstprivate varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> + acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %firstprivate : memref<10xf32>) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { + "test.openacc_dummy_op"() : () -> () + acc.yield + } attributes {inclusiveUpperbound = array, independent = [#acc.device_type]} + return +} + +// CHECK-LABEL: func.func @testloopfirstprivate( +// CHECK-SAME: %[[ARG0:.*]]: memref<10xf32>, 
%[[ARG1:.*]]: memref<10xf32>) +// CHECK: %[[FIRSTPRIVATE:.*]] = acc.firstprivate varPtr(%[[ARG0]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> +// CHECK: acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %[[FIRSTPRIVATE]] : memref<10xf32>) control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECK: "test.openacc_dummy_op"() : () -> () +// CHECK: acc.yield +// CHECK: } attributes {inclusiveUpperbound = array, independent = [#acc.device_type]} + +// ----- + acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { ^bb0(%arg0: memref<10xf32>): %0 = memref.alloc() : memref<10xf32> @@ -535,6 +570,7 @@ acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init acc.yield %0 : memref<10xf32> } copy { ^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>): + memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32> acc.terminator } destroy { ^bb0(%arg0: memref<10xf32>):