diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index d63fdd88f7910..739b1f67be7cb 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1528,21 +1528,32 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove, Symbol]> { let summary = "declares a reduction kind"; let description = [{ - Declares an OpenMP reduction kind. This requires two mandatory and two + Declares an OpenMP reduction kind. This requires two mandatory and three optional regions. - 1. The initializer region specifies how to initialize the thread-local + 1. The optional alloc region specifies how to allocate the thread-local + reduction value. This region should not contain control flow and all + IR should be suitable for inlining straight into an entry block. In + the common case this is expected to contain only allocas. It is + expected to `omp.yield` the allocated value on all control paths. + If allocation is conditional (e.g. only allocate if the mold is + allocated), this should be done in the initializer region and this + region not included. The alloc region is not used for by-value + reductions (where allocation is implicit). + 2. The initializer region specifies how to initialize the thread-local reduction value. This is usually the neutral element of the reduction. For convenience, the region has an argument that contains the value - of the reduction accumulator at the start of the reduction. It is - expected to `omp.yield` the new value on all control flow paths. - 2. The reduction region specifies how to combine two values into one, i.e. + of the reduction accumulator at the start of the reduction. If an alloc + region is specified, there is a second block argument containing the + address of the allocated memory. The initializer region is expected to + `omp.yield` the new value on all control flow paths. + 3. 
The reduction region specifies how to combine two values into one, i.e. the reduction operator. It accepts the two values as arguments and is expected to `omp.yield` the combined value on all control flow paths. - 3. The atomic reduction region is optional and specifies how two values + 4. The atomic reduction region is optional and specifies how two values can be combined atomically given local accumulator variables. It is expected to store the combined value in the first accumulator variable. - 4. The cleanup region is optional and specifies how to clean up any memory + 5. The cleanup region is optional and specifies how to clean up any memory allocated by the initializer region. The region has an argument that contains the value of the thread-local reduction accumulator. This will be executed after the reduction has completed. @@ -1558,12 +1569,14 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove, let arguments = (ins SymbolNameAttr:$sym_name, TypeAttr:$type); - let regions = (region AnyRegion:$initializerRegion, + let regions = (region MaxSizedRegion<1>:$allocRegion, + AnyRegion:$initializerRegion, AnyRegion:$reductionRegion, AnyRegion:$atomicReductionRegion, AnyRegion:$cleanupRegion); let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword " + "custom<AllocReductionRegion>($allocRegion) " "`init` $initializerRegion " "`combiner` $reductionRegion " "custom<AtomicReductionRegion>($atomicReductionRegion) " @@ -1576,6 +1589,17 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove, return cast<PtrLikeTypeInterface>(getAtomicReductionRegion().front().getArgument(0).getType()); } + + Value getInitializerMoldArg() { + return getInitializerRegion().front().getArgument(0); + } + + Value getInitializerAllocArg() { + if (getAllocRegion().empty() || + getInitializerRegion().front().getNumArguments() != 2) + return {nullptr}; + return getInitializerRegion().front().getArgument(1); + } }]; let hasRegionVerifier = 1; } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 4c943ebbe3144..273f49b8b12b6 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1883,46 +1883,84 @@ LogicalResult DistributeOp::verify() { // DeclareReductionOp //===----------------------------------------------------------------------===// -static ParseResult parseAtomicReductionRegion(OpAsmParser &parser, - Region &region) { - if (parser.parseOptionalKeyword("atomic")) +static ParseResult parseOptionalReductionRegion(OpAsmParser &parser, + Region &region, + StringRef keyword) { + if (parser.parseOptionalKeyword(keyword)) return success(); return parser.parseRegion(region); } -static void printAtomicReductionRegion(OpAsmPrinter &printer, - DeclareReductionOp op, Region &region) { +static void printOptionalReductionRegion(OpAsmPrinter &printer, Region &region, + StringRef keyword) { if (region.empty()) return; - printer << "atomic "; + printer << keyword << " "; printer.printRegion(region); } +static ParseResult parseAllocReductionRegion(OpAsmParser &parser, + Region &region) { + return parseOptionalReductionRegion(parser, region, "alloc"); +} + +static void printAllocReductionRegion(OpAsmPrinter &printer, + DeclareReductionOp op, Region &region) { + printOptionalReductionRegion(printer, region, "alloc"); +} + +static ParseResult parseAtomicReductionRegion(OpAsmParser &parser, + Region &region) { + return parseOptionalReductionRegion(parser, region, "atomic"); +} + +static void printAtomicReductionRegion(OpAsmPrinter &printer, + DeclareReductionOp op, Region &region) { + printOptionalReductionRegion(printer, region, "atomic"); +} + static ParseResult parseCleanupReductionRegion(OpAsmParser &parser, Region &region) { - if (parser.parseOptionalKeyword("cleanup")) - return success(); - return parser.parseRegion(region); + return parseOptionalReductionRegion(parser, region, "cleanup"); } static void printCleanupReductionRegion(OpAsmPrinter &printer, DeclareReductionOp op, Region 
&region) { - if (region.empty()) - return; - printer << "cleanup "; - printer.printRegion(region); + printOptionalReductionRegion(printer, region, "cleanup"); } LogicalResult DeclareReductionOp::verifyRegions() { + if (!getAllocRegion().empty()) { + for (YieldOp yieldOp : getAllocRegion().getOps<YieldOp>()) { + if (yieldOp.getResults().size() != 1 || + yieldOp.getResults().getTypes()[0] != getType()) + return emitOpError() << "expects alloc region to yield a value " + "of the reduction type"; + } + } + if (getInitializerRegion().empty()) return emitOpError() << "expects non-empty initializer region"; Block &initializerEntryBlock = getInitializerRegion().front(); - if (initializerEntryBlock.getNumArguments() != 1 || - initializerEntryBlock.getArgument(0).getType() != getType()) { - return emitOpError() << "expects initializer region with one argument " - "of the reduction type"; + + if (initializerEntryBlock.getNumArguments() == 1) { + if (!getAllocRegion().empty()) + return emitOpError() << "expects two arguments to the initializer region " + "when an allocation region is used"; + } else if (initializerEntryBlock.getNumArguments() == 2) { + if (getAllocRegion().empty()) + return emitOpError() << "expects one argument to the initializer region " + "when no allocation region is used"; + } else { + return emitOpError() + << "expects one or two arguments to the initializer region"; } + for (mlir::Value arg : initializerEntryBlock.getArguments()) + if (arg.getType() != getType()) + return emitOpError() << "expects initializer region argument to match " + "the reduction type"; + for (YieldOp yieldOp : getInitializerRegion().getOps<YieldOp>()) { if (yieldOp.getResults().size() != 1 || yieldOp.getResults().getTypes()[0] != getType()) diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index c76b07ec94a59..332d22fc2c642 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -565,7 +565,63 @@ func.func 
@omp_simd_pretty_simdlen_safelen(%lb : index, %ub : index, %step : ind // ----- -// expected-error @below {{op expects initializer region with one argument of the reduction type}} +// expected-error @below {{op expects alloc region to yield a value of the reduction type}} +omp.declare_reduction @add_f32 : f32 +alloc { +^bb0(%arg: f32): +// nonsense test code + %0 = arith.constant 0.0 : f64 + omp.yield (%0 : f64) +} +init { +^bb0(%arg0: f32, %arg1: f32): + %0 = arith.constant 0.0 : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = arith.addf %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} + +// ----- + +// expected-error @below {{op expects two arguments to the initializer region when an allocation region is used}} +omp.declare_reduction @add_f32 : f32 +alloc { +^bb0(%arg: f32): +// nonsense test code + omp.yield (%arg : f32) +} +init { +^bb0(%arg0: f32): + %0 = arith.constant 0.0 : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = arith.addf %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} + +// ----- + +// expected-error @below {{op expects one argument to the initializer region when no allocation region is used}} +omp.declare_reduction @add_f32 : f32 +init { +^bb0(%arg: f32, %arg2: f32): + %0 = arith.constant 0.0 : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = arith.addf %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} + +// ----- + +// expected-error @below {{op expects initializer region argument to match the reduction type}} omp.declare_reduction @add_f32 : f64 init { ^bb0(%arg: f32): @@ -683,6 +739,33 @@ cleanup { // ----- +// expected-error @below {{op region #0 ('allocRegion') failed to verify constraint: region with at most 1 blocks}} +omp.declare_reduction @alloc_reduction : !llvm.ptr +alloc { +^bb0(%arg: !llvm.ptr): + %c1 = arith.constant 1 : i32 + %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr + cf.br ^bb1(%0: !llvm.ptr) +^bb1(%ret: !llvm.ptr): + omp.yield (%ret : 
!llvm.ptr) +} +init { +^bb0(%arg: !llvm.ptr): + %cst = arith.constant 1.0 : f32 + llvm.store %cst, %arg : f32, !llvm.ptr + omp.yield (%arg : !llvm.ptr) +} +combiner { +^bb1(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> f32 + %1 = llvm.load %arg1 : !llvm.ptr -> f32 + %2 = arith.addf %0, %1 : f32 + llvm.store %2, %arg0 : f32, !llvm.ptr + omp.yield (%arg0 : !llvm.ptr) +} + +// ----- + func.func @foo(%lb : index, %ub : index, %step : index) { %c1 = arith.constant 1 : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 9ac97e069addd..9c308cc010849 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -2541,6 +2541,36 @@ atomic { omp.yield } +// CHECK-LABEL: @alloc_reduction +// CHECK-SAME: alloc { +// CHECK-NEXT: ^bb0(%[[ARG0:.*]]: !llvm.ptr): +// ... +// CHECK: omp.yield +// CHECK-NEXT: } init { +// CHECK: } combiner { +// CHECK: } +omp.declare_reduction @alloc_reduction : !llvm.ptr +alloc { +^bb0(%arg: !llvm.ptr): + %c1 = arith.constant 1 : i32 + %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr + omp.yield (%0 : !llvm.ptr) +} +init { +^bb0(%mold: !llvm.ptr, %alloc: !llvm.ptr): + %cst = arith.constant 1.0 : f32 + llvm.store %cst, %alloc : f32, !llvm.ptr + omp.yield (%alloc : !llvm.ptr) +} +combiner { +^bb1(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> f32 + %1 = llvm.load %arg1 : !llvm.ptr -> f32 + %2 = arith.addf %0, %1 : f32 + llvm.store %2, %arg0 : f32, !llvm.ptr + omp.yield (%arg0 : !llvm.ptr) +} + // CHECK-LABEL: omp_targets_with_map_bounds // CHECK-SAME: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr) func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () {