Skip to content

Commit

Permalink
[MLIR][OpenMP][SCF] Mark parallel regions as allocation scopes
Browse files Browse the repository at this point in the history
MLIR has the notion of allocation scopes, which specify that stack allocations (e.g. memref.alloca, llvm.alloca) are freed, or equivalently are no longer available, at the end of the corresponding region.
Currently neither OpenMP parallel nor SCF parallel regions have the notion of such a scope.

This clearly makes sense for an OpenMP parallel, as it is implemented by outlining the region into a new function, and any allocations in that newly outlined function have a lifetime that ends at the return of the function, by definition.

While SCF.parallel doesn't have a guaranteed runtime with which it is implemented, this similarly makes sense for SCF.parallel: otherwise, an allocation within an SCF.parallel needlessly keeps consuming stack memory that isn't cleaned up until the function (or other allocation scope op) containing the SCF.parallel returns. This means that it is impossible to represent thread-local or iteration-local memory without causing a stack blow-up. If this stack blow-up behavior is intended, it can be equivalently represented with an allocation outside of the SCF.parallel with a size equal to the number of iterations.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D119743
  • Loading branch information
wsmoses committed Feb 18, 2022
1 parent 1cf790b commit 670aeec
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 8 deletions.
4 changes: 2 additions & 2 deletions mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
Expand Up @@ -107,7 +107,7 @@ def AffineApplyOp : Affine_Op<"apply", [NoSideEffect]> {
}

def AffineForOp : Affine_Op<"for",
[ImplicitAffineTerminator, RecursiveSideEffects,
[AutomaticAllocationScope, ImplicitAffineTerminator, RecursiveSideEffects,
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
let summary = "for operation";
let description = [{
Expand Down Expand Up @@ -608,7 +608,7 @@ def AffineMaxOp : AffineMinMaxOpBase<"max", [NoSideEffect]> {
}

def AffineParallelOp : Affine_Op<"parallel",
[ImplicitAffineTerminator, RecursiveSideEffects,
[AutomaticAllocationScope, ImplicitAffineTerminator, RecursiveSideEffects,
DeclareOpInterfaceMethods<LoopLikeOpInterface>, MemRefsNormalizable]> {
let summary = "multi-index parallel band operation";
let description = [{
Expand Down
2 changes: 1 addition & 1 deletion mlir/include/mlir/Dialect/GPU/GPUOps.td
Expand Up @@ -439,7 +439,7 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func",
let hasVerifier = 1;
}

def GPU_LaunchOp : GPU_Op<"launch">,
def GPU_LaunchOp : GPU_Op<"launch", [AutomaticAllocationScope]>,
Arguments<(ins Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Optional<I32>:$dynamicSharedMemorySize)>,
Expand Down
3 changes: 2 additions & 1 deletion mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Expand Up @@ -80,7 +80,8 @@ def ClauseDefault : I32EnumAttr<
def ClauseDefaultAttr : EnumAttr<OpenMP_Dialect, ClauseDefault,
"clause_default">;

def ParallelOp : OpenMP_Op<"parallel", [AttrSizedOperandSegments,
def ParallelOp : OpenMP_Op<"parallel", [AutomaticAllocationScope,
AttrSizedOperandSegments,
DeclareOpInterfaceMethods<OutlineableOpenMPOpInterface>]> {
let summary = "parallel construct";
let description = [{
Expand Down
5 changes: 3 additions & 2 deletions mlir/include/mlir/Dialect/SCF/SCFOps.td
Expand Up @@ -110,7 +110,7 @@ def ExecuteRegionOp : SCF_Op<"execute_region"> {
}

def ForOp : SCF_Op<"for",
[DeclareOpInterfaceMethods<LoopLikeOpInterface>,
[AutomaticAllocationScope, DeclareOpInterfaceMethods<LoopLikeOpInterface>,
DeclareOpInterfaceMethods<RegionBranchOpInterface>,
SingleBlockImplicitTerminator<"scf::YieldOp">,
RecursiveSideEffects]> {
Expand Down Expand Up @@ -404,7 +404,8 @@ def IfOp : SCF_Op<"if",
}

def ParallelOp : SCF_Op<"parallel",
[AttrSizedOperandSegments,
[AutomaticAllocationScope,
AttrSizedOperandSegments,
DeclareOpInterfaceMethods<LoopLikeOpInterface>,
RecursiveSideEffects,
SingleBlockImplicitTerminator<"scf::YieldOp">]> {
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
Expand Up @@ -83,7 +83,6 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
// CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
%f0 = arith.constant 0.0: f32
// CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
Expand All @@ -94,6 +93,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
// CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
// CHECK: scf.if
// CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I4]])
Expand Down Expand Up @@ -149,7 +149,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {

// CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
// CHECK-DAG: %{{.*}} = arith.constant dense<1.000000e+00> : vector<5x4x3xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
Expand All @@ -161,6 +160,7 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
// CHECK: memref.store %{{.*}}, %[[ALLOC]][] : memref<vector<5x4x3xf32>>
// CHECK: %[[VECTOR_VIEW1:.*]] = vector.type_cast %[[ALLOC]] : memref<vector<5x4x3xf32>> to memref<5xvector<4x3xf32>>
// CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
Expand Down

0 comments on commit 670aeec

Please sign in to comment.