[mlir][bufferize] Infer memory space in all bufferization patterns
This change updates all remaining bufferization patterns (except for scf.while) and the remaining bufferization infrastructure to infer the memory space whenever possible instead of falling back to "0". (If a default memory space is set in the bufferization options, we still fall back to that value if the memory space could not be inferred.)

Differential Revision: https://reviews.llvm.org/D128423
matthias-springer committed Jun 27, 2022
1 parent 13fb97d commit c0b0b6a
Showing 16 changed files with 273 additions and 8 deletions.
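To see what the new fallback logic means in practice, here is a minimal sketch, assuming the `BufferizationOptions` API used in the hunks below; this is what the new `must-infer-memory-space` pass option does internally:

// Leaving defaultMemorySpace unset disables the fallback to memory space 0,
// so bufferization fails with "could not infer memory space" wherever the
// space cannot be inferred.
bufferization::BufferizationOptions options;
options.defaultMemorySpace = None; // force inference; no fallback to space 0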
@@ -355,6 +355,25 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
return bufferization::getMemRefType(tensorType, options);
}]
>,
InterfaceMethod<
/*desc=*/[{
Return the memory space of the given tensor OpResult if specified on
this op. If not specified, return `failure`.

This method will never be called with OpResults that do not bufferize
to a memory allocation.
}],
/*retType=*/"FailureOr<unsigned>",
/*methodName=*/"getMemorySpace",
/*args=*/(ins "OpResult":$opResult),
/*methodBody=*/"",
/*defaultImplementation=*/[{
assert(cast<BufferizableOpInterface>($_op.getOperation())
.bufferizesToAllocation(opResult)
&& "expected allocation");
return failure();
}]
>,
];

let extraClassDeclaration = [{
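As an implementer-side sketch of the new interface method (the op below is hypothetical, not part of this commit): an allocating op whose buffers always live in a fixed memory space could simply report that space instead of returning `failure`:

// Hypothetical op whose allocations always live in memory space 3 (the fixed
// space is an assumption for this sketch). This is only called for results
// that bufferize to a new allocation.
FailureOr<unsigned> getMemorySpace(OpResult opResult) {
  return static_cast<unsigned>(3);
}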
11 changes: 9 additions & 2 deletions mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -47,8 +47,9 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor",
another op.

The optional `memory_space` attribute specifies the memory space when
-bufferizing this op. If `memory_space` is not specified, the default memory
-space is used during bufferization.
+bufferizing this op. The memory space is inferred from `copy` if specified.
+If neither `copy` nor `memory_space` is specified, the default memory space
+is used during bufferization.

Both dense and sparse tensor types are supported. The result of a
`bufferization.alloc_tensor` is a tensor value that can be used like any
@@ -81,6 +82,12 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor",

bool bufferizesToAllocation(OpResult opResult) { return true; }

FailureOr<unsigned> getMemorySpace(OpResult opResult) {
if (getMemorySpace().hasValue())
return static_cast<unsigned>(*getMemorySpace());
return failure();
}

bool bufferizesToMemoryRead(OpOperand &opOperand,
const AnalysisState &state);

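A builder-side sketch of pinning the memory space explicitly, mirroring the `setMemorySpaceAttr` call added to `allocateTensorForShapedValue` further down (variable names are assumptions):

// Create an alloc_tensor without a copy operand and pin it to memory space 1.
auto allocTensorOp = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
                                             /*copy=*/Value());
allocTensorOp.setMemorySpaceAttr(b.getIntegerAttr(
    b.getIntegerType(64, /*isSigned=*/false), /*value=*/1));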
4 changes: 4 additions & 0 deletions mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -347,6 +347,10 @@ def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
"Bufferize function boundaries (experimental).">,
Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
"Specify if new allocations should be deallocated.">,
Option<"mustInferMemorySpace", "must-infer-memory-space", "bool",
/*default=*/"false",
"The memory space of an memref types must always be inferred. If "
"unset, a default memory space of 0 is used otherwise.">,
];
let constructor = "mlir::bufferization::createTensorCopyInsertionPass()";
}
@@ -26,6 +26,11 @@ struct ConstantOpInterface
const BufferizationOptions &options) const {
auto constantOp = cast<arith::ConstantOp>(op);

// TODO: Implement memory space for this op. E.g., by adding a memory_space
// attribute to ConstantOp.
if (options.defaultMemorySpace != static_cast<unsigned>(0))
return op->emitError("memory space not implemented yet");

// Only ranked tensors are supported.
if (!constantOp.getType().isa<RankedTensorType>())
return failure();
@@ -150,6 +155,10 @@ struct SelectOpInterface
return failure();
Value trueBuffer = *maybeTrueBuffer;
Value falseBuffer = *maybeFalseBuffer;
BaseMemRefType trueType = trueBuffer.getType().cast<BaseMemRefType>();
BaseMemRefType falseType = falseBuffer.getType().cast<BaseMemRefType>();
if (trueType.getMemorySpaceAsInt() != falseType.getMemorySpaceAsInt())
return op->emitError("inconsistent memory space on true/false operands");

// The "true" and the "false" operands must have the same type. If the
// buffers have different types, they differ only in their layout map. Cast
47 changes: 46 additions & 1 deletion mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -43,6 +43,13 @@ using namespace bufferization;
constexpr const ::llvm::StringLiteral
bufferization::BufferizableOpInterface::kInplaceableAttrName;

/// Return the owner of the given value.
static Operation *getOwnerOfValue(Value value) {
if (auto opResult = value.dyn_cast<OpResult>())
return opResult.getDefiningOp();
return value.cast<BlockArgument>().getOwner()->getParentOp();
}

/// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
/// shaped value is copied. Otherwise, a tensor with undefined contents is
/// allocated.
@@ -84,10 +91,21 @@ FailureOr<Value> bufferization::allocateTensorForShapedValue(
populateDynamicDimSizes(b, loc, tensor, dynamicSizes);
}

// Create AllocTensorOp.
auto allocTensorOp = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
copy ? tensor : Value());
allocTensorOp->setAttr(BufferizationDialect::kEscapeAttrName,
b.getBoolArrayAttr({escape}));

// Add 'memory_space' attribute. Not needed if 'copy' operand is specified.
if (copy)
return allocTensorOp.getResult();
FailureOr<BaseMemRefType> copyBufferType = getBufferType(tensor, options);
if (failed(copyBufferType))
return failure();
allocTensorOp.setMemorySpaceAttr(
b.getIntegerAttr(b.getIntegerType(64, /*isSigned=*/false),
copyBufferType->getMemorySpaceAsInt()));
return allocTensorOp.getResult();
}

@@ -512,16 +530,43 @@ FailureOr<BaseMemRefType>
bufferization::getBufferType(Value value, const BufferizationOptions &options) {
auto tensorType = value.getType().dyn_cast<TensorType>();
assert(tensorType && "unexpected non-tensor type");
Operation *op = getOwnerOfValue(value);

// ToTensorOp: Take buffer type directly from the op.
if (auto toTensorOp = value.getDefiningOp<bufferization::ToTensorOp>())
return toTensorOp.getMemref().getType().cast<BaseMemRefType>();

// If value is a bbArg of a bufferizable op: query op interface.
if (auto bbArg = value.dyn_cast<BlockArgument>())
if (auto bufferizableOp =
options.dynCastBufferizableOp(bbArg.getOwner()->getParentOp()))
return bufferizableOp.getBufferType(bbArg, options);

return getMemRefType(tensorType, options);
// Check if the value is a new buffer allocation with a memory space
// attribute. In that case, we can at least infer the memory space.
Optional<unsigned> memorySpace = None;
if (auto opResult = value.dyn_cast<OpResult>()) {
if (auto bufferizableOp =
options.dynCastBufferizableOp(opResult.getDefiningOp())) {
if (bufferizableOp.bufferizesToAllocation(opResult)) {
FailureOr<unsigned> queriedMemorySpace =
bufferizableOp.getMemorySpace(opResult);
if (!failed(queriedMemorySpace))
memorySpace = *queriedMemorySpace;
}
}
}

// If we still do not know the memory space, use the default memory space (if
// any).
if (!memorySpace.hasValue())
memorySpace = options.defaultMemorySpace;

// If we still do not know the memory space, report a failure.
if (!memorySpace.hasValue())
return op->emitError("could not infer memory space");

return getMemRefType(tensorType, options, /*layout=*/{}, *memorySpace);
}

void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,
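Since `getBufferType` no longer silently defaults when no memory space is known, callers must handle the failure; a hedged sketch (variable names are assumptions):

// Query the eventual buffer type of a tensor SSA value. With no default
// memory space configured, failure propagates the "could not infer memory
// space" diagnostic emitted above.
FailureOr<BaseMemRefType> bufferType =
    bufferization::getBufferType(tensorValue, options);
if (failed(bufferType))
  return failure();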
3 changes: 3 additions & 0 deletions mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -176,6 +176,9 @@ LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter,
unsigned memorySpace;
if (getMemorySpace().hasValue()) {
memorySpace = *getMemorySpace();
} else if (getCopy()) {
memorySpace =
copyBuffer.getType().cast<BaseMemRefType>().getMemorySpaceAsInt();
} else if (options.defaultMemorySpace.hasValue()) {
memorySpace = *options.defaultMemorySpace;
} else {
@@ -105,6 +105,8 @@ struct TensorCopyInsertionPass
options.allowReturnAllocs = allowReturnAllocs;
options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
options.createDeallocs = createDeallocs;
if (mustInferMemorySpace)
options.defaultMemorySpace = None;
if (failed(insertTensorCopies(getOperation(), options)))
signalPassFailure();
}
@@ -707,6 +707,8 @@ struct WhileOpInterface
return success();
}

// TODO: Implement getBufferType interface method and infer buffer types.

LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationOptions &options) const {
auto whileOp = cast<scf::WhileOp>(op);
@@ -379,6 +379,10 @@ struct FromElementsOpInterface
const BufferizationOptions &options) const {
auto fromElementsOp = cast<tensor::FromElementsOp>(op);

// TODO: Implement memory space for this op.
if (options.defaultMemorySpace != static_cast<unsigned>(0))
return op->emitError("memory space not implemented yet");

// Allocate a buffer for the result.
Location loc = op->getLoc();
auto tensorType = fromElementsOp.getType().cast<RankedTensorType>();
@@ -435,6 +439,11 @@ struct GenerateOpInterface
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationOptions &options) const {
auto generateOp = cast<tensor::GenerateOp>(op);

// TODO: Implement memory space for this op.
if (options.defaultMemorySpace != static_cast<unsigned>(0))
return op->emitError("memory space not implemented yet");

auto tensorType = generateOp.getType().cast<RankedTensorType>();
// Allocate memory.
Location loc = op->getLoc();
@@ -792,7 +801,9 @@ struct ReshapeOpInterface
if (failed(srcBuffer) || failed(shapeBuffer))
return failure();
auto resultTensorType = reshapeOp.getResult().getType().cast<TensorType>();
-auto resultMemRefType = getMemRefType(resultTensorType, options);
+auto resultMemRefType = getMemRefType(
+    resultTensorType, options, /*layout=*/{},
+    srcBuffer->getType().cast<BaseMemRefType>().getMemorySpaceAsInt());
replaceOpWithNewBufferizedOp<memref::ReshapeOp>(
rewriter, op, resultMemRefType, *srcBuffer, *shapeBuffer);
return success();
@@ -0,0 +1,22 @@
// RUN: mlir-opt %s -one-shot-bufferize="must-infer-memory-space" -split-input-file -verify-diagnostics

func.func @inconsistent_memory_space_arith_select(%c: i1) -> tensor<10xf32> {
// Selecting tensors with different memory spaces. Such IR cannot be
// bufferized.
%0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32>
%1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
// expected-error @+2 {{inconsistent memory space on true/false operands}}
// expected-error @+1 {{failed to bufferize op}}
%r = arith.select %c, %0, %1 : tensor<10xf32>
func.return %r : tensor<10xf32>
}

// -----

func.func @constant_memory_space(%idx: index, %v: i32) -> tensor<3xi32> {
// expected-error @+2 {{memory space not implemented yet}}
// expected-error @+1 {{failed to bufferize op}}
%cst = arith.constant dense<[5, 1000, 20]> : tensor<3xi32>
%0 = tensor.insert %v into %cst[%idx] : tensor<3xi32>
return %0 : tensor<3xi32>
}
@@ -6,3 +6,14 @@ func.func @alloc_tensor_without_memory_space() -> tensor<10xf32> {
%0 = bufferization.alloc_tensor() : tensor<10xf32>
return %0 : tensor<10xf32>
}

// -----

func.func @memory_space_of_unknown_op() -> f32 {
%c0 = arith.constant 0 : index
// expected-error @+1 {{could not infer memory space}}
%t = "test.dummy_op"() : () -> (tensor<10xf32>)
// expected-error @+1 {{failed to bufferize op}}
%s = tensor.extract %t[%c0] : tensor<10xf32>
return %s : f32
}
@@ -0,0 +1,12 @@
// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file -verify-diagnostics

// An alloc is inserted but the copy is elided. Therefore, the memory space
// must be specified on the alloc_tensor op; here, it cannot be inferred.
func.func @memory_space_of_unknown_op() -> (tensor<10xf32>, tensor<10xf32>) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
// expected-error @+1 {{could not infer memory space}}
%t = bufferization.alloc_tensor() : tensor<10xf32>
%s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
return %s, %t : tensor<10xf32>, tensor<10xf32>
}
@@ -0,0 +1,25 @@
// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file | FileCheck %s

// CHECK-LABEL: func @unknown_op_copy
func.func @unknown_op_copy() -> (tensor<10xf32>, tensor<10xf32>) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
// CHECK: %[[dummy:.*]] = "test.dummy_op"() : () -> tensor<10xf32>
%t = "test.dummy_op"() : () -> tensor<10xf32>
// CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[dummy]]) {bufferization.escape = [false]} : tensor<10xf32>
%s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
return %s, %t : tensor<10xf32>, tensor<10xf32>
}

// -----

// CHECK-LABEL: func @alloc_tensor_copy
func.func @alloc_tensor_copy() -> (tensor<10xf32>, tensor<10xf32>) {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
// CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
%t = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
// CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
%s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
return %s, %t : tensor<10xf32>, tensor<10xf32>
}
@@ -40,10 +40,10 @@ func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
{
// CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
// The second alloc_tensor should not have a copy operand.
-// CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
+// CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32>

// CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
-// CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
+// CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true], memory_space = 0 : ui64} : tensor<5xf32>
%0 = bufferization.alloc_tensor() : tensor<5xf32>
%1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
return %0, %1 : tensor<5xf32>, tensor<5xf32>
@@ -55,7 +55,7 @@ func.func @do_not_copy_when_overwritten(%t: tensor<5xf32>, %f: f32)
func.func @do_not_copy_when_overwritten(%t: tensor<5xf32>, %f: f32)
-> (tensor<5xf32>, tensor<5xf32>)
{
-// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
+// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32>
// CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
@@ -74,7 +74,7 @@ func.func @do_not_copy_when_result_not_read(%t: tensor<5xf32>, %f: f32)
-> (tensor<3xf32>)
{
%0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32>
-// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<3xf32>
+// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<3xf32>
// CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
16 changes: 16 additions & 0 deletions mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir
@@ -0,0 +1,16 @@
// RUN: mlir-opt %s -one-shot-bufferize -split-input-file -verify-diagnostics

func.func @inconsistent_memory_space_scf_if(%c: i1) -> tensor<10xf32> {
// Yielding tensors with different memory spaces. Such IR cannot be
// bufferized.
%0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32>
%1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
// expected-error @+2 {{inconsistent memory space on then/else branches}}
// expected-error @+1 {{failed to bufferize op}}
%r = scf.if %c -> tensor<10xf32> {
scf.yield %0 : tensor<10xf32>
} else {
scf.yield %1 : tensor<10xf32>
}
func.return %r : tensor<10xf32>
}
